toon_format/utils/
string.rs

1use crate::{
2    types::Delimiter,
3    utils::literal,
4};
5
/// Produce the escaped form of `s` suitable for placing between double quotes.
///
/// Newline, carriage return, tab, double quote and backslash are replaced by
/// their two-character backslash sequences (`\n`, `\r`, `\t`, `\"`, `\\`).
/// Every other character is copied through unchanged. The surrounding quotes
/// themselves are not added here.
pub fn escape_string(s: &str) -> String {
    /// Two-character escape for `ch`, or `None` when `ch` is emitted verbatim.
    fn escape_for(ch: char) -> Option<&'static str> {
        match ch {
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            _ => None,
        }
    }

    // The output is at least as long as the input, so reserve that up front.
    let mut escaped = String::with_capacity(s.len());

    for ch in s.chars() {
        match escape_for(ch) {
            Some(sequence) => escaped.push_str(sequence),
            None => escaped.push(ch),
        }
    }

    escaped
}
23
/// Decode the backslash escapes inside the body of a quoted string.
///
/// Per TOON spec §7.1 exactly five escape sequences are recognised:
/// - `\\` → `\`
/// - `\"` → `"`
/// - `\n` → newline
/// - `\r` → carriage return
/// - `\t` → tab
///
/// Characters that are not part of an escape sequence are copied unchanged.
///
/// # Errors
///
/// Returns a descriptive message when a backslash is followed by any other
/// character, or when a backslash is the last character of the input. The
/// reported position is the 1-based character index of the offending
/// backslash.
pub fn unescape_string(s: &str) -> Result<String, String> {
    let mut decoded = String::with_capacity(s.len());
    let mut chars = s.chars().enumerate();

    // `while let` rather than `for`: the loop body pulls the character that
    // follows a backslash from the same iterator.
    while let Some((index, ch)) = chars.next() {
        if ch != '\\' {
            decoded.push(ch);
            continue;
        }

        // 1-based character position of the backslash that starts the escape.
        let position = index + 1;

        let replacement = match chars.next().map(|(_, escaped)| escaped) {
            Some('n') => '\n',
            Some('r') => '\r',
            Some('t') => '\t',
            Some('"') => '"',
            Some('\\') => '\\',
            Some(next) => {
                return Err(format!(
                    "Invalid escape sequence '\\{next}' at position {position}. Only \
                     \\\\, \\\", \\n, \\r, \\t are valid",
                ));
            }
            None => {
                return Err(format!(
                    "Unterminated escape sequence at end of string (position {position})",
                ));
            }
        };

        decoded.push(replacement);
    }

    Ok(decoded)
}
94
/// Check whether `key` may be written without surrounding quotes.
///
/// A key is accepted only when it matches `^[A-Za-z_][A-Za-z0-9_.]*$`:
/// - it is non-empty,
/// - its first character is an ASCII letter or `_`,
/// - every following character is an ASCII letter, ASCII digit, `_` or `.`.
///
/// The check is deliberately ASCII-only, following the TOON key-encoding
/// rule. Keys containing non-ASCII letters or digits (for example `café`)
/// return `false`, so callers quote them instead of emitting them bare.
pub fn is_valid_unquoted_key(key: &str) -> bool {
    let mut chars = key.chars();

    // An empty key yields `None` here and is therefore rejected.
    chars
        .next()
        .is_some_and(|first| first.is_ascii_alphabetic() || first == '_')
        && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '.'))
}
114
115/// Determine if a string needs quoting based on content and delimiter.
116pub fn needs_quoting(s: &str, delimiter: char) -> bool {
117    if s.is_empty() {
118        return true;
119    }
120
121    if literal::is_literal_like(s) {
122        return true;
123    }
124
125    if s.chars().any(literal::is_structural_char) {
126        return true;
127    }
128
129    if s.contains('\\') || s.contains('"') {
130        return true;
131    }
132
133    if s.contains(delimiter) {
134        return true;
135    }
136
137    if s.contains('\n') || s.contains('\r') || s.contains('\t') {
138        return true;
139    }
140
141    if s.starts_with(char::is_whitespace) || s.ends_with(char::is_whitespace) {
142        return true;
143    }
144
145    if s.starts_with("-") {
146        return true;
147    }
148
149    // Check for leading zeros (e.g., "05", "007", "0123")
150    // Numbers with leading zeros must be quoted
151    if s.starts_with('0') && s.len() > 1 && s.chars().nth(1).is_some_and(|c| c.is_ascii_digit()) {
152        return true;
153    }
154
155    false
156}
157
158/// Quote and escape a string.
159pub fn quote_string(s: &str) -> String {
160    format!("\"{}\"", escape_string(s))
161}
162
163pub fn split_by_delimiter(s: &str, delimiter: Delimiter) -> Vec<String> {
164    let mut result = Vec::new();
165    let mut current = String::new();
166    let mut in_quotes = false;
167    let chars = s.chars().peekable();
168    let delim_char = delimiter.as_char();
169
170    for ch in chars {
171        if ch == '"' && (current.is_empty() || !current.ends_with('\\')) {
172            in_quotes = !in_quotes;
173            current.push(ch);
174        } else if ch == delim_char && !in_quotes {
175            result.push(current.trim().to_string());
176            current.clear();
177        } else {
178            current.push(ch);
179        }
180    }
181
182    if !current.is_empty() {
183        result.push(current.trim().to_string());
184    }
185
186    result
187}
188
#[cfg(test)]
mod tests {
    use super::*;

    /// Escaping leaves plain text alone and rewrites special characters.
    #[test]
    fn test_escape_string() {
        let cases = [
            ("hello", "hello"),
            ("hello\nworld", "hello\\nworld"),
            ("say \"hi\"", "say \\\"hi\\\""),
            ("back\\slash", "back\\\\slash"),
        ];

        for (input, expected) in cases {
            assert_eq!(escape_string(input), expected, "input: {input:?}");
        }
    }

    /// Each of the valid escape sequences decodes to its character.
    #[test]
    fn test_unescape_string() {
        let cases = [
            ("hello", "hello"),
            ("hello\\nworld", "hello\nworld"),
            ("say \\\"hi\\\"", "say \"hi\""),
            ("back\\\\slash", "back\\slash"),
            ("tab\\there", "tab\there"),
            ("return\\rhere", "return\rhere"),
        ];

        for (input, expected) in cases {
            assert_eq!(unescape_string(input).unwrap(), expected, "input: {input:?}");
        }
    }

    /// Unknown escapes and a dangling backslash are rejected.
    #[test]
    fn test_unescape_string_invalid_escapes() {
        let rejected = [
            // Invalid escape sequences
            "bad\\xescape",
            "bad\\uescape",
            "bad\\0escape",
            "bad\\aescape",
            // Unterminated escape at end
            "ends\\",
        ];

        for input in rejected {
            assert!(unescape_string(input).is_err(), "input: {input:?}");
        }
    }

    /// The error text names the problem and echoes the offending sequence.
    #[test]
    fn test_unescape_string_error_messages() {
        let outcome = unescape_string("bad\\x");
        assert!(outcome.is_err());

        let message = outcome.unwrap_err();
        assert!(message.contains("Invalid escape sequence"));
        assert!(message.contains("\\x"));
    }

    /// Quoting decisions for a comma delimiter, plus one pipe-delimiter case.
    #[test]
    fn test_needs_quoting() {
        let comma = Delimiter::Comma.as_char();

        let must_quote = [
            "",
            "true",
            "false",
            "null",
            "123",
            "hello[world]",
            "key:value",
            "a,b",
            " hello",
            "hello ",
        ];
        for value in must_quote {
            assert!(needs_quoting(value, comma), "value: {value:?}");
        }

        let plain = ["hello world", "hello", "world", "helloworld"];
        for value in plain {
            assert!(!needs_quoting(value, comma), "value: {value:?}");
        }

        // A comma is harmless when the active delimiter is a pipe.
        assert!(!needs_quoting("a,b", Delimiter::Pipe.as_char()));
    }

    /// Quoting wraps the escaped text in double quotes.
    #[test]
    fn test_quote_string() {
        let cases = [
            ("hello", "\"hello\""),
            ("hello\nworld", "\"hello\\nworld\""),
        ];

        for (input, expected) in cases {
            assert_eq!(quote_string(input), expected, "input: {input:?}");
        }
    }

    /// Splitting trims segments and keeps quoted delimiters together.
    #[test]
    fn test_split_by_delimiter() {
        let comma = Delimiter::Comma;

        let cases = [
            ("a,b,c", vec!["a", "b", "c"]),
            ("a, b, c", vec!["a", "b", "c"]),
            ("\"a,b\",c", vec!["\"a,b\"", "c"]),
        ];

        for (input, expected) in cases {
            assert_eq!(split_by_delimiter(input, comma), expected, "input: {input:?}");
        }
    }

    /// Accepted and rejected shapes for unquoted keys.
    #[test]
    fn test_is_valid_unquoted_key() {
        let accepted = [
            "normal_key",
            "key123",
            "key.value",
            "_private",
            "KeyName",
            "key_name",
            "key.name.sub",
            "a",
            "_",
            "key_123.value",
            // A trailing dot is allowed.
            "key.value.sub.",
            "key.",
        ];
        for key in accepted {
            assert!(is_valid_unquoted_key(key), "key: {key:?}");
        }

        let refused = [
            "",
            "123",
            "key:value",
            "key-value",
            "key value",
            ".key",
            "key[value]",
            "key{value}",
        ];
        for key in refused {
            assert!(!is_valid_unquoted_key(key), "key: {key:?}");
        }
    }
}