Skip to main content

toon/decode/
parser.rs

1use crate::error::{Result, ToonError};
2use crate::shared::constants::{
3    BACKSLASH, CLOSE_BRACE, CLOSE_BRACKET, COLON, DOUBLE_QUOTE, OPEN_BRACE, OPEN_BRACKET, PIPE, TAB,
4};
5use crate::shared::literal_utils::{is_boolean_or_null_literal, is_numeric_literal};
6use crate::shared::string_utils::{find_closing_quote, find_unquoted_char, unescape_string};
7
8#[derive(Debug, Clone, PartialEq, Eq)]
9pub struct ArrayHeaderInfo {
10    pub key: Option<String>,
11    pub key_was_quoted: bool,
12    pub length: usize,
13    pub delimiter: char,
14    pub fields: Option<Vec<FieldName>>,
15}
16
/// A single field name from the `{...}` segment of an array header.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FieldName {
    /// Field name with surrounding quotes removed and escapes resolved.
    pub name: String,
    /// `true` when the name was written as a double-quoted string.
    pub was_quoted: bool,
}
22
23#[derive(Debug, Clone, PartialEq, Eq)]
24pub struct ArrayHeaderParseResult {
25    pub header: ArrayHeaderInfo,
26    pub inline_values: Option<String>,
27}
28
29/// Parse a TOON array header line, returning header metadata and inline values.
30///
31/// # Errors
32///
33/// Returns an error for malformed quoted keys or string literals.
34pub fn parse_array_header_line(
35    content: &str,
36    default_delimiter: char,
37) -> Result<Option<ArrayHeaderParseResult>> {
38    let trimmed = content.trim_start();
39
40    let bracket_start = if trimmed.starts_with(DOUBLE_QUOTE) {
41        let closing = find_closing_quote(trimmed, 0)
42            .ok_or_else(|| ToonError::message("Unterminated string: missing closing quote"))?;
43        let after_quote = &trimmed[closing + 1..];
44        if !after_quote.starts_with(OPEN_BRACKET) {
45            return Ok(None);
46        }
47        let leading_ws = content.len() - trimmed.len();
48        let key_end = leading_ws + closing + 1;
49        content[key_end..]
50            .find(OPEN_BRACKET)
51            .map(|idx| key_end + idx)
52    } else {
53        content.find(OPEN_BRACKET)
54    };
55
56    let Some(bracket_start) = bracket_start else {
57        return Ok(None);
58    };
59
60    let Some(bracket_end) = content[bracket_start..].find(CLOSE_BRACKET) else {
61        return Ok(None);
62    };
63    let bracket_end = bracket_start + bracket_end;
64
65    let mut brace_end = bracket_end + 1;
66    let brace_start = content[bracket_end + 1..]
67        .find(OPEN_BRACE)
68        .map(|idx| bracket_end + 1 + idx);
69    let colon_after_bracket = content[bracket_end + 1..]
70        .find(COLON)
71        .map(|idx| bracket_end + 1 + idx);
72
73    if let (Some(brace_start), Some(colon_after_bracket)) = (brace_start, colon_after_bracket) {
74        if brace_start < colon_after_bracket {
75            if let Some(found_end) = content[brace_start..].find(CLOSE_BRACE) {
76                let found_end = brace_start + found_end;
77                brace_end = found_end + 1;
78            }
79        }
80    }
81
82    let colon_index = content[brace_end..].find(COLON).map(|idx| brace_end + idx);
83    let Some(colon_index) = colon_index else {
84        return Ok(None);
85    };
86
87    let mut key: Option<String> = None;
88    let mut key_was_quoted = false;
89    if bracket_start > 0 {
90        let raw_key = content[..bracket_start].trim();
91        if raw_key.starts_with(DOUBLE_QUOTE) {
92            key = Some(parse_string_literal(raw_key)?);
93            key_was_quoted = true;
94        } else if !raw_key.is_empty() {
95            key = Some(raw_key.to_string());
96        }
97    }
98
99    let after_colon = content[colon_index + 1..].trim();
100    let bracket_content = &content[bracket_start + 1..bracket_end];
101
102    let Ok((length, delimiter)) = parse_bracket_segment(bracket_content, default_delimiter) else {
103        return Ok(None);
104    };
105
106    let mut fields: Option<Vec<FieldName>> = None;
107    if let Some(brace_start) = brace_start {
108        if brace_start < colon_index {
109            if let Some(found_end) = content[brace_start..].find(CLOSE_BRACE) {
110                let found_end = brace_start + found_end;
111                if found_end < colon_index {
112                    let fields_content = &content[brace_start + 1..found_end];
113                    let parsed_fields = parse_delimited_values(fields_content, delimiter)
114                        .into_iter()
115                        .map(|field| {
116                            let trimmed = field.trim();
117                            let was_quoted = trimmed.starts_with(DOUBLE_QUOTE);
118                            let name = parse_string_literal(trimmed)?;
119                            Ok(FieldName { name, was_quoted })
120                        })
121                        .collect::<Result<Vec<_>>>()?;
122                    fields = Some(parsed_fields);
123                }
124            }
125        }
126    }
127
128    Ok(Some(ArrayHeaderParseResult {
129        header: ArrayHeaderInfo {
130            key,
131            key_was_quoted,
132            length,
133            delimiter,
134            fields,
135        },
136        inline_values: if after_colon.is_empty() {
137            None
138        } else {
139            Some(after_colon.to_string())
140        },
141    }))
142}
143
144/// Parse the bracket length segment, extracting length and delimiter.
145///
146/// # Errors
147///
148/// Returns an error if the length is invalid.
149pub fn parse_bracket_segment(seg: &str, default_delimiter: char) -> Result<(usize, char)> {
150    let mut content = seg.to_string();
151    let mut delimiter = default_delimiter;
152
153    if content.ends_with(TAB) {
154        delimiter = TAB;
155        content.pop();
156    } else if content.ends_with(PIPE) {
157        delimiter = PIPE;
158        content.pop();
159    }
160
161    let length = content
162        .parse::<usize>()
163        .map_err(|_| ToonError::message(format!("Invalid array length: {seg}")))?;
164
165    Ok((length, delimiter))
166}
167
168#[must_use]
169pub fn parse_delimited_values(input: &str, delimiter: char) -> Vec<String> {
170    // Pre-estimate capacity based on delimiter count
171    let estimated_count = input.chars().filter(|&c| c == delimiter).count() + 1;
172    let mut values = Vec::with_capacity(estimated_count);
173    let mut buffer = String::with_capacity(64); // Reasonable default for field values
174    let mut in_quotes = false;
175    let mut iter = input.chars();
176
177    while let Some(ch) = iter.next() {
178        if ch == BACKSLASH && in_quotes {
179            buffer.push(ch);
180            if let Some(next) = iter.next() {
181                buffer.push(next);
182            }
183            continue;
184        }
185
186        if ch == DOUBLE_QUOTE {
187            in_quotes = !in_quotes;
188            buffer.push(ch);
189            continue;
190        }
191
192        if ch == delimiter && !in_quotes {
193            values.push(buffer.trim().to_string());
194            buffer.clear();
195            continue;
196        }
197
198        buffer.push(ch);
199    }
200
201    if !buffer.is_empty() || !values.is_empty() {
202        values.push(buffer.trim().to_string());
203    }
204
205    values
206}
207
208/// Map delimited string values into JSON primitives.
209///
210/// # Errors
211///
212/// Returns an error if any token is a malformed quoted string.
213pub fn map_row_values_to_primitives(values: &[String]) -> Result<Vec<crate::JsonPrimitive>> {
214    values
215        .iter()
216        .map(|value| parse_primitive_token(value))
217        .collect()
218}
219
220/// Parse a primitive token into a JSON primitive.
221///
222/// # Errors
223///
224/// Returns an error if a quoted string token is unterminated or malformed.
225pub fn parse_primitive_token(token: &str) -> Result<crate::JsonPrimitive> {
226    let trimmed = token.trim();
227
228    if trimmed.is_empty() {
229        return Ok(crate::StringOrNumberOrBoolOrNull::String(String::new()));
230    }
231
232    if trimmed.starts_with(DOUBLE_QUOTE) {
233        return Ok(crate::StringOrNumberOrBoolOrNull::String(
234            parse_string_literal(trimmed)?,
235        ));
236    }
237
238    if is_boolean_or_null_literal(trimmed) {
239        return Ok(match trimmed {
240            "true" => crate::StringOrNumberOrBoolOrNull::Bool(true),
241            "false" => crate::StringOrNumberOrBoolOrNull::Bool(false),
242            _ => crate::StringOrNumberOrBoolOrNull::Null,
243        });
244    }
245
246    if is_numeric_literal(trimmed) {
247        let parsed = trimmed.parse::<f64>().unwrap_or(f64::NAN);
248        let normalized = if parsed == 0.0 && parsed.is_sign_negative() {
249            0.0
250        } else {
251            parsed
252        };
253        return Ok(crate::StringOrNumberOrBoolOrNull::Number(normalized));
254    }
255
256    Ok(crate::StringOrNumberOrBoolOrNull::String(
257        trimmed.to_string(),
258    ))
259}
260
261/// Parse a quoted string literal, unescaping escape sequences.
262///
263/// # Errors
264///
265/// Returns an error for unterminated quotes or invalid escape sequences.
266pub fn parse_string_literal(token: &str) -> Result<String> {
267    let trimmed = token.trim();
268
269    if trimmed.starts_with(DOUBLE_QUOTE) {
270        let closing = find_closing_quote(trimmed, 0)
271            .ok_or_else(|| ToonError::message("Unterminated string: missing closing quote"))?;
272        if closing != trimmed.len() - 1 {
273            return Err(ToonError::message(
274                "Unexpected characters after closing quote",
275            ));
276        }
277        let content = &trimmed[1..closing];
278        return unescape_string(content).map_err(ToonError::message);
279    }
280
281    Ok(trimmed.to_string())
282}
283
284/// Parse an unquoted key up to the colon delimiter.
285///
286/// # Errors
287///
288/// Returns an error if no colon is found after the key.
289pub fn parse_unquoted_key(content: &str, start: usize) -> Result<(String, usize)> {
290    let mut pos = start;
291    while pos < content.len() && content.as_bytes()[pos] as char != COLON {
292        pos += 1;
293    }
294
295    if pos >= content.len() || content.as_bytes()[pos] as char != COLON {
296        return Err(ToonError::message("Missing colon after key"));
297    }
298
299    let key = content[start..pos].trim().to_string();
300    pos += 1;
301    Ok((key, pos))
302}
303
304/// Parse a quoted key and validate the following colon.
305///
306/// # Errors
307///
308/// Returns an error for unterminated quotes or missing colon.
309pub fn parse_quoted_key(content: &str, start: usize) -> Result<(String, usize)> {
310    let closing = find_closing_quote(content, start)
311        .ok_or_else(|| ToonError::message("Unterminated quoted key"))?;
312    let key_content = &content[start + 1..closing];
313    let key = unescape_string(key_content).map_err(ToonError::message)?;
314    let mut pos = closing + 1;
315    if pos >= content.len() || content.as_bytes()[pos] as char != COLON {
316        return Err(ToonError::message("Missing colon after key"));
317    }
318    pos += 1;
319    Ok((key, pos))
320}
321
322/// Parse a key token (quoted or unquoted) and return key, end index, and quoted flag.
323///
324/// # Errors
325///
326/// Returns an error if the key is malformed or missing a trailing colon.
327pub fn parse_key_token(content: &str, start: usize) -> Result<(String, usize, bool)> {
328    let is_quoted = content.as_bytes().get(start).map(|b| *b as char) == Some(DOUBLE_QUOTE);
329    let (key, end) = if is_quoted {
330        parse_quoted_key(content, start)?
331    } else {
332        parse_unquoted_key(content, start)?
333    };
334    Ok((key, end, is_quoted))
335}
336
337#[must_use]
338pub fn is_array_header_content(content: &str) -> bool {
339    content.trim_start().starts_with(OPEN_BRACKET)
340        && find_unquoted_char(content, COLON, 0).is_some()
341}
342
343#[must_use]
344pub fn is_key_value_content(content: &str) -> bool {
345    find_unquoted_char(content, COLON, 0).is_some()
346}