// toon/decode/parser.rs

1use crate::error::{Result, ToonError};
2use crate::shared::constants::{
3    BACKSLASH, CLOSE_BRACE, CLOSE_BRACKET, COLON, DOUBLE_QUOTE, OPEN_BRACE, OPEN_BRACKET, PIPE, TAB,
4};
5use crate::shared::literal_utils::{is_boolean_or_null_literal, is_numeric_literal};
6use crate::shared::string_utils::{find_closing_quote, find_unquoted_char, unescape_string};
7
8#[derive(Debug, Clone, PartialEq, Eq)]
9pub struct ArrayHeaderInfo {
10    pub key: Option<String>,
11    pub key_was_quoted: bool,
12    pub length: usize,
13    pub delimiter: char,
14    pub fields: Option<Vec<FieldName>>,
15}
16
/// One field name from the brace segment of an array header.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FieldName {
    /// The field name with any surrounding quotes removed and escapes resolved.
    pub name: String,
    /// `true` when the name was written as a double-quoted literal.
    pub was_quoted: bool,
}
22
23#[derive(Debug, Clone, PartialEq, Eq)]
24pub struct ArrayHeaderParseResult {
25    pub header: ArrayHeaderInfo,
26    pub inline_values: Option<String>,
27}
28
29/// Parse a TOON array header line, returning header metadata and inline values.
30///
31/// # Errors
32///
33/// Returns an error for malformed quoted keys or string literals.
34pub fn parse_array_header_line(
35    content: &str,
36    default_delimiter: char,
37) -> Result<Option<ArrayHeaderParseResult>> {
38    let trimmed = content.trim_start();
39
40    let bracket_start = if trimmed.starts_with(DOUBLE_QUOTE) {
41        let closing = find_closing_quote(trimmed, 0)
42            .ok_or_else(|| ToonError::message("Unterminated string: missing closing quote"))?;
43        let after_quote = &trimmed[closing + 1..];
44        if !after_quote.starts_with(OPEN_BRACKET) {
45            return Ok(None);
46        }
47        let leading_ws = content.len() - trimmed.len();
48        let key_end = leading_ws + closing + 1;
49        content[key_end..]
50            .find(OPEN_BRACKET)
51            .map(|idx| key_end + idx)
52    } else {
53        content.find(OPEN_BRACKET)
54    };
55
56    let Some(bracket_start) = bracket_start else {
57        return Ok(None);
58    };
59
60    let Some(bracket_end) = content[bracket_start..].find(CLOSE_BRACKET) else {
61        return Ok(None);
62    };
63    let bracket_end = bracket_start + bracket_end;
64
65    let mut brace_end = bracket_end + 1;
66    let brace_start = content[bracket_end + 1..]
67        .find(OPEN_BRACE)
68        .map(|idx| bracket_end + 1 + idx);
69    let colon_after_bracket = content[bracket_end + 1..]
70        .find(COLON)
71        .map(|idx| bracket_end + 1 + idx);
72
73    if let (Some(brace_start), Some(colon_after_bracket)) = (brace_start, colon_after_bracket)
74        && brace_start < colon_after_bracket
75        && let Some(found_end) = content[brace_start..].find(CLOSE_BRACE)
76    {
77        let found_end = brace_start + found_end;
78        brace_end = found_end + 1;
79    }
80
81    let colon_index = content[brace_end..].find(COLON).map(|idx| brace_end + idx);
82    let Some(colon_index) = colon_index else {
83        return Ok(None);
84    };
85
86    let mut key: Option<String> = None;
87    let mut key_was_quoted = false;
88    if bracket_start > 0 {
89        let raw_key = content[..bracket_start].trim();
90        if raw_key.starts_with(DOUBLE_QUOTE) {
91            key = Some(parse_string_literal(raw_key)?);
92            key_was_quoted = true;
93        } else if !raw_key.is_empty() {
94            key = Some(raw_key.to_string());
95        }
96    }
97
98    let after_colon = content[colon_index + 1..].trim();
99    let bracket_content = &content[bracket_start + 1..bracket_end];
100
101    let Ok((length, delimiter)) = parse_bracket_segment(bracket_content, default_delimiter) else {
102        return Ok(None);
103    };
104
105    // Enforce the declared-length cap only once we know this line is a real
106    // array header (bracket parsed cleanly); surface a hard error instead of
107    // silently falling back to key-value handling.
108    if length > MAX_DECLARED_ARRAY_LENGTH {
109        return Err(ToonError::message(format!(
110            "Declared array length {length} exceeds maximum allowed ({MAX_DECLARED_ARRAY_LENGTH})"
111        )));
112    }
113
114    let mut fields: Option<Vec<FieldName>> = None;
115    if let Some(brace_start) = brace_start
116        && brace_start < colon_index
117        && let Some(found_end) = content[brace_start..].find(CLOSE_BRACE)
118    {
119        let found_end = brace_start + found_end;
120        if found_end < colon_index {
121            let fields_content = &content[brace_start + 1..found_end];
122            let parsed_fields = parse_delimited_values(fields_content, delimiter)
123                .into_iter()
124                .map(|field| {
125                    let trimmed = field.trim();
126                    let was_quoted = trimmed.starts_with(DOUBLE_QUOTE);
127                    let name = parse_string_literal(trimmed)?;
128                    Ok(FieldName { name, was_quoted })
129                })
130                .collect::<Result<Vec<_>>>()?;
131            fields = Some(parsed_fields);
132        }
133    }
134
135    Ok(Some(ArrayHeaderParseResult {
136        header: ArrayHeaderInfo {
137            key,
138            key_was_quoted,
139            length,
140            delimiter,
141            fields,
142        },
143        inline_values: if after_colon.is_empty() {
144            None
145        } else {
146            Some(after_colon.to_string())
147        },
148    }))
149}
150
/// Largest length an array header `[N]` may declare.
///
/// The declared length comes straight from the input, so a document claiming
/// something like `[9999999999]` must not be able to size downstream
/// allocations or loop bounds. One hundred million is far above any realistic
/// payload while still bounding resource use.
pub const MAX_DECLARED_ARRAY_LENGTH: usize = 100_000_000;
157
158/// Parse the bracket length segment, extracting length and delimiter.
159///
160/// The cap on declared length is enforced by [`parse_array_header_line`] after a
161/// successful parse so that unrelated `[abc]` literals inside key-value content
162/// still fall through to non-array handling instead of erroring.
163///
164/// # Errors
165///
166/// Returns an error if the length is not a valid unsigned integer.
167pub fn parse_bracket_segment(seg: &str, default_delimiter: char) -> Result<(usize, char)> {
168    let mut content = seg.to_string();
169    let mut delimiter = default_delimiter;
170
171    if content.ends_with(TAB) {
172        delimiter = TAB;
173        content.pop();
174    } else if content.ends_with(PIPE) {
175        delimiter = PIPE;
176        content.pop();
177    }
178
179    let length = content
180        .parse::<usize>()
181        .map_err(|_| ToonError::message(format!("Invalid array length: {seg}")))?;
182
183    Ok((length, delimiter))
184}
185
186#[must_use]
187pub fn parse_delimited_values(input: &str, delimiter: char) -> Vec<String> {
188    // Pre-estimate capacity based on delimiter count
189    let estimated_count = input.chars().filter(|&c| c == delimiter).count() + 1;
190    let mut values = Vec::with_capacity(estimated_count);
191    let mut buffer = String::with_capacity(64); // Reasonable default for field values
192    let mut in_quotes = false;
193    let mut iter = input.chars();
194
195    while let Some(ch) = iter.next() {
196        if ch == BACKSLASH && in_quotes {
197            buffer.push(ch);
198            if let Some(next) = iter.next() {
199                buffer.push(next);
200            }
201            continue;
202        }
203
204        if ch == DOUBLE_QUOTE {
205            in_quotes = !in_quotes;
206            buffer.push(ch);
207            continue;
208        }
209
210        if ch == delimiter && !in_quotes {
211            values.push(buffer.trim().to_string());
212            buffer.clear();
213            continue;
214        }
215
216        buffer.push(ch);
217    }
218
219    if !buffer.is_empty() || !values.is_empty() {
220        values.push(buffer.trim().to_string());
221    }
222
223    values
224}
225
226/// Map delimited string values into JSON primitives.
227///
228/// # Errors
229///
230/// Returns an error if any token is a malformed quoted string.
231pub fn map_row_values_to_primitives(values: &[String]) -> Result<Vec<crate::JsonPrimitive>> {
232    values
233        .iter()
234        .map(|value| parse_primitive_token(value))
235        .collect()
236}
237
238/// Parse a primitive token into a JSON primitive.
239///
240/// # Errors
241///
242/// Returns an error if a quoted string token is unterminated or malformed.
243pub fn parse_primitive_token(token: &str) -> Result<crate::JsonPrimitive> {
244    let trimmed = token.trim();
245
246    if trimmed.is_empty() {
247        return Ok(crate::StringOrNumberOrBoolOrNull::String(String::new()));
248    }
249
250    if trimmed.starts_with(DOUBLE_QUOTE) {
251        return Ok(crate::StringOrNumberOrBoolOrNull::String(
252            parse_string_literal(trimmed)?,
253        ));
254    }
255
256    if is_boolean_or_null_literal(trimmed) {
257        return Ok(match trimmed {
258            "true" => crate::StringOrNumberOrBoolOrNull::Bool(true),
259            "false" => crate::StringOrNumberOrBoolOrNull::Bool(false),
260            _ => crate::StringOrNumberOrBoolOrNull::Null,
261        });
262    }
263
264    if is_numeric_literal(trimmed) {
265        let parsed = trimmed.parse::<f64>().unwrap_or(f64::NAN);
266        let normalized = if parsed == 0.0 && parsed.is_sign_negative() {
267            0.0
268        } else {
269            parsed
270        };
271        return Ok(crate::StringOrNumberOrBoolOrNull::Number(normalized));
272    }
273
274    Ok(crate::StringOrNumberOrBoolOrNull::String(
275        trimmed.to_string(),
276    ))
277}
278
279/// Parse a quoted string literal, unescaping escape sequences.
280///
281/// # Errors
282///
283/// Returns an error for unterminated quotes or invalid escape sequences.
284pub fn parse_string_literal(token: &str) -> Result<String> {
285    let trimmed = token.trim();
286
287    if trimmed.starts_with(DOUBLE_QUOTE) {
288        let closing = find_closing_quote(trimmed, 0)
289            .ok_or_else(|| ToonError::message("Unterminated string: missing closing quote"))?;
290        if closing != trimmed.len() - 1 {
291            return Err(ToonError::message(
292                "Unexpected characters after closing quote",
293            ));
294        }
295        let content = &trimmed[1..closing];
296        return unescape_string(content).map_err(ToonError::message);
297    }
298
299    Ok(trimmed.to_string())
300}
301
302/// Parse an unquoted key up to the colon delimiter.
303///
304/// # Errors
305///
306/// Returns an error if no colon is found after the key.
307pub fn parse_unquoted_key(content: &str, start: usize) -> Result<(String, usize)> {
308    let mut pos = start;
309    while pos < content.len() && content.as_bytes()[pos] as char != COLON {
310        pos += 1;
311    }
312
313    if pos >= content.len() || content.as_bytes()[pos] as char != COLON {
314        return Err(ToonError::message("Missing colon after key"));
315    }
316
317    let key = content[start..pos].trim().to_string();
318    pos += 1;
319    Ok((key, pos))
320}
321
322/// Parse a quoted key and validate the following colon.
323///
324/// # Errors
325///
326/// Returns an error for unterminated quotes or missing colon.
327pub fn parse_quoted_key(content: &str, start: usize) -> Result<(String, usize)> {
328    let closing = find_closing_quote(content, start)
329        .ok_or_else(|| ToonError::message("Unterminated quoted key"))?;
330    let key_content = &content[start + 1..closing];
331    let key = unescape_string(key_content).map_err(ToonError::message)?;
332    let mut pos = closing + 1;
333    if pos >= content.len() || content.as_bytes()[pos] as char != COLON {
334        return Err(ToonError::message("Missing colon after key"));
335    }
336    pos += 1;
337    Ok((key, pos))
338}
339
340/// Parse a key token (quoted or unquoted) and return key, end index, and quoted flag.
341///
342/// # Errors
343///
344/// Returns an error if the key is malformed or missing a trailing colon.
345pub fn parse_key_token(content: &str, start: usize) -> Result<(String, usize, bool)> {
346    let is_quoted = content.as_bytes().get(start).map(|b| *b as char) == Some(DOUBLE_QUOTE);
347    let (key, end) = if is_quoted {
348        parse_quoted_key(content, start)?
349    } else {
350        parse_unquoted_key(content, start)?
351    };
352    Ok((key, end, is_quoted))
353}
354
355#[must_use]
356pub fn is_array_header_content(content: &str) -> bool {
357    content.trim_start().starts_with(OPEN_BRACKET)
358        && find_unquoted_char(content, COLON, 0).is_some()
359}
360
361#[must_use]
362pub fn is_key_value_content(content: &str) -> bool {
363    find_unquoted_char(content, COLON, 0).is_some()
364}