// Source file: toon/decode/parser.rs

1use crate::error::{Result, ToonError};
2use crate::shared::constants::{
3    BACKSLASH, CLOSE_BRACE, CLOSE_BRACKET, COLON, DOUBLE_QUOTE, OPEN_BRACE, OPEN_BRACKET, PIPE, TAB,
4};
5use crate::shared::literal_utils::{is_boolean_or_null_literal, is_numeric_literal};
6use crate::shared::string_utils::{find_closing_quote, find_unquoted_char, unescape_string};
7
8#[derive(Debug, Clone, PartialEq, Eq)]
9pub struct ArrayHeaderInfo {
10    pub key: Option<String>,
11    pub key_was_quoted: bool,
12    pub length: usize,
13    pub delimiter: char,
14    pub fields: Option<Vec<FieldName>>,
15}
16
/// A single field name taken from the `{...}` segment of an array header.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FieldName {
    /// The field name with surrounding whitespace removed and escapes resolved.
    pub name: String,
    /// `true` when the name was written as a double-quoted literal.
    pub was_quoted: bool,
}
22
23#[derive(Debug, Clone, PartialEq, Eq)]
24pub struct ArrayHeaderParseResult {
25    pub header: ArrayHeaderInfo,
26    pub inline_values: Option<String>,
27}
28
29/// Parse a TOON array header line, returning header metadata and inline values.
30///
31/// # Errors
32///
33/// Returns an error for malformed quoted keys or string literals.
34pub fn parse_array_header_line(
35    content: &str,
36    default_delimiter: char,
37) -> Result<Option<ArrayHeaderParseResult>> {
38    let trimmed = content.trim_start();
39
40    let bracket_start = if trimmed.starts_with(DOUBLE_QUOTE) {
41        let closing = find_closing_quote(trimmed, 0)
42            .ok_or_else(|| ToonError::message("Unterminated string: missing closing quote"))?;
43        let after_quote = &trimmed[closing + 1..];
44        if !after_quote.starts_with(OPEN_BRACKET) {
45            return Ok(None);
46        }
47        let leading_ws = content.len() - trimmed.len();
48        let key_end = leading_ws + closing + 1;
49        content[key_end..]
50            .find(OPEN_BRACKET)
51            .map(|idx| key_end + idx)
52    } else {
53        content.find(OPEN_BRACKET)
54    };
55
56    let Some(bracket_start) = bracket_start else {
57        return Ok(None);
58    };
59
60    let Some(bracket_end) = content[bracket_start..].find(CLOSE_BRACKET) else {
61        return Ok(None);
62    };
63    let bracket_end = bracket_start + bracket_end;
64
65    let mut brace_end = bracket_end + 1;
66    let brace_start = content[bracket_end + 1..]
67        .find(OPEN_BRACE)
68        .map(|idx| bracket_end + 1 + idx);
69    let colon_after_bracket = content[bracket_end + 1..]
70        .find(COLON)
71        .map(|idx| bracket_end + 1 + idx);
72
73    if let (Some(brace_start), Some(colon_after_bracket)) = (brace_start, colon_after_bracket)
74        && brace_start < colon_after_bracket
75        && let Some(found_end) = content[brace_start..].find(CLOSE_BRACE)
76    {
77        let found_end = brace_start + found_end;
78        brace_end = found_end + 1;
79    }
80
81    let colon_index = content[brace_end..].find(COLON).map(|idx| brace_end + idx);
82    let Some(colon_index) = colon_index else {
83        return Ok(None);
84    };
85
86    let mut key: Option<String> = None;
87    let mut key_was_quoted = false;
88    if bracket_start > 0 {
89        let raw_key = content[..bracket_start].trim();
90        if raw_key.starts_with(DOUBLE_QUOTE) {
91            key = Some(parse_string_literal(raw_key)?);
92            key_was_quoted = true;
93        } else if !raw_key.is_empty() {
94            key = Some(raw_key.to_string());
95        }
96    }
97
98    let after_colon = content[colon_index + 1..].trim();
99    let bracket_content = &content[bracket_start + 1..bracket_end];
100
101    let Ok((length, delimiter)) = parse_bracket_segment(bracket_content, default_delimiter) else {
102        return Ok(None);
103    };
104
105    let mut fields: Option<Vec<FieldName>> = None;
106    if let Some(brace_start) = brace_start
107        && brace_start < colon_index
108        && let Some(found_end) = content[brace_start..].find(CLOSE_BRACE)
109    {
110        let found_end = brace_start + found_end;
111        if found_end < colon_index {
112            let fields_content = &content[brace_start + 1..found_end];
113            let parsed_fields = parse_delimited_values(fields_content, delimiter)
114                .into_iter()
115                .map(|field| {
116                    let trimmed = field.trim();
117                    let was_quoted = trimmed.starts_with(DOUBLE_QUOTE);
118                    let name = parse_string_literal(trimmed)?;
119                    Ok(FieldName { name, was_quoted })
120                })
121                .collect::<Result<Vec<_>>>()?;
122            fields = Some(parsed_fields);
123        }
124    }
125
126    Ok(Some(ArrayHeaderParseResult {
127        header: ArrayHeaderInfo {
128            key,
129            key_was_quoted,
130            length,
131            delimiter,
132            fields,
133        },
134        inline_values: if after_colon.is_empty() {
135            None
136        } else {
137            Some(after_colon.to_string())
138        },
139    }))
140}
141
142/// Parse the bracket length segment, extracting length and delimiter.
143///
144/// # Errors
145///
146/// Returns an error if the length is invalid.
147pub fn parse_bracket_segment(seg: &str, default_delimiter: char) -> Result<(usize, char)> {
148    let mut content = seg.to_string();
149    let mut delimiter = default_delimiter;
150
151    if content.ends_with(TAB) {
152        delimiter = TAB;
153        content.pop();
154    } else if content.ends_with(PIPE) {
155        delimiter = PIPE;
156        content.pop();
157    }
158
159    let length = content
160        .parse::<usize>()
161        .map_err(|_| ToonError::message(format!("Invalid array length: {seg}")))?;
162
163    Ok((length, delimiter))
164}
165
166#[must_use]
167pub fn parse_delimited_values(input: &str, delimiter: char) -> Vec<String> {
168    // Pre-estimate capacity based on delimiter count
169    let estimated_count = input.chars().filter(|&c| c == delimiter).count() + 1;
170    let mut values = Vec::with_capacity(estimated_count);
171    let mut buffer = String::with_capacity(64); // Reasonable default for field values
172    let mut in_quotes = false;
173    let mut iter = input.chars();
174
175    while let Some(ch) = iter.next() {
176        if ch == BACKSLASH && in_quotes {
177            buffer.push(ch);
178            if let Some(next) = iter.next() {
179                buffer.push(next);
180            }
181            continue;
182        }
183
184        if ch == DOUBLE_QUOTE {
185            in_quotes = !in_quotes;
186            buffer.push(ch);
187            continue;
188        }
189
190        if ch == delimiter && !in_quotes {
191            values.push(buffer.trim().to_string());
192            buffer.clear();
193            continue;
194        }
195
196        buffer.push(ch);
197    }
198
199    if !buffer.is_empty() || !values.is_empty() {
200        values.push(buffer.trim().to_string());
201    }
202
203    values
204}
205
206/// Map delimited string values into JSON primitives.
207///
208/// # Errors
209///
210/// Returns an error if any token is a malformed quoted string.
211pub fn map_row_values_to_primitives(values: &[String]) -> Result<Vec<crate::JsonPrimitive>> {
212    values
213        .iter()
214        .map(|value| parse_primitive_token(value))
215        .collect()
216}
217
218/// Parse a primitive token into a JSON primitive.
219///
220/// # Errors
221///
222/// Returns an error if a quoted string token is unterminated or malformed.
223pub fn parse_primitive_token(token: &str) -> Result<crate::JsonPrimitive> {
224    let trimmed = token.trim();
225
226    if trimmed.is_empty() {
227        return Ok(crate::StringOrNumberOrBoolOrNull::String(String::new()));
228    }
229
230    if trimmed.starts_with(DOUBLE_QUOTE) {
231        return Ok(crate::StringOrNumberOrBoolOrNull::String(
232            parse_string_literal(trimmed)?,
233        ));
234    }
235
236    if is_boolean_or_null_literal(trimmed) {
237        return Ok(match trimmed {
238            "true" => crate::StringOrNumberOrBoolOrNull::Bool(true),
239            "false" => crate::StringOrNumberOrBoolOrNull::Bool(false),
240            _ => crate::StringOrNumberOrBoolOrNull::Null,
241        });
242    }
243
244    if is_numeric_literal(trimmed) {
245        let parsed = trimmed.parse::<f64>().unwrap_or(f64::NAN);
246        let normalized = if parsed == 0.0 && parsed.is_sign_negative() {
247            0.0
248        } else {
249            parsed
250        };
251        return Ok(crate::StringOrNumberOrBoolOrNull::Number(normalized));
252    }
253
254    Ok(crate::StringOrNumberOrBoolOrNull::String(
255        trimmed.to_string(),
256    ))
257}
258
259/// Parse a quoted string literal, unescaping escape sequences.
260///
261/// # Errors
262///
263/// Returns an error for unterminated quotes or invalid escape sequences.
264pub fn parse_string_literal(token: &str) -> Result<String> {
265    let trimmed = token.trim();
266
267    if trimmed.starts_with(DOUBLE_QUOTE) {
268        let closing = find_closing_quote(trimmed, 0)
269            .ok_or_else(|| ToonError::message("Unterminated string: missing closing quote"))?;
270        if closing != trimmed.len() - 1 {
271            return Err(ToonError::message(
272                "Unexpected characters after closing quote",
273            ));
274        }
275        let content = &trimmed[1..closing];
276        return unescape_string(content).map_err(ToonError::message);
277    }
278
279    Ok(trimmed.to_string())
280}
281
282/// Parse an unquoted key up to the colon delimiter.
283///
284/// # Errors
285///
286/// Returns an error if no colon is found after the key.
287pub fn parse_unquoted_key(content: &str, start: usize) -> Result<(String, usize)> {
288    let mut pos = start;
289    while pos < content.len() && content.as_bytes()[pos] as char != COLON {
290        pos += 1;
291    }
292
293    if pos >= content.len() || content.as_bytes()[pos] as char != COLON {
294        return Err(ToonError::message("Missing colon after key"));
295    }
296
297    let key = content[start..pos].trim().to_string();
298    pos += 1;
299    Ok((key, pos))
300}
301
302/// Parse a quoted key and validate the following colon.
303///
304/// # Errors
305///
306/// Returns an error for unterminated quotes or missing colon.
307pub fn parse_quoted_key(content: &str, start: usize) -> Result<(String, usize)> {
308    let closing = find_closing_quote(content, start)
309        .ok_or_else(|| ToonError::message("Unterminated quoted key"))?;
310    let key_content = &content[start + 1..closing];
311    let key = unescape_string(key_content).map_err(ToonError::message)?;
312    let mut pos = closing + 1;
313    if pos >= content.len() || content.as_bytes()[pos] as char != COLON {
314        return Err(ToonError::message("Missing colon after key"));
315    }
316    pos += 1;
317    Ok((key, pos))
318}
319
320/// Parse a key token (quoted or unquoted) and return key, end index, and quoted flag.
321///
322/// # Errors
323///
324/// Returns an error if the key is malformed or missing a trailing colon.
325pub fn parse_key_token(content: &str, start: usize) -> Result<(String, usize, bool)> {
326    let is_quoted = content.as_bytes().get(start).map(|b| *b as char) == Some(DOUBLE_QUOTE);
327    let (key, end) = if is_quoted {
328        parse_quoted_key(content, start)?
329    } else {
330        parse_unquoted_key(content, start)?
331    };
332    Ok((key, end, is_quoted))
333}
334
335#[must_use]
336pub fn is_array_header_content(content: &str) -> bool {
337    content.trim_start().starts_with(OPEN_BRACKET)
338        && find_unquoted_char(content, COLON, 0).is_some()
339}
340
341#[must_use]
342pub fn is_key_value_content(content: &str) -> bool {
343    find_unquoted_char(content, COLON, 0).is_some()
344}