Skip to main content

toon_core/
decoder.rs

//! TOON v3.0 Decoder — converts TOON back into JSON.
//!
//! The decoder parses indentation-based TOON structure back into a `serde_json::Value`
//! tree. It handles all TOON v3.0 constructs:
//!
//! - Flat and nested objects (indentation-based)
//! - Inline primitive arrays (`key[N]: v1,v2`)
//! - Tabular arrays (`key[N]{f1,f2}:\n  v1,v2`)
//! - Expanded lists (`key[N]:\n  - item`)
//! - Quoted/unquoted keys and values with escape sequences
//! - Type inference: unquoted `true`/`false` → bool, `null` → null, numbers → number
//!
//! # Key design decisions
//!
//! - **Line-index tracking**: `parse_key_value_into_map` returns the next line index
//!   so callers can correctly skip past array bodies in nested structures.
//! - **`skip_array_body` vs `skip_nested_lines`**: Array bodies containing "- " list
//!   items need special handling to avoid skipping sibling fields at the same indent.
//! - **Auto-detected indent**: `parse_array_body` finds the first "- " line's indent
//!   rather than assuming `base_indent + 2`, supporting flexible nesting depths.
21
22use crate::error::{Result, ToonError};
23use serde_json::{Map, Value};
24
25/// Decode a TOON string back into JSON format.
26///
27/// Takes a valid TOON string and returns the compact JSON representation.
28/// The output is minified (no pretty-printing) — use `serde_json::to_string_pretty`
29/// on the result if human-readable JSON is needed.
30pub fn decode(toon: &str) -> Result<String> {
31    let value = parse_toon(toon)?;
32    Ok(serde_json::to_string(&value)?)
33}
34
35/// Main entry point: classify the TOON input as root array, root primitive, or object.
36fn parse_toon(toon: &str) -> Result<Value> {
37    let toon = toon.trim_end_matches('\n');
38
39    if toon.is_empty() {
40        return Ok(Value::Object(Map::new()));
41    }
42
43    // Check for root array: starts with [N]:
44    if toon.starts_with('[') {
45        if let Some(val) = try_parse_root_array(toon)? {
46            return Ok(val);
47        }
48    }
49
50    // Check for root primitive (single line, no colon structure)
51    let lines: Vec<&str> = toon.lines().collect();
52    if lines.len() == 1 && !line_has_key_colon(lines[0]) {
53        return parse_primitive_value(lines[0].trim());
54    }
55
56    // Object: key-value pairs
57    parse_object_from_lines(&lines, 0, 0, lines.len())
58}
59
60/// Try parsing as root array: [N]: ... or [N]:\n...
61fn try_parse_root_array(toon: &str) -> Result<Option<Value>> {
62    let lines: Vec<&str> = toon.lines().collect();
63    if lines.is_empty() {
64        return Ok(None);
65    }
66    let first_line = lines[0];
67
68    // Match [N]{fields}: or [N]: or [N]:
69    if let Some(header) = parse_array_header(first_line) {
70        let arr = parse_array_body(&header, &lines, 0, 0)?;
71        return Ok(Some(arr));
72    }
73    Ok(None)
74}
75
76/// Check if a line has a key: pattern (not just a primitive that happens to contain ':')
77fn line_has_key_colon(line: &str) -> bool {
78    let trimmed = line.trim();
79    // If it starts with a quote, it could be a quoted key
80    if trimmed.starts_with('"') {
81        // Find the closing quote (handling escapes)
82        if let Some(end) = find_closing_quote(trimmed, 1) {
83            // After closing quote, should be ':'
84            return end + 1 < trimmed.len() && trimmed.as_bytes()[end + 1] == b':';
85        }
86        return false;
87    }
88    // If it starts with '[', could be a root array header
89    if trimmed.starts_with('[') {
90        return false;
91    }
92    // Check for unquoted key: look for first ':' not inside quotes
93    // Simple heuristic: if there's a colon, and the part before it looks like a key
94    if let Some(colon_pos) = trimmed.find(':') {
95        let before = &trimmed[..colon_pos];
96        // Key should not contain spaces (unquoted keys are [A-Za-z_][A-Za-z0-9_.]*)
97        !before.contains(' ') && !before.is_empty()
98    } else {
99        false
100    }
101}
102
/// Metadata extracted from an array header line such as `key[3]{a,b}:` or
/// `key[2]: v1,v2`.
struct ArrayHeader {
    /// Element count declared between the brackets. Only the zero case is acted
    /// on by the decoder in this file; other values are not enforced.
    len: usize,
    /// Column names for the tabular form (`{f1,f2}` syntax), if present.
    fields: Option<Vec<String>>,
    /// Raw text after `: ` for the inline form (`[N]: v1,v2`), if present.
    inline_values: Option<String>,
}
113
114/// Parse array header from a line like `[N]: v1,v2` or `[N]{f1,f2}:` or `[N]:`
115fn parse_array_header(line: &str) -> Option<ArrayHeader> {
116    let trimmed = line.trim();
117    let bracket_start = trimmed.find('[')?;
118    let bracket_end = trimmed[bracket_start..].find(']')? + bracket_start;
119    let len_str = &trimmed[bracket_start + 1..bracket_end];
120    let len: usize = len_str.parse().ok()?;
121
122    let after_bracket = &trimmed[bracket_end + 1..];
123
124    // Check for tabular: {f1,f2}:
125    if after_bracket.starts_with('{') {
126        let brace_end = after_bracket.find('}')?;
127        let fields_str = &after_bracket[1..brace_end];
128        let fields: Vec<String> = fields_str.split(',').map(|s| s.to_string()).collect();
129        let after_brace = &after_bracket[brace_end + 1..];
130        if after_brace.starts_with(':') {
131            return Some(ArrayHeader {
132                len,
133                fields: Some(fields),
134                inline_values: None,
135            });
136        }
137        return None;
138    }
139
140    // Check for inline: `: v1,v2` (space after colon with values on same line)
141    if let Some(values) = after_bracket.strip_prefix(": ") {
142        return Some(ArrayHeader {
143            len,
144            fields: None,
145            inline_values: Some(values.to_string()),
146        });
147    }
148
149    // Expanded/empty: `:`
150    if after_bracket.starts_with(':') {
151        return Some(ArrayHeader {
152            len,
153            fields: None,
154            inline_values: None,
155        });
156    }
157
158    None
159}
160
161/// Parse the body of an array given its header and surrounding lines.
162///
163/// Dispatches to inline parsing, tabular row parsing, or expanded list parsing
164/// based on the header type. For expanded lists, auto-detects the indent of the
165/// first "- " marker rather than assuming a fixed offset.
166fn parse_array_body(
167    header: &ArrayHeader,
168    lines: &[&str],
169    line_idx: usize,
170    base_indent: usize,
171) -> Result<Value> {
172    // Empty array
173    if header.len == 0 {
174        return Ok(Value::Array(vec![]));
175    }
176
177    // Inline values
178    if let Some(ref inline) = header.inline_values {
179        let values = parse_inline_values(inline)?;
180        return Ok(Value::Array(values));
181    }
182
183    // Tabular
184    if let Some(ref fields) = header.fields {
185        let mut rows = Vec::new();
186        for (i, line) in lines.iter().enumerate().skip(line_idx + 1) {
187            let trimmed = line.trim();
188            if trimmed.is_empty() {
189                continue;
190            }
191            // Check indent — tabular rows should be at base_indent + 2
192            let indent = count_indent(line);
193            if indent <= base_indent && i > line_idx + 1 {
194                break;
195            }
196            let obj = parse_tabular_row(trimmed, fields)?;
197            rows.push(obj);
198        }
199        return Ok(Value::Array(rows));
200    }
201
202    // Expanded list (- items)
203    // Auto-detect the indent of the first "- " line
204    let mut detected_indent = base_indent + 2;
205    for line in &lines[line_idx + 1..] {
206        let trimmed = line.trim();
207        if trimmed.is_empty() {
208            continue;
209        }
210        if trimmed.starts_with("- ") {
211            detected_indent = count_indent(line);
212            break;
213        }
214        break;
215    }
216    parse_list_items(lines, line_idx + 1, detected_indent)
217}
218
219/// Parse comma-separated inline values like `1,Alice,true`.
220/// Handles quoted values with escape sequences (e.g., `"hello, world",42,true`).
221fn parse_inline_values(s: &str) -> Result<Vec<Value>> {
222    let mut values = Vec::new();
223    let mut i = 0;
224    let bytes = s.as_bytes();
225
226    while i < bytes.len() {
227        if bytes[i] == b'"' {
228            // Quoted value
229            let end = find_closing_quote(s, i + 1).ok_or_else(|| ToonError::ToonParse {
230                line: 0,
231                message: "Unterminated quoted string in inline array".to_string(),
232            })?;
233            let inner = &s[i + 1..end];
234            let unescaped = unescape_string(inner);
235            values.push(Value::String(unescaped));
236            i = end + 1;
237            // Skip comma
238            if i < bytes.len() && bytes[i] == b',' {
239                i += 1;
240            }
241        } else {
242            // Unquoted value — find next comma
243            let end = s[i..].find(',').map(|p| p + i).unwrap_or(s.len());
244            let token = &s[i..end];
245            values.push(parse_primitive_token(token));
246            i = end;
247            if i < bytes.len() && bytes[i] == b',' {
248                i += 1;
249            }
250        }
251    }
252
253    Ok(values)
254}
255
256/// Parse a tabular row: comma-separated values mapped to field names
257fn parse_tabular_row(row: &str, fields: &[String]) -> Result<Value> {
258    let values = parse_inline_values(row)?;
259    let mut map = Map::new();
260    for (i, field) in fields.iter().enumerate() {
261        let val = values.get(i).cloned().unwrap_or(Value::Null);
262        map.insert(field.clone(), val);
263    }
264    Ok(Value::Object(map))
265}
266
267/// Parse expanded list items starting from a given line index.
268///
269/// `item_indent` is the character offset where "- " markers appear. Items at this
270/// indent are collected; lines deeper than `item_indent` belong to the current item;
271/// lines shallower terminate the list. Lines at `item_indent` without "- " also
272/// terminate (they're sibling fields, not list items).
273fn parse_list_items(lines: &[&str], start_line: usize, item_indent: usize) -> Result<Value> {
274    let mut items = Vec::new();
275    let mut i = start_line;
276
277    while i < lines.len() {
278        let line = lines[i];
279        let indent = count_indent(line);
280        let trimmed = line.trim();
281
282        if trimmed.is_empty() {
283            i += 1;
284            continue;
285        }
286
287        // If indent is less than expected, we've exited this list level
288        if indent < item_indent {
289            break;
290        }
291
292        // Skip lines that are deeper (continuation of previous item)
293        if indent > item_indent {
294            i += 1;
295            continue;
296        }
297
298        // At the exact item_indent: must start with "- "
299        if !trimmed.starts_with("- ") {
300            break;
301        }
302
303        let content = &trimmed[2..]; // After "- "
304
305        // Check if the list item is an array
306        if content.starts_with('[') {
307            if let Some(header) = parse_array_header(content) {
308                let arr = parse_array_body(&header, lines, i, indent + 2)?;
309                items.push(arr);
310                i = skip_nested_lines(lines, i + 1, indent + 2);
311                continue;
312            }
313        }
314
315        // Check if the list item is an object (has key: pattern)
316        if item_content_is_object(content) {
317            let (obj, next_i) = parse_list_item_object(lines, i, indent + 2, content)?;
318            items.push(obj);
319            i = next_i;
320            continue;
321        }
322
323        // Primitive value
324        items.push(parse_primitive_value(content)?);
325        i += 1;
326    }
327
328    Ok(Value::Array(items))
329}
330
331/// Heuristic: does the content after "- " look like an object field (key: value)?
332/// Checks for quoted key, unquoted `key:`, or `key[N]` patterns.
333fn item_content_is_object(content: &str) -> bool {
334    // Check if content starts with a key: pattern
335    if content.starts_with('"') {
336        if let Some(end) = find_closing_quote(content, 1) {
337            return end + 1 < content.len() && content.as_bytes()[end + 1] == b':';
338        }
339        return false;
340    }
341    // Look for key: or key[N] pattern
342    if let Some(pos) = content.find(':') {
343        let before = &content[..pos];
344        return !before.contains(' ') && !before.is_empty();
345    }
346    if let Some(pos) = content.find('[') {
347        let before = &content[..pos];
348        return !before.contains(' ') && !before.is_empty();
349    }
350    false
351}
352
353/// Parse an object that starts as a list item (`- key: val`).
354///
355/// The first field's key-value is on the "- " line itself. Subsequent sibling fields
356/// appear at `hyphen_content_indent` (the indent of the content after "- ").
357/// Returns the parsed object and the next line index after this item.
358fn parse_list_item_object(
359    lines: &[&str],
360    start_line: usize,
361    hyphen_content_indent: usize,
362    first_field_content: &str,
363) -> Result<(Value, usize)> {
364    let mut map = Map::new();
365
366    // Parse the first field from the "- key: value" line
367    let mut i = parse_key_value_into_map(
368        first_field_content,
369        &mut map,
370        lines,
371        start_line,
372        hyphen_content_indent,
373    )?;
374
375    let sibling_indent = hyphen_content_indent;
376
377    while i < lines.len() {
378        let line = lines[i];
379        let indent = count_indent(line);
380        let trimmed = line.trim();
381
382        if trimmed.is_empty() {
383            i += 1;
384            continue;
385        }
386
387        // Sibling fields are at the same indent as the hyphen content
388        if indent != sibling_indent {
389            break;
390        }
391
392        // Must look like a key-value pair
393        if !line_has_key_colon(trimmed) && !trimmed.contains('[') {
394            break;
395        }
396
397        i = parse_key_value_into_map(trimmed, &mut map, lines, i, indent)?;
398    }
399
400    Ok((Value::Object(map), i))
401}
402
403/// Skip past an array body in the line stream.
404///
405/// This is distinct from `skip_nested_lines` because expanded list arrays have a
406/// subtle boundary condition: a line at `first_line_indent` that does NOT start with
407/// "- " is a sibling field, not part of the array body. `skip_nested_lines` would
408/// incorrectly consume it.
409///
410/// For tabular/non-list arrays, falls back to `skip_nested_lines`.
411fn skip_array_body(lines: &[&str], start: usize, base_indent: usize) -> usize {
412    if start >= lines.len() {
413        return start;
414    }
415
416    // Detect the indent of the first non-empty line
417    let mut first_line_indent = base_indent + 2;
418    let mut is_list = false;
419    for line in &lines[start..] {
420        let trimmed = line.trim();
421        if trimmed.is_empty() {
422            continue;
423        }
424        first_line_indent = count_indent(line);
425        is_list = trimmed.starts_with("- ");
426        break;
427    }
428
429    if !is_list {
430        // Tabular or other: skip lines at first_line_indent or deeper
431        return skip_nested_lines(lines, start, first_line_indent);
432    }
433
434    // List array: skip "- " items at first_line_indent and their deeper content
435    let mut i = start;
436    while i < lines.len() {
437        let line = lines[i];
438        let trimmed = line.trim();
439        if trimmed.is_empty() {
440            i += 1;
441            continue;
442        }
443        let indent = count_indent(line);
444        if indent < first_line_indent {
445            break;
446        }
447        if indent == first_line_indent && !trimmed.starts_with("- ") {
448            // At the list item indent but not a list item — this is a sibling field
449            break;
450        }
451        i += 1;
452    }
453    i
454}
455
456/// Skip lines at or deeper than `base_indent`. Stops at the first line that's
457/// shallower. Used for tabular rows and nested object blocks.
458fn skip_nested_lines(lines: &[&str], start: usize, base_indent: usize) -> usize {
459    let mut i = start;
460    while i < lines.len() {
461        let line = lines[i];
462        let trimmed = line.trim();
463        if trimmed.is_empty() {
464            i += 1;
465            continue;
466        }
467        let indent = count_indent(line);
468        if indent < base_indent {
469            break;
470        }
471        i += 1;
472    }
473    i
474}
475
476/// Parse a key-value pair from `content` and insert into `map`.
477///
478/// **Returns the next line index** after this key-value's content (including any
479/// array body or nested object lines). This is critical for correct line advancement
480/// in callers — without it, array bodies inside list item objects would cause sibling
481/// fields to be swallowed.
482///
483/// Handles four value forms:
484/// - `key[N]...` → array (inline, tabular, or expanded)
485/// - `key:` → empty object or nested object (check next-line indent)
486/// - `key: value` → primitive value
487fn parse_key_value_into_map(
488    content: &str,
489    map: &mut Map<String, Value>,
490    lines: &[&str],
491    line_idx: usize,
492    base_indent: usize,
493) -> Result<usize> {
494    let (key, rest) = parse_key_from_content(content)?;
495
496    // Check for array field: key[N]...
497    if rest.starts_with('[') {
498        // Build a synthetic line "x[N]..." so parse_array_header can parse it
499        let arr_line = format!("x{}", rest);
500        if let Some(header) = parse_array_header(&arr_line) {
501            let is_empty = header.len == 0;
502            let is_inline = header.inline_values.is_some();
503            let arr = parse_array_body(&header, lines, line_idx, base_indent)?;
504            map.insert(key, arr);
505            // For empty or inline arrays, no body lines to skip
506            if is_empty || is_inline {
507                return Ok(line_idx + 1);
508            }
509            // For expanded/tabular arrays, skip past the body
510            let next = skip_array_body(lines, line_idx + 1, base_indent);
511            return Ok(next);
512        }
513    }
514
515    // rest starts with ":" for objects/empty or ": " for values
516    if rest == ":" {
517        // Could be empty object or object with children on next lines
518        let child_indent = base_indent + 2;
519        if line_idx + 1 < lines.len() {
520            let next_indent = count_indent(lines[line_idx + 1]);
521            if next_indent >= child_indent && !lines[line_idx + 1].trim().is_empty() {
522                // Nested object
523                let end = find_block_end(lines, line_idx + 1, child_indent);
524                let obj = parse_object_from_lines(lines, child_indent, line_idx + 1, end)?;
525                map.insert(key, obj);
526                return Ok(end);
527            }
528        }
529        // Empty object
530        map.insert(key, Value::Object(Map::new()));
531    } else if let Some(value_str) = rest.strip_prefix(": ") {
532        let value = parse_primitive_value(value_str)?;
533        map.insert(key, value);
534    } else {
535        // Shouldn't happen with well-formed TOON
536        map.insert(key, Value::Null);
537    }
538
539    Ok(line_idx + 1)
540}
541
542/// Parse a key from the beginning of content, returning `(key, rest_after_key)`.
543///
544/// For unquoted keys, finds the earliest of `:` or `[` to handle both `key: val`
545/// and `key[N]: ...` patterns. Using `.find(':').or_else(|| .find('['))` would fail
546/// for cases like `items[2]:` where `:` appears after `[`.
547fn parse_key_from_content(content: &str) -> Result<(String, String)> {
548    if content.starts_with('"') {
549        // Quoted key
550        let end = find_closing_quote(content, 1).ok_or_else(|| ToonError::ToonParse {
551            line: 0,
552            message: "Unterminated quoted key".to_string(),
553        })?;
554        let key = unescape_string(&content[1..end]);
555        let rest = content[end + 1..].to_string();
556        Ok((key, rest))
557    } else {
558        // Unquoted key — find the earliest of ':' or '['
559        let colon_pos = content.find(':');
560        let bracket_pos = content.find('[');
561        let end = match (colon_pos, bracket_pos) {
562            (Some(c), Some(b)) => c.min(b),
563            (Some(c), None) => c,
564            (None, Some(b)) => b,
565            (None, None) => content.len(),
566        };
567        let key = content[..end].to_string();
568        let rest = content[end..].to_string();
569        Ok((key, rest))
570    }
571}
572
573/// Parse an object from indented lines
574fn parse_object_from_lines(
575    lines: &[&str],
576    expected_indent: usize,
577    start: usize,
578    end: usize,
579) -> Result<Value> {
580    let mut map = Map::new();
581    let mut i = start;
582
583    while i < end {
584        let line = lines[i];
585        let trimmed = line.trim();
586
587        if trimmed.is_empty() {
588            i += 1;
589            continue;
590        }
591
592        let indent = count_indent(line);
593        if indent < expected_indent {
594            break;
595        }
596        if indent > expected_indent {
597            // This is a child line of a previous key — skip
598            i += 1;
599            continue;
600        }
601
602        // At our indent level — parse as key-value
603        i = parse_key_value_into_map(trimmed, &mut map, lines, i, indent)?;
604        // Skip any nested content that parse_key_value_into_map didn't consume
605        while i < end {
606            let next_line = lines[i];
607            let next_trimmed = next_line.trim();
608            if next_trimmed.is_empty() {
609                i += 1;
610                continue;
611            }
612            let next_indent = count_indent(next_line);
613            if next_indent <= expected_indent {
614                break;
615            }
616            i += 1;
617        }
618    }
619
620    Ok(Value::Object(map))
621}
622
623/// Find the end of a block at the given indent level
624fn find_block_end(lines: &[&str], start: usize, min_indent: usize) -> usize {
625    let mut i = start;
626    while i < lines.len() {
627        let line = lines[i];
628        let trimmed = line.trim();
629        if trimmed.is_empty() {
630            i += 1;
631            continue;
632        }
633        let indent = count_indent(line);
634        if indent < min_indent {
635            break;
636        }
637        i += 1;
638    }
639    i
640}
641
642/// Parse a primitive value from a string token
643fn parse_primitive_value(s: &str) -> Result<Value> {
644    Ok(parse_primitive_token(s))
645}
646
647/// Parse an unquoted or quoted token into a JSON Value.
648///
649/// Type inference order: quoted string → null → bool → integer → float → unquoted string.
650/// This mirrors the encoder's quoting rules: strings that look like numbers/bools are
651/// quoted by the encoder, so unquoted tokens can be safely interpreted as their types.
652fn parse_primitive_token(s: &str) -> Value {
653    let s = s.trim();
654
655    // Quoted string
656    if s.starts_with('"') && s.ends_with('"') && s.len() >= 2 {
657        let inner = &s[1..s.len() - 1];
658        return Value::String(unescape_string(inner));
659    }
660
661    // null
662    if s == "null" {
663        return Value::Null;
664    }
665
666    // bool
667    if s == "true" {
668        return Value::Bool(true);
669    }
670    if s == "false" {
671        return Value::Bool(false);
672    }
673
674    // Try integer
675    if let Ok(n) = s.parse::<i64>() {
676        return Value::Number(n.into());
677    }
678
679    // Try float
680    if let Ok(f) = s.parse::<f64>() {
681        if let Some(n) = serde_json::Number::from_f64(f) {
682            return Value::Number(n);
683        }
684    }
685
686    // Default: unquoted string
687    Value::String(s.to_string())
688}
689
/// Return the number of bytes of leading whitespace in `line`.
///
/// This is the raw width of the indentation, not a nesting level: a line
/// indented by four spaces yields 4. A line made up entirely of whitespace
/// yields its full length.
fn count_indent(line: &str) -> usize {
    line.find(|c: char| !c.is_whitespace())
        .unwrap_or(line.len())
}
694
/// Locate the first unescaped `"` at or after byte offset `start`.
///
/// A backslash escapes the byte that follows it, so `\"` does not terminate the
/// string. Returns the byte index of the closing quote, or `None` if the input
/// ends first.
fn find_closing_quote(s: &str, start: usize) -> Option<usize> {
    let mut skip_next = false;

    for (idx, &byte) in s.as_bytes().iter().enumerate().skip(start) {
        if skip_next {
            // This byte was escaped by the preceding backslash.
            skip_next = false;
            continue;
        }
        match byte {
            b'\\' => skip_next = true,
            b'"' => return Some(idx),
            _ => {}
        }
    }

    None
}
710
/// Decode the escape sequences of a TOON string body (the text between quotes).
///
/// Recognised sequences: `\\`, `\"`, `\n`, `\r`, `\t`. An unrecognised sequence
/// is kept as written (backslash plus the character), and a lone trailing
/// backslash is kept as a backslash.
fn unescape_string(s: &str) -> String {
    let mut decoded = String::with_capacity(s.len());
    let mut after_backslash = false;

    for ch in s.chars() {
        if after_backslash {
            after_backslash = false;
            match ch {
                'n' => decoded.push('\n'),
                'r' => decoded.push('\r'),
                't' => decoded.push('\t'),
                '\\' | '"' => decoded.push(ch),
                unknown => {
                    decoded.push('\\');
                    decoded.push(unknown);
                }
            }
        } else if ch == '\\' {
            after_backslash = true;
        } else {
            decoded.push(ch);
        }
    }

    // Input ended right after a backslash.
    if after_backslash {
        decoded.push('\\');
    }

    decoded
}