Skip to main content

_etoon/
toon.rs

1//! TOON encoder core (sonic-rs backend).
2//!
3//! Input: JSON bytes (from orjson.dumps on Python side).
4//! Output: TOON string, matching TOON spec v1.5.
5//!
6//! Delimiter is monomorphized via const generics (`DELIM: u8`) so the
7//! byte-match inner loops fold away when emitting default-comma output.
8
9use sonic_rs::{Array, JsonContainerTrait, JsonType, JsonValueTrait, Object, Value};
10use std::fmt::Write as _;
11
/// Encoder configuration matching TOON spec v1.5 options.
///
/// The type is plain data: it is `Copy`, comparable, and printable with
/// `{:?}` so callers can log or assert on the options they passed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Config {
    /// Delimiter between array/tabular values. Must be `,`, `\t`, or `|`;
    /// `encode_with` returns an error for any other byte.
    pub delimiter: u8,
    /// If true, fold single-key object chains into dot-notation keys (safe mode).
    pub key_folding: bool,
    /// Max fold depth, counted in key segments. `None` = unlimited.
    /// A folded key needs at least two segments, so `Some(0)` and `Some(1)`
    /// both disable folding.
    pub flatten_depth: Option<usize>,
}

impl Default for Config {
    /// Comma delimiter, key folding off, no fold-depth limit.
    fn default() -> Self {
        Self {
            delimiter: b',',
            key_folding: false,
            flatten_depth: None,
        }
    }
}
32
33pub fn encode(json_bytes: &[u8]) -> Result<String, String> {
34    encode_with(json_bytes, &Config::default())
35}
36
37pub fn encode_with(json_bytes: &[u8], cfg: &Config) -> Result<String, String> {
38    let value: Value =
39        sonic_rs::from_slice(json_bytes).map_err(|e| format!("JSON parse error: {}", e))?;
40    let mut out = String::with_capacity(json_bytes.len());
41    match cfg.delimiter {
42        b',' => write_root::<b','>(&value, cfg, &mut out),
43        b'\t' => write_root::<b'\t'>(&value, cfg, &mut out),
44        b'|' => write_root::<b'|'>(&value, cfg, &mut out),
45        _ => return Err("delimiter must be ',', '\\t', or '|'".to_string()),
46    }
47    Ok(out)
48}
49
50fn write_root<const DELIM: u8>(v: &Value, cfg: &Config, out: &mut String) {
51    match v.get_type() {
52        JsonType::Object => {
53            let m = v.as_object().unwrap();
54            if !m.is_empty() {
55                // Key folding applies only at the top-level object (TOON spec v1.5).
56                write_object_body::<DELIM>(m, 0, cfg, cfg.key_folding, out);
57            }
58        }
59        JsonType::Array => write_array_suffix::<DELIM>(v.as_array().unwrap(), 0, cfg, out),
60        _ => write_scalar::<DELIM>(v, out),
61    }
62}
63
64fn write_object_body<const DELIM: u8>(
65    m: &Object,
66    indent: usize,
67    cfg: &Config,
68    allow_fold: bool,
69    out: &mut String,
70) {
71    let mut first = true;
72    for (k, v) in m.iter() {
73        if !first {
74            out.push('\n');
75        }
76        first = false;
77        write_indent(indent, out);
78
79        if allow_fold {
80            if let Some((joined, final_v)) = try_fold(k, v, cfg, m) {
81                write_key(&joined, out);
82                write_value_after_key::<DELIM>(final_v, indent, cfg, out);
83                continue;
84            }
85        }
86
87        write_key(k, out);
88        write_value_after_key::<DELIM>(v, indent, cfg, out);
89    }
90}
91
92fn try_fold<'a>(k: &'a str, v: &'a Value, cfg: &Config, m: &Object) -> Option<(String, &'a Value)> {
93    let max_depth = cfg.flatten_depth.unwrap_or(usize::MAX);
94    if max_depth < 2 {
95        return None;
96    }
97
98    // Key segments must match TOON identifier pattern (safe mode).
99    if key_needs_quoting(k) {
100        return None;
101    }
102
103    let mut cur_v = v;
104    let mut path: Vec<&'a str> = vec![k];
105
106    loop {
107        if path.len() >= max_depth {
108            break;
109        }
110        let obj = match cur_v.get_type() {
111            JsonType::Object => cur_v.as_object().unwrap(),
112            _ => break,
113        };
114        if obj.len() != 1 {
115            break;
116        }
117        let (nk, nv) = obj.iter().next().unwrap();
118        if key_needs_quoting(nk) {
119            break;
120        }
121        path.push(nk);
122        cur_v = nv;
123    }
124
125    if path.len() < 2 {
126        return None;
127    }
128
129    let joined: String = path.join(".");
130
131    if m.get(&joined).is_some() {
132        return None;
133    }
134
135    Some((joined, cur_v))
136}
137
138fn write_value_after_key<const DELIM: u8>(
139    v: &Value,
140    key_indent: usize,
141    cfg: &Config,
142    out: &mut String,
143) {
144    match v.get_type() {
145        JsonType::Object => {
146            let child = v.as_object().unwrap();
147            if child.is_empty() {
148                out.push(':');
149            } else {
150                out.push_str(":\n");
151                // Nested object bodies never re-apply key folding (TOON spec: top-level only).
152                write_object_body::<DELIM>(child, key_indent + 1, cfg, false, out);
153            }
154        }
155        JsonType::Array => write_array_suffix::<DELIM>(v.as_array().unwrap(), key_indent, cfg, out),
156        _ => {
157            out.push_str(": ");
158            write_scalar::<DELIM>(v, out);
159        }
160    }
161}
162
163fn write_array_suffix<const DELIM: u8>(arr: &Array, indent: usize, cfg: &Config, out: &mut String) {
164    write!(out, "[{}", arr.len()).unwrap();
165    if DELIM != b',' {
166        out.push(DELIM as char);
167    }
168    out.push(']');
169
170    if arr.is_empty() {
171        out.push(':');
172        return;
173    }
174
175    if arr.iter().all(is_scalar) {
176        out.push_str(": ");
177        let mut first = true;
178        for v in arr.iter() {
179            if !first {
180                out.push(DELIM as char);
181            }
182            first = false;
183            write_scalar::<DELIM>(v, out);
184        }
185        return;
186    }
187
188    if let Some((keys, uniform_order)) = table_keys(arr) {
189        out.push('{');
190        for (i, k) in keys.iter().enumerate() {
191            if i > 0 {
192                out.push(DELIM as char);
193            }
194            write_key(k, out);
195        }
196        out.push_str("}:");
197        if uniform_order {
198            for item in arr.iter() {
199                let m = item.as_object().unwrap();
200                out.push('\n');
201                write_indent(indent + 1, out);
202                let mut first = true;
203                for (_, v) in m.iter() {
204                    if !first {
205                        out.push(DELIM as char);
206                    }
207                    first = false;
208                    write_scalar::<DELIM>(v, out);
209                }
210            }
211        } else {
212            for item in arr.iter() {
213                let m = item.as_object().unwrap();
214                out.push('\n');
215                write_indent(indent + 1, out);
216                let mut first = true;
217                for k in &keys {
218                    if !first {
219                        out.push(DELIM as char);
220                    }
221                    first = false;
222                    write_scalar::<DELIM>(m.get(k).unwrap(), out);
223                }
224            }
225        }
226        return;
227    }
228
229    out.push(':');
230    for item in arr.iter() {
231        out.push('\n');
232        write_indent(indent + 1, out);
233        out.push('-');
234        write_list_item::<DELIM>(item, indent + 1, cfg, out);
235    }
236}
237
238fn write_list_item<const DELIM: u8>(v: &Value, l: usize, cfg: &Config, out: &mut String) {
239    match v.get_type() {
240        JsonType::Object => {
241            let m = v.as_object().unwrap();
242            if !m.is_empty() {
243                out.push(' ');
244                write_list_item_object::<DELIM>(m, l, cfg, out);
245            }
246        }
247        JsonType::Array => {
248            out.push(' ');
249            write_array_suffix::<DELIM>(v.as_array().unwrap(), l, cfg, out);
250        }
251        _ => {
252            out.push(' ');
253            write_scalar::<DELIM>(v, out);
254        }
255    }
256}
257
258fn write_list_item_object<const DELIM: u8>(m: &Object, l: usize, cfg: &Config, out: &mut String) {
259    let mut first = true;
260    for (k, v) in m.iter() {
261        if !first {
262            out.push('\n');
263            write_indent(l + 1, out);
264        }
265        first = false;
266        write_key(k, out);
267        write_value_after_key::<DELIM>(v, l + 1, cfg, out);
268    }
269}
270
271// ==================== Helpers ====================
272
/// Precomputed indentation strings for levels 0 through 8 (two spaces per level).
const INDENTS: [&str; 9] = [
    "",
    "  ",
    "    ",
    "      ",
    "        ",
    "          ",
    "            ",
    "              ",
    "                ",
];

/// Appends `level * 2` spaces to `out`.
///
/// Levels covered by `INDENTS` are copied from the table; deeper levels are
/// generated one space at a time.
#[inline]
fn write_indent(level: usize, out: &mut String) {
    match INDENTS.get(level) {
        Some(pad) => out.push_str(pad),
        None => out.extend(std::iter::repeat(' ').take(level * 2)),
    }
}
295
296fn is_scalar(v: &Value) -> bool {
297    !matches!(v.get_type(), JsonType::Object | JsonType::Array)
298}
299
300fn table_keys<'a>(arr: &'a Array) -> Option<(Vec<&'a str>, bool)> {
301    let first_v = arr.iter().next()?;
302    let first = first_v.as_object()?;
303    if first.is_empty() {
304        return None;
305    }
306    if !first.iter().all(|(_, v)| is_scalar(v)) {
307        return None;
308    }
309    let keys: Vec<&'a str> = first.iter().map(|(k, _)| k).collect();
310    let mut uniform_order = true;
311
312    for item in arr.iter().skip(1) {
313        let m = item.as_object()?;
314        if m.len() != keys.len() {
315            return None;
316        }
317        let mut row_iter = m.iter();
318        for k in &keys {
319            let (ik, iv) = row_iter.next()?;
320            if !is_scalar(iv) {
321                return None;
322            }
323            if ik != *k {
324                uniform_order = false;
325            }
326        }
327        if !uniform_order {
328            for k in &keys {
329                match m.get(k) {
330                    Some(v) if is_scalar(v) => {}
331                    _ => return None,
332                }
333            }
334        }
335    }
336    Some((keys, uniform_order))
337}
338
339// ==================== Scalar ====================
340
341#[inline]
342fn write_scalar<const DELIM: u8>(v: &Value, out: &mut String) {
343    match v.get_type() {
344        JsonType::Null => out.push_str("null"),
345        JsonType::Boolean => out.push_str(if v.as_bool().unwrap() {
346            "true"
347        } else {
348            "false"
349        }),
350        JsonType::Number => write_number(v, out),
351        JsonType::String => write_string_value::<DELIM>(v.as_str().unwrap(), out),
352        _ => unreachable!("write_scalar on non-scalar"),
353    }
354}
355
356fn write_number(v: &Value, out: &mut String) {
357    if let Some(i) = v.as_i64() {
358        let mut buf = itoa::Buffer::new();
359        out.push_str(buf.format(i));
360        return;
361    }
362    if let Some(u) = v.as_u64() {
363        let mut buf = itoa::Buffer::new();
364        out.push_str(buf.format(u));
365        return;
366    }
367    let raw = v.to_string();
368    if !raw.contains('.') && !raw.contains('e') && !raw.contains('E') {
369        out.push_str(&raw);
370        return;
371    }
372    if let Some(f) = v.as_f64() {
373        write_float(f, out);
374    } else {
375        out.push_str("null");
376    }
377}
378
379fn write_float(f: f64, out: &mut String) {
380    if !f.is_finite() {
381        out.push_str("null");
382        return;
383    }
384    if f == 0.0 {
385        out.push('0');
386        return;
387    }
388    if f.fract() == 0.0 && f.abs() < 1e16 {
389        let mut buf = itoa::Buffer::new();
390        out.push_str(buf.format(f as i64));
391        return;
392    }
393    write!(out, "{}", f).unwrap();
394}
395
396// ==================== String ====================
397
398#[inline]
399fn write_string_value<const DELIM: u8>(s: &str, out: &mut String) {
400    if value_needs_quoting::<DELIM>(s) {
401        write_quoted(s, out);
402    } else {
403        out.push_str(s);
404    }
405}
406
407fn write_key(k: &str, out: &mut String) {
408    if key_needs_quoting(k) {
409        write_quoted(k, out);
410    } else {
411        out.push_str(k);
412    }
413}
414
/// Returns `true` when a key cannot be written bare.
///
/// A bare key matches `[@$#]?[a-zA-Z_][a-zA-Z0-9_.]*`: an optional single
/// sigil, then a letter or underscore, then any run of letters, digits,
/// underscores, and dots. The empty string and a lone sigil need quoting.
///
/// Sigil prefixes are allowed for ecosystem compatibility:
/// - `@` : AWS CloudWatch, Elasticsearch, Serilog, XML→JSON
/// - `$` : MongoDB, JSON Schema, AWS CloudFormation
/// - `#` : JSON-LD, Azure Resource Manager
#[inline]
fn key_needs_quoting(s: &str) -> bool {
    let bytes = s.as_bytes();

    // Strip at most one leading sigil; what remains must be an identifier.
    let body = match bytes.first() {
        None => return true,
        Some(b'@') | Some(b'$') | Some(b'#') => &bytes[1..],
        Some(_) => bytes,
    };

    match body.split_first() {
        Some((&head, tail)) if head.is_ascii_alphabetic() || head == b'_' => !tail
            .iter()
            .all(|&b| b.is_ascii_alphanumeric() || b == b'_' || b == b'.'),
        // Nothing after the sigil, or an invalid leading character.
        _ => true,
    }
}
446
447#[inline]
448fn value_needs_quoting<const DELIM: u8>(s: &str) -> bool {
449    if s.is_empty() {
450        return true;
451    }
452    let bytes = s.as_bytes();
453    match bytes[0] {
454        b'-' | b'[' | b'{' | b'"' | b'#' | b' ' | b'\t' => return true,
455        _ => {}
456    }
457    match bytes[bytes.len() - 1] {
458        b' ' | b'\t' => return true,
459        _ => {}
460    }
461    // DELIM is a compile-time constant, so this match collapses into the
462    // single match arm below when DELIM is in {',', '\t'} (already included),
463    // and stays as a separate branch only for DELIM = '|'.
464    for &b in bytes {
465        match b {
466            b':' | b'\n' | b'\r' | b'\t' | b'"' | b'\\' => return true,
467            _ if b == DELIM => return true,
468            _ => {}
469        }
470    }
471    if matches!(s, "true" | "false" | "null") {
472        return true;
473    }
474    looks_like_number(bytes)
475}
476
/// Returns `true` when `bytes` is entirely a decimal number literal of the
/// form `-?digits(.digits)?([eE][+-]?digits)?`.
///
/// Leading zeros are accepted (`007` counts as a number). Empty input, a
/// lone `-`, a trailing `.`, or an exponent marker without digits all yield
/// `false`. Unlike a direct `bytes[0]` access, empty input does not panic.
fn looks_like_number(bytes: &[u8]) -> bool {
    /// Consumes a leading run of ASCII digits; returns what is left and
    /// whether at least one digit was consumed.
    fn digits(input: &[u8]) -> (&[u8], bool) {
        let n = input.iter().take_while(|b| b.is_ascii_digit()).count();
        (&input[n..], n > 0)
    }

    // Optional sign.
    let unsigned = match bytes.split_first() {
        Some((&b'-', tail)) => tail,
        _ => bytes,
    };

    // Mandatory integer part.
    let (mut rest, seen) = digits(unsigned);
    if !seen {
        return false;
    }

    // Optional fraction: a dot must be followed by at least one digit.
    if rest.first() == Some(&b'.') {
        let (after, seen) = digits(&rest[1..]);
        if !seen {
            return false;
        }
        rest = after;
    }

    // Optional exponent: marker, optional sign, at least one digit.
    if matches!(rest.first(), Some(&b'e') | Some(&b'E')) {
        let mut tail = &rest[1..];
        if matches!(tail.first(), Some(&b'+') | Some(&b'-')) {
            tail = &tail[1..];
        }
        let (after, seen) = digits(tail);
        if !seen {
            return false;
        }
        rest = after;
    }

    // Anything left over means the text is not purely numeric.
    rest.is_empty()
}
520
/// Writes `s` surrounded by double quotes, escaping `\`, `"`, newline,
/// carriage return, and tab as `\\`, `\"`, `\n`, `\r`, `\t`. All other
/// characters are copied through unchanged.
///
/// Unescaped stretches are copied in bulk. Every split point is the index of
/// (or the index just after) a single-byte ASCII character, so it always
/// falls on a UTF-8 character boundary and ordinary `str` slicing is enough —
/// no `unsafe` is needed.
fn write_quoted(s: &str, out: &mut String) {
    out.push('"');

    // Start of the pending run of bytes that need no escaping.
    let mut start = 0;
    for (i, b) in s.bytes().enumerate() {
        let escape = match b {
            b'\\' => "\\\\",
            b'"' => "\\\"",
            b'\n' => "\\n",
            b'\r' => "\\r",
            b'\t' => "\\t",
            _ => continue,
        };
        out.push_str(&s[start..i]);
        out.push_str(escape);
        start = i + 1;
    }
    out.push_str(&s[start..]);

    out.push('"');
}