Skip to main content

_etoon/
toon.rs

1//! TOON encoder core (sonic-rs backend).
2//!
3//! Input: JSON bytes (from orjson.dumps on Python side).
4//! Output: TOON string, byte-identical to `toons.dumps()` for standard JSON payloads.
5
6use sonic_rs::{Array, JsonContainerTrait, JsonType, JsonValueTrait, Object, Value};
7use std::fmt::Write as _;
8
9pub fn encode(json_bytes: &[u8]) -> Result<String, String> {
10    let value: Value =
11        sonic_rs::from_slice(json_bytes).map_err(|e| format!("JSON parse error: {}", e))?;
12    let mut out = String::with_capacity(json_bytes.len().saturating_mul(3) / 4);
13    write_root(&value, &mut out);
14    Ok(out)
15}
16
17fn write_root(v: &Value, out: &mut String) {
18    match v.get_type() {
19        JsonType::Object => {
20            let m = v.as_object().unwrap();
21            if !m.is_empty() {
22                write_object_body(m, 0, out);
23            }
24        }
25        JsonType::Array => write_array_suffix(v.as_array().unwrap(), 0, out),
26        _ => write_scalar(v, out),
27    }
28}
29
30fn write_object_body(m: &Object, indent: usize, out: &mut String) {
31    let mut first = true;
32    for (k, v) in m.iter() {
33        if !first {
34            out.push('\n');
35        }
36        first = false;
37        write_indent(indent, out);
38        write_key_value(k, v, indent, out);
39    }
40}
41
42fn write_key_value(k: &str, v: &Value, indent: usize, out: &mut String) {
43    write_key(k, out);
44    match v.get_type() {
45        JsonType::Object => {
46            let child = v.as_object().unwrap();
47            if child.is_empty() {
48                out.push(':');
49            } else {
50                out.push_str(":\n");
51                write_object_body(child, indent + 1, out);
52            }
53        }
54        JsonType::Array => write_array_suffix(v.as_array().unwrap(), indent, out),
55        _ => {
56            out.push_str(": ");
57            write_scalar(v, out);
58        }
59    }
60}
61
62fn write_array_suffix(arr: &Array, indent: usize, out: &mut String) {
63    write!(out, "[{}]", arr.len()).unwrap();
64
65    if arr.is_empty() {
66        out.push(':');
67        return;
68    }
69
70    // All scalars → inline
71    if arr.iter().all(is_scalar) {
72        out.push_str(": ");
73        let mut first = true;
74        for v in arr.iter() {
75            if !first {
76                out.push(',');
77            }
78            first = false;
79            write_scalar(v, out);
80        }
81        return;
82    }
83
84    // Tabular
85    if let Some((keys, uniform_order)) = table_keys(arr) {
86        out.push('{');
87        for (i, k) in keys.iter().enumerate() {
88            if i > 0 {
89                out.push(',');
90            }
91            write_key(k, out);
92        }
93        out.push_str("}:");
94        if uniform_order {
95            // Fast path: all rows have keys in the same order as header.
96            // Iterate sequentially, no key lookups.
97            for item in arr.iter() {
98                let m = item.as_object().unwrap();
99                out.push('\n');
100                write_indent(indent + 1, out);
101                let mut first = true;
102                for (_, v) in m.iter() {
103                    if !first {
104                        out.push(',');
105                    }
106                    first = false;
107                    write_scalar(v, out);
108                }
109            }
110        } else {
111            // Slow path: row orders differ, lookup per key.
112            for item in arr.iter() {
113                let m = item.as_object().unwrap();
114                out.push('\n');
115                write_indent(indent + 1, out);
116                let mut first = true;
117                for k in &keys {
118                    if !first {
119                        out.push(',');
120                    }
121                    first = false;
122                    write_scalar(m.get(&k.as_str()).unwrap(), out);
123                }
124            }
125        }
126        return;
127    }
128
129    // Bulleted fallback
130    out.push(':');
131    for item in arr.iter() {
132        out.push('\n');
133        write_indent(indent + 1, out);
134        out.push('-');
135        write_list_item(item, indent + 1, out);
136    }
137}
138
139fn write_list_item(v: &Value, l: usize, out: &mut String) {
140    match v.get_type() {
141        JsonType::Object => {
142            let m = v.as_object().unwrap();
143            if !m.is_empty() {
144                out.push(' ');
145                write_list_item_object(m, l, out);
146            }
147        }
148        JsonType::Array => {
149            out.push(' ');
150            write_array_suffix(v.as_array().unwrap(), l, out);
151        }
152        _ => {
153            out.push(' ');
154            write_scalar(v, out);
155        }
156    }
157}
158
159fn write_list_item_object(m: &Object, l: usize, out: &mut String) {
160    let mut first = true;
161    for (k, v) in m.iter() {
162        if !first {
163            out.push('\n');
164            write_indent(l + 1, out);
165        }
166        first = false;
167        write_key(k, out);
168        match v.get_type() {
169            JsonType::Object => {
170                let child = v.as_object().unwrap();
171                if child.is_empty() {
172                    out.push(':');
173                } else {
174                    out.push_str(":\n");
175                    write_object_body(child, l + 2, out);
176                }
177            }
178            JsonType::Array => write_array_suffix(v.as_array().unwrap(), l + 1, out),
179            _ => {
180                out.push_str(": ");
181                write_scalar(v, out);
182            }
183        }
184    }
185}
186
187// ==================== Helpers ====================
188
// Ready-made indentation strings for levels 0 through 8, two spaces per level.
const INDENTS: [&str; 9] = [
    "",
    "  ",
    "    ",
    "      ",
    "        ",
    "          ",
    "            ",
    "              ",
    "                ",
];

/// Appends `level * 2` spaces to `out`.
///
/// Levels covered by `INDENTS` are copied from the table; deeper levels are
/// generated one space at a time.
#[inline]
fn write_indent(level: usize, out: &mut String) {
    match INDENTS.get(level) {
        Some(prefix) => out.push_str(prefix),
        None => out.extend(std::iter::repeat(' ').take(level * 2)),
    }
}
212
213fn is_scalar(v: &Value) -> bool {
214    !matches!(v.get_type(), JsonType::Object | JsonType::Array)
215}
216
217/// Return ordered keys + order-uniformity flag if array is tabular-eligible.
218/// `uniform_order = true` means every row has keys in the exact same order as the header,
219/// allowing sequential iteration without key lookups.
220fn table_keys(arr: &Array) -> Option<(Vec<String>, bool)> {
221    let first_v = arr.iter().next()?;
222    let first = first_v.as_object()?;
223    if first.is_empty() {
224        return None;
225    }
226    if !first.iter().all(|(_, v)| is_scalar(v)) {
227        return None;
228    }
229    let keys: Vec<String> = first.iter().map(|(k, _)| k.to_string()).collect();
230    let mut uniform_order = true;
231
232    for item in arr.iter().skip(1) {
233        let m = item.as_object()?;
234        if m.len() != keys.len() {
235            return None;
236        }
237        // Check order + scalar values in single pass
238        let mut row_iter = m.iter();
239        for k in &keys {
240            let (ik, iv) = row_iter.next()?;
241            if !is_scalar(iv) {
242                return None;
243            }
244            if ik != k.as_str() {
245                uniform_order = false;
246                // Can't early-return — still need to verify all keys exist and values scalar.
247            }
248        }
249        // If order mismatch, do a final pass to confirm keys exist
250        if !uniform_order {
251            for k in &keys {
252                match m.get(&k.as_str()) {
253                    Some(v) if is_scalar(v) => {}
254                    _ => return None,
255                }
256            }
257        }
258    }
259    Some((keys, uniform_order))
260}
261
262// ==================== Scalar ====================
263
264fn write_scalar(v: &Value, out: &mut String) {
265    match v.get_type() {
266        JsonType::Null => out.push_str("null"),
267        JsonType::Boolean => out.push_str(if v.as_bool().unwrap() {
268            "true"
269        } else {
270            "false"
271        }),
272        JsonType::Number => write_number(v, out),
273        JsonType::String => write_string_value(v.as_str().unwrap(), out),
274        _ => unreachable!("write_scalar on non-scalar"),
275    }
276}
277
278fn write_number(v: &Value, out: &mut String) {
279    if let Some(i) = v.as_i64() {
280        let mut buf = itoa::Buffer::new();
281        out.push_str(buf.format(i));
282        return;
283    }
284    if let Some(u) = v.as_u64() {
285        let mut buf = itoa::Buffer::new();
286        out.push_str(buf.format(u));
287        return;
288    }
289    if let Some(f) = v.as_f64() {
290        write_float(f, out);
291    } else {
292        out.push_str("null");
293    }
294}
295
296fn write_float(f: f64, out: &mut String) {
297    if !f.is_finite() {
298        out.push_str("null");
299        return;
300    }
301    if f == 0.0 {
302        out.push('0');
303        return;
304    }
305    if f.fract() == 0.0 && f.abs() < 1e16 {
306        let mut buf = itoa::Buffer::new();
307        out.push_str(buf.format(f as i64));
308        return;
309    }
310    write!(out, "{}", f).unwrap();
311}
312
313// ==================== String ====================
314
315fn write_string_value(s: &str, out: &mut String) {
316    if needs_quoting(s, false) {
317        write_quoted(s, out);
318    } else {
319        out.push_str(s);
320    }
321}
322
323fn write_key(k: &str, out: &mut String) {
324    if needs_quoting(k, true) {
325        write_quoted(k, out);
326    } else {
327        out.push_str(k);
328    }
329}
330
331fn needs_quoting(s: &str, is_key: bool) -> bool {
332    if s.is_empty() {
333        return true;
334    }
335    let bytes = s.as_bytes();
336    match bytes[0] {
337        b'-' | b'[' | b'{' | b'"' | b'#' | b' ' | b'\t' => return true,
338        _ => {}
339    }
340    match bytes[bytes.len() - 1] {
341        b' ' | b'\t' => return true,
342        _ => {}
343    }
344    for &b in bytes {
345        match b {
346            b',' | b':' | b'\n' | b'\r' | b'\t' | b'"' | b'\\' => return true,
347            b' ' if is_key => return true,
348            _ => {}
349        }
350    }
351    if matches!(s, "true" | "false" | "null") {
352        return true;
353    }
354    looks_like_number(bytes)
355}
356
/// Reports whether `bytes` is, in its entirety, shaped like a number:
/// an optional `-`, one or more digits, an optional `.` followed by one or
/// more digits, and an optional exponent (`e`/`E`, optional sign, one or more
/// digits).
///
/// Leading zeros are accepted (`05` counts as number-shaped). Empty input
/// returns `false` instead of panicking on an out-of-bounds index.
fn looks_like_number(bytes: &[u8]) -> bool {
    // Length of the run of ASCII digits at the start of `input`.
    fn digit_run(input: &[u8]) -> usize {
        input.iter().take_while(|b| b.is_ascii_digit()).count()
    }

    // Optional leading minus sign.
    let mut rest = match bytes.split_first() {
        Some((b'-', tail)) => tail,
        _ => bytes,
    };

    // Mandatory integer part (also rejects empty input and a lone "-").
    let int_len = digit_run(rest);
    if int_len == 0 {
        return false;
    }
    rest = &rest[int_len..];

    // Optional fraction: a dot must be followed by at least one digit.
    if let Some((b'.', tail)) = rest.split_first() {
        let frac_len = digit_run(tail);
        if frac_len == 0 {
            return false;
        }
        rest = &tail[frac_len..];
    }

    // Optional exponent: marker, optional sign, at least one digit.
    if let Some((b'e' | b'E', tail)) = rest.split_first() {
        let digits = match tail.split_first() {
            Some((b'+' | b'-', after_sign)) => after_sign,
            _ => tail,
        };
        let exp_len = digit_run(digits);
        if exp_len == 0 {
            return false;
        }
        rest = &digits[exp_len..];
    }

    // Anything left over means the input is not purely numeric.
    rest.is_empty()
}
400
/// Writes `s` surrounded by double quotes, escaping `\`, `"`, newline,
/// carriage return and tab as `\\`, `\"`, `\n`, `\r` and `\t`.
///
/// All other bytes, including non-ASCII text, are copied through unchanged.
/// Unescaped spans are appended in bulk rather than character by character.
fn write_quoted(s: &str, out: &mut String) {
    out.push('"');

    // Start of the span that has not been copied to `out` yet.
    let mut start = 0;
    for (i, b) in s.bytes().enumerate() {
        let escaped = match b {
            b'\\' => "\\\\",
            b'"' => "\\\"",
            b'\n' => "\\n",
            b'\r' => "\\r",
            b'\t' => "\\t",
            _ => continue,
        };
        // `i` is the offset of a single-byte ASCII character, so both `i` and
        // `i + 1` are char boundaries and plain slicing cannot panic.
        out.push_str(&s[start..i]);
        out.push_str(escaped);
        start = i + 1;
    }

    // Flush whatever follows the last escaped character.
    out.push_str(&s[start..]);
    out.push('"');
}