Skip to main content

_etoon/
toon.rs

1//! TOON encoder core (sonic-rs backend).
2//!
3//! Input: JSON bytes (from orjson.dumps on Python side).
4//! Output: TOON string, byte-identical to `toons.dumps()` for standard JSON payloads.
5
6use sonic_rs::{Array, JsonContainerTrait, JsonType, JsonValueTrait, Object, Value};
7use std::fmt::Write as _;
8
9pub fn encode(json_bytes: &[u8]) -> Result<String, String> {
10    let value: Value =
11        sonic_rs::from_slice(json_bytes).map_err(|e| format!("JSON parse error: {}", e))?;
12    // TOON output is always ≤ input JSON size; use input.len() as a safe upper bound.
13    let mut out = String::with_capacity(json_bytes.len());
14    write_root(&value, &mut out);
15    Ok(out)
16}
17
18fn write_root(v: &Value, out: &mut String) {
19    match v.get_type() {
20        JsonType::Object => {
21            let m = v.as_object().unwrap();
22            if !m.is_empty() {
23                write_object_body(m, 0, out);
24            }
25        }
26        JsonType::Array => write_array_suffix(v.as_array().unwrap(), 0, out),
27        _ => write_scalar(v, out),
28    }
29}
30
31fn write_object_body(m: &Object, indent: usize, out: &mut String) {
32    let mut first = true;
33    for (k, v) in m.iter() {
34        if !first {
35            out.push('\n');
36        }
37        first = false;
38        write_indent(indent, out);
39        write_key_value(k, v, indent, out);
40    }
41}
42
43fn write_key_value(k: &str, v: &Value, indent: usize, out: &mut String) {
44    write_key(k, out);
45    write_value_after_key(v, indent, out);
46}
47
48/// Write the ": value" or ":\n<body>" tail after a key at `key_indent`.
49/// Child object bodies go at `key_indent + 1`; array rows go at `key_indent + 1`
50/// (via write_array_suffix's internal `+ 1`).
51fn write_value_after_key(v: &Value, key_indent: usize, out: &mut String) {
52    match v.get_type() {
53        JsonType::Object => {
54            let child = v.as_object().unwrap();
55            if child.is_empty() {
56                out.push(':');
57            } else {
58                out.push_str(":\n");
59                write_object_body(child, key_indent + 1, out);
60            }
61        }
62        JsonType::Array => write_array_suffix(v.as_array().unwrap(), key_indent, out),
63        _ => {
64            out.push_str(": ");
65            write_scalar(v, out);
66        }
67    }
68}
69
70fn write_array_suffix(arr: &Array, indent: usize, out: &mut String) {
71    write!(out, "[{}]", arr.len()).unwrap();
72
73    if arr.is_empty() {
74        out.push(':');
75        return;
76    }
77
78    if arr.iter().all(is_scalar) {
79        out.push_str(": ");
80        let mut first = true;
81        for v in arr.iter() {
82            if !first {
83                out.push(',');
84            }
85            first = false;
86            write_scalar(v, out);
87        }
88        return;
89    }
90
91    if let Some((keys, uniform_order)) = table_keys(arr) {
92        out.push('{');
93        for (i, k) in keys.iter().enumerate() {
94            if i > 0 {
95                out.push(',');
96            }
97            write_key(k, out);
98        }
99        out.push_str("}:");
100        if uniform_order {
101            // Fast path: all rows have keys in the same order as header.
102            // Iterate sequentially, no key lookups.
103            for item in arr.iter() {
104                let m = item.as_object().unwrap();
105                out.push('\n');
106                write_indent(indent + 1, out);
107                let mut first = true;
108                for (_, v) in m.iter() {
109                    if !first {
110                        out.push(',');
111                    }
112                    first = false;
113                    write_scalar(v, out);
114                }
115            }
116        } else {
117            // Slow path: row orders differ, lookup per key.
118            for item in arr.iter() {
119                let m = item.as_object().unwrap();
120                out.push('\n');
121                write_indent(indent + 1, out);
122                let mut first = true;
123                for k in &keys {
124                    if !first {
125                        out.push(',');
126                    }
127                    first = false;
128                    write_scalar(m.get(k).unwrap(), out);
129                }
130            }
131        }
132        return;
133    }
134
135    out.push(':');
136    for item in arr.iter() {
137        out.push('\n');
138        write_indent(indent + 1, out);
139        out.push('-');
140        write_list_item(item, indent + 1, out);
141    }
142}
143
144fn write_list_item(v: &Value, l: usize, out: &mut String) {
145    match v.get_type() {
146        JsonType::Object => {
147            let m = v.as_object().unwrap();
148            if !m.is_empty() {
149                out.push(' ');
150                write_list_item_object(m, l, out);
151            }
152        }
153        JsonType::Array => {
154            out.push(' ');
155            write_array_suffix(v.as_array().unwrap(), l, out);
156        }
157        _ => {
158            out.push(' ');
159            write_scalar(v, out);
160        }
161    }
162}
163
164fn write_list_item_object(m: &Object, l: usize, out: &mut String) {
165    let mut first = true;
166    for (k, v) in m.iter() {
167        if !first {
168            out.push('\n');
169            write_indent(l + 1, out);
170        }
171        first = false;
172        write_key(k, out);
173        // List-item's first key sits at virtual indent l+1, so pass l+1 as key_indent.
174        write_value_after_key(v, l + 1, out);
175    }
176}
177
178// ==================== Helpers ====================
179
// Ready-made indentation prefixes for levels 0 through 8 (two spaces per
// level), so the common depths are appended with a single `push_str`.
const INDENTS: [&str; 9] = [
    "",
    "  ",
    "    ",
    "      ",
    "        ",
    "          ",
    "            ",
    "              ",
    "                ",
];

/// Append `level` levels of indentation (two spaces each) to `out`.
///
/// Levels covered by `INDENTS` use the precomputed prefix; deeper levels
/// append the spaces one at a time.
#[inline]
fn write_indent(level: usize, out: &mut String) {
    match INDENTS.get(level) {
        Some(prefix) => out.push_str(prefix),
        None => out.extend(std::iter::repeat(' ').take(level * 2)),
    }
}
203
204fn is_scalar(v: &Value) -> bool {
205    !matches!(v.get_type(), JsonType::Object | JsonType::Array)
206}
207
208/// Return ordered keys + order-uniformity flag if array is tabular-eligible.
209/// `uniform_order = true` means every row has keys in the exact same order as the header,
210/// allowing sequential iteration without key lookups.
211fn table_keys<'a>(arr: &'a Array) -> Option<(Vec<&'a str>, bool)> {
212    let first_v = arr.iter().next()?;
213    let first = first_v.as_object()?;
214    if first.is_empty() {
215        return None;
216    }
217    if !first.iter().all(|(_, v)| is_scalar(v)) {
218        return None;
219    }
220    let keys: Vec<&'a str> = first.iter().map(|(k, _)| k).collect();
221    let mut uniform_order = true;
222
223    for item in arr.iter().skip(1) {
224        let m = item.as_object()?;
225        if m.len() != keys.len() {
226            return None;
227        }
228        let mut row_iter = m.iter();
229        for k in &keys {
230            let (ik, iv) = row_iter.next()?;
231            if !is_scalar(iv) {
232                return None;
233            }
234            if ik != *k {
235                uniform_order = false;
236            }
237        }
238        // Order mismatch: re-verify via lookup that every header key exists in this row.
239        if !uniform_order {
240            for k in &keys {
241                match m.get(k) {
242                    Some(v) if is_scalar(v) => {}
243                    _ => return None,
244                }
245            }
246        }
247    }
248    Some((keys, uniform_order))
249}
250
251// ==================== Scalar ====================
252
253fn write_scalar(v: &Value, out: &mut String) {
254    match v.get_type() {
255        JsonType::Null => out.push_str("null"),
256        JsonType::Boolean => out.push_str(if v.as_bool().unwrap() {
257            "true"
258        } else {
259            "false"
260        }),
261        JsonType::Number => write_number(v, out),
262        JsonType::String => write_string_value(v.as_str().unwrap(), out),
263        _ => unreachable!("write_scalar on non-scalar"),
264    }
265}
266
267fn write_number(v: &Value, out: &mut String) {
268    if let Some(i) = v.as_i64() {
269        let mut buf = itoa::Buffer::new();
270        out.push_str(buf.format(i));
271        return;
272    }
273    if let Some(u) = v.as_u64() {
274        let mut buf = itoa::Buffer::new();
275        out.push_str(buf.format(u));
276        return;
277    }
278    if let Some(f) = v.as_f64() {
279        write_float(f, out);
280    } else {
281        out.push_str("null");
282    }
283}
284
285fn write_float(f: f64, out: &mut String) {
286    if !f.is_finite() {
287        out.push_str("null");
288        return;
289    }
290    if f == 0.0 {
291        out.push('0');
292        return;
293    }
294    if f.fract() == 0.0 && f.abs() < 1e16 {
295        let mut buf = itoa::Buffer::new();
296        out.push_str(buf.format(f as i64));
297        return;
298    }
299    write!(out, "{}", f).unwrap();
300}
301
302// ==================== String ====================
303
304fn write_string_value(s: &str, out: &mut String) {
305    if needs_quoting(s, false) {
306        write_quoted(s, out);
307    } else {
308        out.push_str(s);
309    }
310}
311
312fn write_key(k: &str, out: &mut String) {
313    if needs_quoting(k, true) {
314        write_quoted(k, out);
315    } else {
316        out.push_str(k);
317    }
318}
319
320fn needs_quoting(s: &str, is_key: bool) -> bool {
321    if s.is_empty() {
322        return true;
323    }
324    let bytes = s.as_bytes();
325    match bytes[0] {
326        b'-' | b'[' | b'{' | b'"' | b'#' | b' ' | b'\t' => return true,
327        _ => {}
328    }
329    match bytes[bytes.len() - 1] {
330        b' ' | b'\t' => return true,
331        _ => {}
332    }
333    for &b in bytes {
334        match b {
335            b',' | b':' | b'\n' | b'\r' | b'\t' | b'"' | b'\\' => return true,
336            b' ' if is_key => return true,
337            _ => {}
338        }
339    }
340    if matches!(s, "true" | "false" | "null") {
341        return true;
342    }
343    looks_like_number(bytes)
344}
345
/// Report whether `bytes` spells a number-like token in its entirety.
///
/// Accepted shape: an optional leading `-`, one or more ASCII digits, an
/// optional fraction (`.` followed by one or more digits) and an optional
/// exponent (`e` or `E`, an optional `+`/`-`, one or more digits). Anything
/// left over after that shape makes the result `false`.
///
/// An empty slice is not a number and yields `false` rather than panicking.
fn looks_like_number(bytes: &[u8]) -> bool {
    /// Length of the run of ASCII digits starting at `from` (`from <= bytes.len()`).
    fn digit_run(bytes: &[u8], from: usize) -> usize {
        bytes[from..]
            .iter()
            .take_while(|b| b.is_ascii_digit())
            .count()
    }

    // Position just past the optional sign; bail out on empty input.
    let mut i = match bytes.first() {
        None => return false,
        Some(b'-') => 1,
        Some(_) => 0,
    };

    // Integer part: at least one digit (this also rejects a lone "-").
    let int_len = digit_run(bytes, i);
    if int_len == 0 {
        return false;
    }
    i += int_len;

    // Optional fraction: a '.' must be followed by at least one digit.
    if bytes.get(i) == Some(&b'.') {
        let frac_len = digit_run(bytes, i + 1);
        if frac_len == 0 {
            return false;
        }
        i += 1 + frac_len;
    }

    // Optional exponent: marker, optional sign, at least one digit.
    if matches!(bytes.get(i), Some(b'e') | Some(b'E')) {
        i += 1;
        if matches!(bytes.get(i), Some(b'+') | Some(b'-')) {
            i += 1;
        }
        let exp_len = digit_run(bytes, i);
        if exp_len == 0 {
            return false;
        }
        i += exp_len;
    }

    // The whole input must have been consumed.
    i == bytes.len()
}
389
/// Write `s` wrapped in double quotes, escaping `\`, `"`, newline, carriage
/// return and tab as `\\`, `\"`, `\n`, `\r` and `\t`. All other bytes are
/// copied through unchanged.
fn write_quoted(s: &str, out: &mut String) {
    out.push('"');

    // Byte offset up to which `s` has already been copied into `out`.
    let mut copied = 0;
    for (pos, byte) in s.bytes().enumerate() {
        let escape = match byte {
            b'\\' => "\\\\",
            b'"' => "\\\"",
            b'\n' => "\\n",
            b'\r' => "\\r",
            b'\t' => "\\t",
            _ => continue,
        };
        // Flush the untouched span before the escape in one go. Every escaped
        // byte is single-byte ASCII, so `copied` and `pos` always fall on
        // character boundaries and the slice is valid.
        out.push_str(&s[copied..pos]);
        out.push_str(escape);
        copied = pos + 1;
    }
    out.push_str(&s[copied..]);

    out.push('"');
}