Skip to main content

toon_encode/
lib.rs

//! Minimal TOON encoder — Token-Oriented Object Notation.
//!
//! TOON is a compact, human-readable encoding of JSON data for LLM prompts.
//! It declares field names once in a tabular header and emits CSV-like rows,
//! which reduces token consumption by 30-50% for list-heavy responses.
//!
//! # Examples
//!
//! ```
//! use serde::Serialize;
//!
//! #[derive(Serialize)]
//! struct Item { name: String, value: i32 }
//!
//! let items = vec![
//!     Item { name: "alpha".into(), value: 1 },
//!     Item { name: "beta".into(), value: 2 },
//! ];
//! let toon = toon_encode::to_toon_string(&items).unwrap();
//! assert!(toon.contains("[2]{name,value}:"));
//! ```
22
23use serde_json::Value;
24
/// One level of indentation; the encoders repeat it `depth` times to indent nested output.
const INDENT: &str = "  ";
/// Characters whose presence anywhere in a string makes `toon_quote` wrap it in double quotes.
const TOON_SPECIAL: &[char] = &[',', ':', '"', '\\', '[', ']', '{', '}', '\n', '\r', '\t'];
27
28/// Encode any `Serialize` type as a TOON string.
29///
30/// Internally converts to `serde_json::Value` first, then encodes as TOON.
31pub fn to_toon_string<T: serde::Serialize>(value: &T) -> Result<String, serde_json::Error> {
32    let json_value = serde_json::to_value(value)?;
33    Ok(encode_toon(&json_value, 0))
34}
35
36/// Encode a `serde_json::Value` as a TOON string at the given indentation depth.
37pub fn encode_toon(value: &Value, depth: usize) -> String {
38    let indent = INDENT.repeat(depth);
39    match value {
40        Value::Null => "null".to_string(),
41        Value::Bool(b) => b.to_string(),
42        Value::Number(n) => n.to_string(),
43        Value::String(s) => toon_quote(s),
44        Value::Array(arr) if is_tabular(arr) => encode_tabular(arr, depth),
45        Value::Array(arr) => encode_list(arr, depth),
46        Value::Object(obj) => {
47            let mut lines = Vec::new();
48            obj.iter().for_each(|(k, v)| match v {
49                Value::Object(_) | Value::Array(_) => {
50                    lines.push(format!("{indent}{}:", toon_quote(k)));
51                    let child = encode_toon(v, depth + 1);
52                    lines.push(child);
53                }
54                _ => lines.push(format!("{indent}{}: {}", toon_quote(k), encode_toon(v, 0))),
55            });
56            lines.join("\n")
57        }
58    }
59}
60
61/// Check if an array is tabular (all elements are objects with identical key sets
62/// and only primitive values).
63fn is_tabular(arr: &[Value]) -> bool {
64    if arr.is_empty() {
65        return false;
66    }
67    let Some(Value::Object(first)) = arr.first() else {
68        return false;
69    };
70    let all_primitive =
71        |o: &serde_json::Map<String, Value>| o.values().all(|v| !v.is_object() && !v.is_array());
72    if !all_primitive(first) {
73        return false;
74    }
75    let keys: Vec<&String> = first.keys().collect();
76    arr[1..].iter().all(|v| {
77        v.as_object()
78            .map(|o| {
79                o.len() == keys.len()
80                    && keys.iter().all(|k| o.contains_key(k.as_str()))
81                    && all_primitive(o)
82            })
83            .unwrap_or(false)
84    })
85}
86
87/// Encode a tabular array as TOON with header row.
88fn encode_tabular(arr: &[Value], depth: usize) -> String {
89    let indent = INDENT.repeat(depth);
90    let row_indent = INDENT.repeat(depth + 1);
91    let Some(first) = arr[0].as_object() else {
92        return String::new();
93    };
94    let fields: Vec<&String> = first.keys().collect();
95    let header = fields
96        .iter()
97        .map(|f| f.as_str())
98        .collect::<Vec<_>>()
99        .join(",");
100    let mut lines = vec![format!("{indent}[{}]{{{header}}}:", arr.len())];
101    arr.iter().for_each(|row| {
102        let Some(obj) = row.as_object() else { return };
103        let vals: Vec<String> = fields
104            .iter()
105            .map(|f| encode_toon(&obj[f.as_str()], 0))
106            .collect();
107        lines.push(format!("{row_indent}{}", vals.join(",")));
108    });
109    lines.join("\n")
110}
111
112/// Encode a non-tabular array as TOON list.
113fn encode_list(arr: &[Value], depth: usize) -> String {
114    let row_indent = INDENT.repeat(depth);
115    let mut lines = Vec::new();
116    arr.iter().for_each(|v| {
117        let encoded = encode_toon(v, depth + 1);
118        if encoded.contains('\n') {
119            lines.push(format!("{row_indent}-"));
120            lines.push(encoded);
121        } else {
122            lines.push(format!("{row_indent}- {encoded}"));
123        }
124    });
125    lines.join("\n")
126}
127
128/// Quote a string if it contains TOON special characters, is empty, or starts with `-`.
129fn toon_quote(s: &str) -> String {
130    if s.is_empty() || s.starts_with('-') || s.contains(TOON_SPECIAL) {
131        let escaped = s
132            .replace('\\', "\\\\")
133            .replace('"', "\\\"")
134            .replace('\n', "\\n")
135            .replace('\r', "\\r")
136            .replace('\t', "\\t");
137        format!("\"{escaped}\"")
138    } else {
139        s.to_string()
140    }
141}
142
#[cfg(test)]
mod tests {
    use super::*;
    use serde::Serialize;
    use serde_json::json;

    /// Check that `toon_quote` maps every input to its expected output.
    fn assert_quotes(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(toon_quote(input), expected);
        }
    }

    /// Check that the encoded text contains `needle`, printing the text on failure.
    fn assert_contains(toon: &str, needle: &str) {
        assert!(toon.contains(needle), "got: {toon}");
    }

    // ── Quote tests ──────────────────────────────────────────

    #[test]
    fn quote_plain() {
        assert_quotes(&[("hello", "hello"), ("foo_bar", "foo_bar")]);
    }

    #[test]
    fn quote_special_chars() {
        assert_quotes(&[
            ("a,b", "\"a,b\""),
            ("key: val", "\"key: val\""),
            ("", "\"\""),
        ]);
    }

    #[test]
    fn quote_control_chars() {
        assert_quotes(&[
            ("line1\nline2", "\"line1\\nline2\""),
            ("col1\tcol2", "\"col1\\tcol2\""),
            ("cr\rhere", "\"cr\\rhere\""),
        ]);
    }

    #[test]
    fn tabular_with_newline_values() {
        let rows = json!([
            {"name": "a", "body": "line1\nline2"},
            {"name": "b", "body": "single"},
        ]);
        let toon = encode_toon(&rows, 0);
        // A newline inside a cell has to be escaped so the row stays on one line.
        let has_escaped_newline = toon.contains("\"line1\\nline2\"");
        assert!(has_escaped_newline, "newline should be escaped, got: {toon}");
        let line_count = toon.lines().count();
        assert_eq!(line_count, 3, "should be header + 2 rows, got: {toon}");
    }

    #[test]
    fn quote_dash_start() {
        assert_quotes(&[("-flag", "\"-flag\"")]);
    }

    #[test]
    fn quote_escapes() {
        assert_quotes(&[
            ("say \"hi\"", "\"say \\\"hi\\\"\""),
            ("a\\b", "\"a\\\\b\""),
        ]);
    }

    // ── Primitive tests ──────────────────────────────────────

    #[test]
    fn encode_primitives() {
        let cases = [
            (json!(null), "null"),
            (json!(true), "true"),
            (json!(42), "42"),
            (json!("hello"), "hello"),
            (json!("a,b"), "\"a,b\""),
        ];
        for (value, expected) in &cases {
            assert_eq!(encode_toon(value, 0), *expected);
        }
    }

    // ── Object tests ─────────────────────────────────────────

    #[test]
    fn encode_flat_object() {
        let toon = encode_toon(&json!({"version": "0.5.5", "findings": 0}), 0);
        for needle in ["version: 0.5.5", "findings: 0"] {
            assert_contains(&toon, needle);
        }
    }

    // ── Tabular array tests ──────────────────────────────────

    #[test]
    fn encode_tabular_array() {
        let rows = json!([
            {"name": "IOSP", "pct": 100.0},
            {"name": "CX", "pct": 99.8},
        ]);
        let toon = encode_toon(&rows, 0);
        let has_header = toon.contains("[2]{name,pct}:");
        assert!(has_header, "should have tabular header, got: {toon}");
        assert_contains(&toon, "IOSP,100.0");
        assert_contains(&toon, "CX,99.8");
    }

    #[test]
    fn encode_empty_array_not_tabular() {
        assert_eq!(encode_toon(&json!([]), 0), "");
    }

    // ── Non-tabular array tests ──────────────────────────────

    #[test]
    fn encode_primitive_list() {
        let toon = encode_toon(&json!(["a", "b", "c"]), 0);
        assert_contains(&toon, "- a");
        assert_contains(&toon, "- b");
    }

    #[test]
    fn encode_list_indentation() {
        let toon = encode_toon(&json!({"items": ["x", "y"]}), 0);
        assert_contains(&toon, "items:");
        let indented = toon.contains("  - x");
        assert!(indented, "items should be at 2-space indent, got: {toon}");
    }

    // ── Serde integration tests ──────────────────────────────

    #[test]
    fn to_toon_string_with_struct() {
        #[derive(Serialize)]
        struct Item {
            name: String,
            value: i32,
        }

        let items: Vec<Item> = [("alpha", 1), ("beta", 2)]
            .iter()
            .map(|&(name, value)| Item {
                name: name.into(),
                value,
            })
            .collect();
        let toon = to_toon_string(&items).unwrap();
        for needle in ["[2]{name,value}:", "alpha,1", "beta,2"] {
            assert_contains(&toon, needle);
        }
    }

    #[test]
    fn to_toon_string_with_nested_struct() {
        #[derive(Serialize)]
        struct Wrapper {
            results: Vec<Entry>,
            count: usize,
        }
        #[derive(Serialize)]
        struct Entry {
            file: String,
            kind: String,
        }

        let results: Vec<Entry> = [("a.rs", "fn"), ("b.rs", "struct")]
            .iter()
            .map(|&(file, kind)| Entry {
                file: file.into(),
                kind: kind.into(),
            })
            .collect();
        let wrapper = Wrapper { results, count: 2 };
        let toon = to_toon_string(&wrapper).unwrap();
        for needle in ["results:", "[2]{file,kind}:", "count: 2"] {
            assert_contains(&toon, needle);
        }
    }
}
322}