totoon/
lib.rs

1//! totoon - Convert any data format to TOON (Token-Oriented Object Notation)
2//!
3//! TOON is a compact data format that reduces token usage by 30-60% compared to JSON
4//! when interfacing with Large Language Models (LLMs).
5
6use serde_json::Value;
7use std::collections::HashMap;
8
9/// Convert a serde_json::Value to TOON format string
10///
11/// # Examples
12///
13/// ```
14/// use totoon::to_toon;
15/// use serde_json::json;
16///
17/// let data = json!({
18///     "name": "Alice",
19///     "age": 30
20/// });
21///
22/// let toon = to_toon(&data);
23/// println!("{}", toon);
24/// ```
25pub fn to_toon(value: &Value) -> String {
26    to_toon_with_indent(value, 2, 0)
27}
28
29/// Convert a serde_json::Value to TOON format with custom indentation
30pub fn to_toon_with_indent(value: &Value, indent: usize, level: usize) -> String {
31    match value {
32        Value::Null => "null".to_string(),
33        Value::Bool(b) => b.to_string(),
34        Value::Number(n) => n.to_string(),
35        Value::String(s) => escape_string(s),
36        Value::Array(arr) => {
37            if arr.is_empty() {
38                return "[]".to_string();
39            }
40            // Check if it's a list of objects (tabular format)
41            if let Some(Value::Object(_)) = arr.first() {
42                if arr.iter().all(|v| v.is_object()) {
43                    return list_of_objects_to_toon("", arr, indent, level);
44                }
45            }
46            list_to_toon(arr, indent, level)
47        }
48        Value::Object(obj) => dict_to_toon(obj, indent, level),
49    }
50}
51
52/// Convert JSON string to TOON format
53///
54/// # Examples
55///
56/// ```
57/// use totoon::json_to_toon;
58///
59/// let json_str = r#"{"name": "Alice", "age": 30}"#;
60/// let toon = json_to_toon(json_str).unwrap();
61/// println!("{}", toon);
62/// ```
63pub fn json_to_toon(json_str: &str) -> Result<String, serde_json::Error> {
64    let value: Value = serde_json::from_str(json_str)?;
65    Ok(to_toon(&value))
66}
67
68fn dict_to_toon(obj: &serde_json::Map<String, Value>, indent: usize, level: usize) -> String {
69    if obj.is_empty() {
70        return "{}".to_string();
71    }
72
73    let mut lines = Vec::new();
74    let prefix = " ".repeat(indent * level);
75
76    for (key, value) in obj {
77        match value {
78            Value::Object(inner_obj) if !inner_obj.is_empty() => {
79                lines.push(format!("{}{}:", prefix, key));
80                lines.push(dict_to_toon(inner_obj, indent, level + 1));
81            }
82            Value::Array(arr) if !arr.is_empty() => {
83                // Check if it's a list of objects
84                if let Some(Value::Object(_)) = arr.first() {
85                    if arr.iter().all(|v| v.is_object()) {
86                        lines.push(list_of_objects_to_toon(key, arr, indent, level));
87                        continue;
88                    }
89                }
90                lines.push(format!("{}{}:", prefix, key));
91                lines.push(list_to_toon(arr, indent, level + 1));
92            }
93            _ => {
94                let value_str = value_to_toon(value, indent, level + 1);
95                lines.push(format!("{}{}: {}", prefix, key, value_str));
96            }
97        }
98    }
99
100    lines.join("\n")
101}
102
103fn list_to_toon(arr: &[Value], indent: usize, level: usize) -> String {
104    if arr.is_empty() {
105        return "[]".to_string();
106    }
107
108    let mut lines = Vec::new();
109    let prefix = " ".repeat(indent * level);
110
111    for item in arr {
112        let value_str = value_to_toon(item, indent, level);
113        lines.push(format!("{}- {}", prefix, value_str));
114    }
115
116    lines.join("\n")
117}
118
119fn list_of_objects_to_toon(
120    key: &str,
121    arr: &[Value],
122    indent: usize,
123    level: usize,
124) -> String {
125    if arr.is_empty() {
126        return "[]".to_string();
127    }
128
129    let mut lines = Vec::new();
130    let prefix = " ".repeat(indent * level);
131
132    // Collect all unique keys from all objects
133    let mut seen_keys = HashMap::new();
134
135    for item in arr {
136        if let Value::Object(obj) = item {
137            for k in obj.keys() {
138                seen_keys.insert(k.clone(), true);
139            }
140        }
141    }
142
143    if seen_keys.is_empty() {
144        return "[]".to_string();
145    }
146
147    // Sort keys for consistent output (HashMap doesn't preserve order)
148    let mut all_keys: Vec<String> = seen_keys.keys().cloned().collect();
149    all_keys.sort();
150
151    // Header format: key[count]{field1,field2,field3}:
152    let count = arr.len();
153    let fields = all_keys.join(",");
154    if !key.is_empty() {
155        lines.push(format!("{}{}[{}]{{{}}}:", prefix, key, count, fields));
156    } else {
157        lines.push(format!("{}[{}]{{{}}}:", prefix, count, fields));
158    }
159
160    // Data rows: comma-separated values with 2 spaces indentation
161    let data_prefix = "  "; // Two spaces for data rows
162    let empty_value = Value::String(String::new());
163    for item in arr {
164        if let Value::Object(obj) = item {
165            let mut row_values = Vec::new();
166            for k in &all_keys {
167                let value = obj.get(k).unwrap_or(&empty_value);
168                let value_str = match value {
169                    Value::Array(arr_val) => {
170                        if arr_val.is_empty() {
171                            "[]".to_string()
172                        } else if let Some(Value::Object(_)) = arr_val.first() {
173                            // Array of objects: use compact inline tabular format
174                            let mut nested_keys_map = HashMap::new();
175                            for nested_item in arr_val {
176                                if let Value::Object(nested_obj) = nested_item {
177                                    for nk in nested_obj.keys() {
178                                        nested_keys_map.insert(nk.clone(), true);
179                                    }
180                                }
181                            }
182                            let mut nested_keys: Vec<String> = nested_keys_map.keys().cloned().collect();
183                            nested_keys.sort();
184                            let nested_fields = nested_keys.join(",");
185                            let nested_count = arr_val.len();
186                            
187                            // Build compact data rows separated by semicolons
188                            let mut nested_rows = Vec::new();
189                            for nested_item in arr_val {
190                                if let Value::Object(nested_obj) = nested_item {
191                                    let mut nested_row_values = Vec::new();
192                                    for nk in &nested_keys {
193                                        let nv = nested_obj.get(nk).unwrap_or(&empty_value);
194                                        let mut nv_str = value_to_toon(nv, 0, 0);
195                                        if nv_str.contains(',') || nv_str.contains(';') || nv_str.contains(':') {
196                                            nv_str = format!("\"{}\"", nv_str);
197                                        }
198                                        nested_row_values.push(nv_str);
199                                    }
200                                    nested_rows.push(nested_row_values.join(","));
201                                }
202                            }
203                            format!("[{}]{{{}}}:{}", nested_count, nested_fields, nested_rows.join(";"))
204                        } else {
205                            // Array of primitives: use bracket notation
206                            let items: Vec<String> = arr_val.iter().map(|v| value_to_toon(v, 0, 0)).collect();
207                            format!("[{}]", items.join(","))
208                        }
209                    }
210                    Value::Object(nested_obj) => {
211                        // Nested object: use compact key:value format
212                        let mut nested_items = Vec::new();
213                        let mut nested_keys: Vec<String> = nested_obj.keys().cloned().collect();
214                        nested_keys.sort();
215                        for nk in nested_keys {
216                            let nv = nested_obj.get(&nk).unwrap_or(&empty_value);
217                            let mut nv_str = value_to_toon(nv, 0, 0);
218                            if nv_str.contains(',') || nv_str.contains(':') {
219                                nv_str = format!("\"{}\"", nv_str);
220                            }
221                            nested_items.push(format!("{}:{}", nk, nv_str));
222                        }
223                        format!("{{{}}}", nested_items.join(","))
224                    }
225                    _ => {
226                        let mut value_str = value_to_toon(value, 0, 0);
227                        // Handle values with commas, newlines, colons, or semicolons
228                        // Only quote if not already quoted and contains special chars
229                        if !(value_str.starts_with('"') && value_str.ends_with('"')) {
230                            if value_str.contains(',') || value_str.contains('\n') || value_str.contains(':') || value_str.contains(';') {
231                                // Escape quotes if present
232                                if value_str.contains('"') {
233                                    value_str = value_str.replace('"', "\\\"");
234                                }
235                                value_str = format!("\"{}\"", value_str);
236                            }
237                        }
238                        value_str
239                    }
240                };
241                row_values.push(value_str);
242            }
243            let row = row_values.join(",");
244            lines.push(format!("{}{}", data_prefix, row));
245        }
246    }
247
248    lines.join("\n")
249}
250
251fn value_to_toon(value: &Value, indent: usize, level: usize) -> String {
252    match value {
253        Value::Null => "null".to_string(),
254        Value::Bool(b) => b.to_string(),
255        Value::Number(n) => n.to_string(),
256        Value::String(s) => escape_string(s),
257        Value::Array(arr) => "\n".to_string() + &list_to_toon(arr, indent, level),
258        Value::Object(obj) => "\n".to_string() + &dict_to_toon(obj, indent, level),
259    }
260}
261
/// Render a string scalar, quoting it only when it contains a newline, tab or
/// carriage return.
///
/// Strings without any of those characters are returned unchanged; whether
/// other special characters require quoting is left to the caller. Otherwise
/// the result is wrapped in double quotes with backslashes, double quotes and
/// the three control characters backslash-escaped.
fn escape_string(s: &str) -> String {
    let needs_quoting = s.contains(|ch: char| ch == '\n' || ch == '\t' || ch == '\r');

    if needs_quoting {
        let mut quoted = String::with_capacity(s.len() + 2);
        quoted.push('"');
        for ch in s.chars() {
            let replacement = match ch {
                '\n' => "\\n",
                '\r' => "\\r",
                '\t' => "\\t",
                '"' => "\\\"",
                '\\' => "\\\\",
                other => {
                    quoted.push(other);
                    continue;
                }
            };
            quoted.push_str(replacement);
        }
        quoted.push('"');
        quoted
    } else {
        s.to_owned()
    }
}
287
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Scalar members of a flat object come out as `key: value` lines.
    #[test]
    fn test_simple_object() {
        let toon = to_toon(&json!({
            "name": "Alice",
            "age": 30
        }));

        assert!(toon.contains("name: Alice"));
        assert!(toon.contains("age: 30"));
    }

    /// Nested objects get a `key:` line and their members appear in the output.
    #[test]
    fn test_nested_object() {
        let toon = to_toon(&json!({
            "user": {
                "name": "Alice",
                "details": {
                    "age": 30,
                    "city": "NYC"
                }
            }
        }));

        assert!(toon.contains("user:"));
        assert!(toon.contains("name: Alice"));
        assert!(toon.contains("details:"));
        assert!(toon.contains("age: 30"));
    }

    /// A top-level array of objects uses the `[count]{fields}:` tabular form.
    #[test]
    fn test_list_of_objects() {
        let toon = to_toon(&json!([
            {"name": "Alice", "age": 30},
            {"name": "Bob", "age": 25}
        ]));

        assert!(toon.contains("[2]{"));
        assert!(toon.contains("Alice"));
        assert!(toon.contains("Bob"));
        // Row values are comma-separated.
        assert!(toon.contains(","));
    }

    /// An array of scalars becomes a dash-prefixed list.
    #[test]
    fn test_simple_list() {
        let toon = to_toon(&json!([1, 2, 3]));

        assert!(toon.contains("- 1"));
        assert!(toon.contains("- 2"));
        assert!(toon.contains("- 3"));
    }

    /// Top-level scalars are rendered as their bare textual form.
    #[test]
    fn test_primitives() {
        assert_eq!(to_toon(&Value::Null), "null");
        assert_eq!(to_toon(&json!(true)), "true");
        assert_eq!(to_toon(&json!(false)), "false");
        assert_eq!(to_toon(&json!(42)), "42");
        assert_eq!(to_toon(&json!(3.14)), "3.14");
        assert_eq!(to_toon(&json!("hello")), "hello");
    }

    /// A string containing a newline is emitted in quoted form.
    #[test]
    fn test_string_escaping() {
        let toon = to_toon(&json!({
            "message": "Hello\nWorld"
        }));

        assert!(toon.contains("\""));
        assert!(toon.contains("Hello"));
        assert!(toon.contains("World"));
    }

    /// A table and a nested object can sit side by side in one document.
    #[test]
    fn test_complex_structure() {
        let toon = to_toon(&json!({
            "users": [
                {"name": "Alice", "age": 30, "active": true},
                {"name": "Bob", "age": 25, "active": false}
            ],
            "metadata": {
                "count": 2,
                "timestamp": "2024-01-01"
            }
        }));

        assert!(toon.contains("users["));
        // Row values are comma-separated.
        assert!(toon.contains(","));
        assert!(toon.contains("metadata:"));
        assert!(toon.contains("count: 2"));
    }

    /// An empty object is rendered as `{}`.
    #[test]
    fn test_empty_object() {
        assert_eq!(to_toon(&json!({})), "{}");
    }

    /// An empty array is rendered as `[]`.
    #[test]
    fn test_empty_array() {
        assert_eq!(to_toon(&json!([])), "[]");
    }

    /// JSON text is parsed and converted in one call.
    #[test]
    fn test_json_to_toon() {
        let source = r#"{"name": "Alice", "age": 30}"#;
        let toon = json_to_toon(source).unwrap();

        assert!(toon.contains("name: Alice"));
        assert!(toon.contains("age: 30"));
    }

    /// JSON text holding an array of objects yields a keyed table.
    #[test]
    fn test_json_to_toon_complex() {
        let source = r#"{"users": [{"name": "Alice", "age": 30}, {"name": "Bob", "age": 25}]}"#;
        let toon = json_to_toon(source).unwrap();

        assert!(toon.contains("users["));
        // Row values are comma-separated.
        assert!(toon.contains(","));
    }
}
415