aether_core/
toon.rs

1use serde_json::{Value, Map};
2
/// Token-Oriented Object Notation (TOON) serializer.
///
/// TOON is a compact, indentation-based text rendering of a JSON value,
/// intended to cost fewer tokens than the equivalent JSON text (the original
/// author cites a 30-60% reduction; that figure is not measured in this file).
///
/// `Toon` carries no state: it only namespaces the associated functions
/// [`Toon::serialize`] and [`Toon::deserialize`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Toon;
6
7impl Toon {
8    /// Serialize a JSON value to TOON format.
9    pub fn serialize(value: &Value) -> String {
10        match value {
11            Value::Object(map) => Self::serialize_object(map, 0),
12            Value::Array(arr) => Self::serialize_array(arr, 0),
13            Value::String(s) => s.clone(),
14            Value::Number(n) => n.to_string(),
15            Value::Bool(b) => if *b { "T" } else { "F" }.to_string(),
16            Value::Null => "~".to_string(),
17        }
18    }
19
20    fn serialize_object(map: &Map<String, Value>, indent: usize) -> String {
21        let mut out = String::new();
22        let pad = "  ".repeat(indent);
23        
24        for (k, v) in map {
25            match v {
26                Value::Object(child_map) => {
27                    out.push_str(&format!("{}{}:\n{}", pad, k, Self::serialize_object(child_map, indent + 1)));
28                }
29                Value::Array(arr) => {
30                    out.push_str(&format!("{}{}[{}]:\n{}", pad, k, arr.len(), Self::serialize_array(arr, indent + 1)));
31                }
32                _ => {
33                    out.push_str(&format!("{}{}: {}\n", pad, k, Self::serialize(v)));
34                }
35            }
36        }
37        out
38    }
39
40    fn serialize_array(arr: &[Value], indent: usize) -> String {
41        if arr.is_empty() {
42            return "[]".to_string();
43        }
44
45        // Check if it's a homogeneous list of objects to use tabular TOON format
46        if let Some(first) = arr.first() {
47            if let Value::Object(first_map) = first {
48                let keys: Vec<String> = first_map.keys().cloned().collect();
49                let pad = "  ".repeat(indent);
50                let mut out = format!("{}{{{}}}:\n", pad, keys.join(","));
51
52                for item in arr {
53                    if let Value::Object(item_map) = item {
54                        let values: Vec<String> = keys.iter()
55                            .map(|k| item_map.get(k).map(|v| Self::serialize_flat(v)).unwrap_or_else(|| "~".to_string()))
56                            .collect();
57                        out.push_str(&format!("{}{}\n", pad, values.join(",")));
58                    }
59                }
60                return out;
61            }
62        }
63
64        // Fallback for simple arrays
65        let mut out = String::new();
66        let pad = "  ".repeat(indent);
67        for v in arr {
68            out.push_str(&format!("{}- {}\n", pad, Self::serialize(v).trim()));
69        }
70        out
71    }
72
73    fn serialize_flat(value: &Value) -> String {
74        match value {
75            Value::String(s) => s.replace(',', "\\,").to_string(),
76            Value::Number(n) => n.to_string(),
77            Value::Bool(b) => if *b { "T" } else { "F" }.to_string(),
78            _ => ".".to_string(),
79        }
80    }
81
82    /// Deserialize a TOON string back into a JSON value.
83    pub fn deserialize(input: &str) -> Result<Value, String> {
84        let lines: Vec<&str> = input.lines().filter(|l| !l.trim().is_empty()).collect();
85        if lines.is_empty() {
86            return Ok(Value::Null);
87        }
88
89        Self::parse_level(&lines, 0).map(|(v, _)| v)
90    }
91
92    fn parse_level(lines: &[&str], start_idx: usize) -> Result<(Value, usize), String> {
93        if start_idx >= lines.len() {
94            return Ok((Value::Null, start_idx));
95        }
96
97        let first_line = lines[start_idx];
98        let indent = first_line.chars().take_while(|c| c.is_whitespace()).count();
99        let trimmed = first_line.trim();
100
101        if trimmed.starts_with('{') && trimmed.contains("}:") {
102            // Tabular format: {id,name}:
103            return Self::parse_tabular(lines, start_idx, indent);
104        }
105
106        if trimmed.starts_with("- ") {
107            // List format
108            return Self::parse_list(lines, start_idx, indent);
109        }
110
111        // Object format (key: value)
112        let mut map = Map::new();
113        let mut idx = start_idx;
114
115        while idx < lines.len() {
116            let line = lines[idx];
117            let current_indent = line.chars().take_while(|c| c.is_whitespace()).count();
118            
119            if current_indent < indent {
120                break;
121            }
122            if current_indent > indent {
123                // This shouldn't happen in a well-formed object stream without a parent key
124                idx += 1;
125                continue;
126            }
127
128            let line_trimmed = line.trim();
129            if let Some(colon_idx) = line_trimmed.find(':') {
130                let mut key = line_trimmed[..colon_idx].trim().to_string();
131                
132                // Strip [len] suffix if present
133                if let Some(bracket_idx) = key.find('[') {
134                    if key.ends_with(']') {
135                        key = key[..bracket_idx].to_string();
136                    }
137                }
138
139                let val_part = line_trimmed[colon_idx + 1..].trim();
140
141                if val_part.is_empty() && idx + 1 < lines.len() {
142                    // Check if next line is more indented (nested object/array)
143                    let next_indent = lines[idx + 1].chars().take_while(|c| c.is_whitespace()).count();
144                    if next_indent > current_indent {
145                        let (child_val, next_idx) = Self::parse_level(lines, idx + 1)?;
146                        map.insert(key, child_val);
147                        idx = next_idx;
148                        continue;
149                    }
150                }
151                
152                map.insert(key, Self::parse_primitive(val_part));
153                idx += 1;
154            } else {
155                idx += 1;
156            }
157        }
158
159        Ok((Value::Object(map), idx))
160    }
161
162    fn parse_tabular(lines: &[&str], start_idx: usize, base_indent: usize) -> Result<(Value, usize), String> {
163        let header = lines[start_idx].trim();
164        let keys_str = header.trim_start_matches('{').trim_end_matches("}:");
165        let keys: Vec<&str> = keys_str.split(',').map(|k| k.trim()).collect();
166        
167        let mut arr = Vec::new();
168        let mut idx = start_idx + 1;
169
170        while idx < lines.len() {
171            let line = lines[idx];
172            let current_indent = line.chars().take_while(|c| c.is_whitespace()).count();
173            if current_indent <= base_indent && !line.trim().is_empty() && idx != (start_idx + 1) {
174                // We keep moving if it's the first line after header, otherwise check indent
175                if current_indent < base_indent { break; }
176            }
177
178            let row_trimmed = line.trim();
179            if row_trimmed.is_empty() { 
180                idx += 1;
181                continue; 
182            }
183
184            let values: Vec<Value> = row_trimmed.split(',')
185                .map(|v| Self::parse_primitive(v.trim()))
186                .collect();
187            
188            let mut obj = Map::new();
189            for (i, key) in keys.iter().enumerate() {
190                let val = values.get(i).cloned().unwrap_or(Value::Null);
191                obj.insert(key.to_string(), val);
192            }
193            arr.push(Value::Object(obj));
194            idx += 1;
195        }
196
197        Ok((Value::Array(arr), idx))
198    }
199
200    fn parse_list(lines: &[&str], start_idx: usize, base_indent: usize) -> Result<(Value, usize), String> {
201        let mut arr = Vec::new();
202        let mut idx = start_idx;
203
204        while idx < lines.len() {
205            let line = lines[idx];
206            let current_indent = line.chars().take_while(|c| c.is_whitespace()).count();
207            if current_indent < base_indent {
208                break;
209            }
210
211            let trimmed = line.trim();
212            if trimmed.starts_with("- ") {
213                arr.push(Self::parse_primitive(&trimmed[2..]));
214            }
215            idx += 1;
216        }
217
218        Ok((Value::Array(arr), idx))
219    }
220
221    fn parse_primitive(s: &str) -> Value {
222        match s {
223            "~" => Value::Null,
224            "T" => Value::Bool(true),
225            "F" => Value::Bool(false),
226            _ => {
227                if let Ok(n) = s.parse::<i64>() {
228                    Value::Number(n.into())
229                } else if let Ok(f) = s.parse::<f64>() {
230                    if let Some(n) = serde_json::Number::from_f64(f) {
231                        Value::Number(n)
232                    } else {
233                        Value::String(s.to_string())
234                    }
235                } else {
236                    Value::String(s.replace("\\,", ",").to_string())
237                }
238            }
239        }
240    }
241}
242
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// An array of flat objects is written as a `{keys}:` header followed by
    /// one comma-separated row per element.
    #[test]
    fn test_toon_tabular() {
        let data = json!([
            {"id": 1, "name": "Apple", "price": 10},
            {"id": 2, "name": "Banana", "price": 5}
        ]);
        let toon = Toon::serialize(&data);
        assert!(toon.contains("{id,name,price}:"));
        assert!(toon.contains("1,Apple,10"));
        assert!(toon.contains("2,Banana,5"));
    }

    /// Scalars are written inline; a nested object goes on indented lines
    /// below its bare `key:` line.
    #[test]
    fn test_toon_object() {
        let data = json!({
            "user": "admin",
            "meta": { "last_login": "2024-01-01" }
        });
        let toon = Toon::serialize(&data);
        assert!(toon.contains("user: admin"));
        assert!(toon.contains("meta:\n  last_login: 2024-01-01"));
    }

    /// A document mixing scalars, a scalar list and a table of flat objects
    /// survives serialize -> deserialize unchanged.
    #[test]
    fn test_toon_roundtrip() {
        let original = json!({
            "project": "Aether",
            "active": true,
            "version": 1,
            "null_val": null,
            "tags": ["ai", "rust", "security"],
            "files": [
                {"name": "main.rs", "size": 1024},
                {"name": "lib.rs", "size": 2048}
            ]
        });

        let serialized = Toon::serialize(&original);
        let deserialized = Toon::deserialize(&serialized).unwrap();

        // Spot checks first so a failure points at the offending field.
        assert_eq!(deserialized["project"], original["project"]);
        assert_eq!(deserialized["active"], json!(true));
        assert_eq!(deserialized["version"], json!(1));
        assert_eq!(deserialized["null_val"], Value::Null);
        assert_eq!(deserialized["tags"], original["tags"]);
        assert_eq!(deserialized["files"], original["files"]);

        // Then the whole document, including the serialized text on failure.
        assert_eq!(
            deserialized, original,
            "round-trip mismatch for TOON text:\n{}",
            serialized
        );
    }
}