1use crate::{Result, ZoonError};
2use serde::Serialize;
3use std::collections::{BTreeMap, HashMap, HashSet};
4
5pub fn encode<T: Serialize>(value: &T) -> Result<String> {
6 let json_value = serde_json::to_value(value).map_err(|e| ZoonError::UnsupportedType(e.to_string()))?;
7 encode_value(&json_value)
8}
9
10fn encode_value(value: &serde_json::Value) -> Result<String> {
11 match value {
12 serde_json::Value::Array(arr) => encode_tabular(arr),
13 serde_json::Value::Object(obj) => encode_inline(obj),
14 _ => Err(ZoonError::InvalidFormat("top level must be object or array".into())),
15 }
16}
17
18fn flatten_object(
19 prefix: &str,
20 value: &serde_json::Value,
21 result: &mut BTreeMap<String, serde_json::Value>,
22) {
23 if let serde_json::Value::Object(obj) = value {
24 for (k, v) in obj {
25 let new_key = if prefix.is_empty() {
26 k.clone()
27 } else {
28 format!("{}.{}", prefix, k)
29 };
30 flatten_object(&new_key, v, result);
31 }
32 } else {
33 result.insert(prefix.to_string(), value.clone());
34 }
35}
36
/// Chooses short aliases for dotted key prefixes that are repeated often
/// enough to be worth abbreviating.
///
/// Every proper dotted prefix of every key is counted (for `a.b.c` these are
/// `a` and `a.b`). A prefix is kept when
/// `(len - 2) * count - (len + 4)` is positive, where `len` is the prefix
/// length in bytes. Candidates are processed from highest to lowest score;
/// equal scores are ordered by prefix so the result does not depend on
/// `HashMap` iteration order.
///
/// The alias is the lowercased first character of the prefix's last segment.
/// If that alias is already taken and is a single byte, the first unused
/// letter in `a..=z` is used instead; a taken multi-byte alias causes the
/// prefix to be skipped. Prefixes whose last segment is empty (keys with
/// consecutive or trailing dots) are skipped rather than panicking.
///
/// Returns a map from prefix to alias containing at most ten entries.
fn detect_aliases(keys: &[String]) -> HashMap<String, String> {
    const MAX_ALIASES: usize = 10;

    let mut prefix_counts: HashMap<String, usize> = HashMap::new();
    for key in keys {
        let parts: Vec<&str> = key.split('.').collect();
        // An undotted key has a single part, so this range is empty for it.
        for end in 1..parts.len() {
            *prefix_counts.entry(parts[..end].join(".")).or_insert(0) += 1;
        }
    }

    let mut savings: Vec<(String, isize)> = prefix_counts
        .into_iter()
        .filter_map(|(prefix, count)| {
            let prefix_len = prefix.len() as isize;
            let score = (prefix_len - 2) * (count as isize) - (prefix_len + 4);
            if score > 0 {
                Some((prefix, score))
            } else {
                None
            }
        })
        .collect();

    // Highest score first; ties broken by prefix so that the output is stable
    // across runs (the counts came out of a HashMap in arbitrary order).
    savings.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));

    let mut aliases: HashMap<String, String> = HashMap::new();
    let mut used_aliases: HashSet<String> = HashSet::new();

    for (prefix, _) in savings {
        if aliases.len() >= MAX_ALIASES {
            break;
        }

        let first_char = match prefix
            .rsplit('.')
            .next()
            .and_then(|segment| segment.chars().next())
        {
            Some(c) => c,
            // Empty last segment: nothing to derive an alias from.
            None => continue,
        };
        let mut candidate = first_char.to_lowercase().to_string();

        if used_aliases.contains(&candidate) {
            // Only single-byte aliases fall back to a letter.
            if candidate.len() != 1 {
                continue;
            }
            let free_letter = (b'a'..=b'z')
                .map(|b| char::from(b).to_string())
                .find(|letter| !used_aliases.contains(letter));
            match free_letter {
                Some(letter) => candidate = letter,
                None => continue,
            }
        }

        used_aliases.insert(candidate.clone());
        aliases.insert(prefix, candidate);
    }

    aliases
}
100
/// Rewrites `name` using the alias table produced by `detect_aliases`.
///
/// A prefix matches when `name` equals it or continues with `.` right after
/// it. If several aliased prefixes match (for example both `user` and
/// `user.profile`), the longest one is used, so the result is the same on
/// every run regardless of `HashMap` iteration order.
///
/// Returns `%alias` followed by the unmatched remainder of `name` (including
/// its leading `.`), or `name` unchanged when no prefix matches.
fn apply_alias(name: &str, aliases: &HashMap<String, String>) -> String {
    let best_match = aliases
        .iter()
        .filter(|(prefix, _)| {
            name.strip_prefix(prefix.as_str())
                .map_or(false, |rest| rest.is_empty() || rest.starts_with('.'))
        })
        // Two distinct prefixes of the same name cannot have equal length,
        // so the maximum is unique.
        .max_by_key(|(prefix, _)| prefix.len());

    match best_match {
        Some((prefix, alias)) => format!("%{}{}", alias, &name[prefix.len()..]),
        None => name.to_string(),
    }
}
112
113fn encode_tabular(arr: &[serde_json::Value]) -> Result<String> {
114 if arr.is_empty() {
115 return Ok(String::new());
116 }
117
118 let mut flattened_rows = Vec::new();
120 let mut all_keys_set = HashSet::new();
121
122 for item in arr {
123 let mut flat_map = BTreeMap::new();
124 flatten_object("", item, &mut flat_map);
125 for k in flat_map.keys() {
126 all_keys_set.insert(k.clone());
127 }
128 flattened_rows.push(flat_map);
129 }
130
131 let mut all_keys: Vec<String> = all_keys_set.into_iter().collect();
132 all_keys.sort();
133
134 let mut constants = BTreeMap::new();
136 let mut active_keys = Vec::new();
137
138 if arr.len() > 1 {
139 for key in &all_keys {
140 let first_val = flattened_rows[0].get(key).unwrap_or(&serde_json::Value::Null);
141 let mut is_const = true;
142 for row in &flattened_rows {
143 let val = row.get(key).unwrap_or(&serde_json::Value::Null);
144 if val != first_val {
145 is_const = false;
146 break;
147 }
148 }
149 if is_const && !first_val.is_null() {
150 constants.insert(key.clone(), first_val.clone());
151 } else {
152 active_keys.push(key.clone());
153 }
154 }
155 } else {
156 active_keys = all_keys;
157 }
158
159 let aliases = detect_aliases(&active_keys);
161
162 let mut stats: BTreeMap<String, ColumnStats> = BTreeMap::new();
164 for key in &active_keys {
165 stats.insert(key.clone(), ColumnStats::default());
166 }
167
168 for row in &flattened_rows {
169 for key in &active_keys {
170 let val = row.get(key).unwrap_or(&serde_json::Value::Null);
171 let s = serialize_value(val);
172 let stat = stats.get_mut(key).unwrap();
173
174 stat.values.push(s.clone());
175 stat.unique_vals.insert(s);
176
177 if key.to_lowercase() == "id" {
179 if let serde_json::Value::Number(_) = val {
180 stat.is_seq = true;
181 }
182 }
183 }
184 }
185
186 let mut header_parts = vec!["#".to_string()];
188
189 let mut alias_defs: Vec<String> = Vec::new();
191 for (prefix, alias) in &aliases {
192 alias_defs.push(format!("%{}={}", alias, prefix));
193 }
194 alias_defs.sort(); let mut lines = Vec::new();
203 if !alias_defs.is_empty() {
204 lines.push(alias_defs.join(" "));
205 }
206
207 for (k, v) in &constants {
209 let aliased = apply_alias(k, &aliases).replace(" ", "_");
210 let s_val = serialize_value(v);
211 let mut type_code = ":";
212
213 if let serde_json::Value::String(_) = v {
214 type_code = "=";
215 } else if let serde_json::Value::Bool(b) = v {
216 if *b {
217 }
222 }
223
224 let display_val = if let serde_json::Value::Bool(b) = v {
225 if *b { "y".to_string() } else { "n".to_string() }
226 } else {
227 s_val
228 };
229
230 let sep = if let serde_json::Value::String(_) = v { "=" } else { ":" };
231 header_parts.push(format!("@{}{}{}", aliased, sep, display_val));
232 }
233
234 let mut skip_indices = HashSet::new();
235
236 for (i, key) in active_keys.iter().enumerate() {
237 let stat = stats.get(key).unwrap();
238 let aliased = apply_alias(key, &aliases).replace(" ", "_");
239 let type_code = infer_type(stat, arr.len(), key);
240
241 if type_code == "i+" {
242 skip_indices.insert(i);
243 }
244
245 if type_code.starts_with('=') || type_code.starts_with('!') {
246 header_parts.push(format!("{}{}", aliased, type_code));
247 } else {
248 header_parts.push(format!("{}:{}", aliased, type_code));
249 }
250 }
251
252 let mut all_skipped = true;
255 if active_keys.is_empty() {
256 } else {
258 for i in 0..active_keys.len() {
259 if !skip_indices.contains(&i) {
260 all_skipped = false;
261 break;
262 }
263 }
264 }
265
266 if all_skipped && !arr.is_empty() {
267 header_parts.push(format!("+{}", arr.len()));
268 }
269
270 lines.push(header_parts.join(" "));
271
272 if all_skipped {
273 return Ok(lines.join("\n"));
274 }
275
276 for row in &flattened_rows {
278 let mut out_row = Vec::new();
279 for (i, key) in active_keys.iter().enumerate() {
280 if skip_indices.contains(&i) { continue; }
281
282 let val = row.get(key).unwrap_or(&serde_json::Value::Null);
283 let mut s = serialize_value(val);
284
285 let stat = stats.get(key).unwrap();
286 let type_code = infer_type(stat, arr.len(), key);
287
288 if type_code == "b" {
289 if s == "true" { s = "1".into(); }
290 else if s == "false" { s = "0".into(); }
291 } else if type_code == "t" {
292 if let serde_json::Value::String(raw) = val {
293 s = format!("\"{}\"", raw.replace('"', "\\\""));
294 }
295 }
296 out_row.push(s);
297 }
298 lines.push(out_row.join(" "));
299 }
300
301 Ok(lines.join("\n"))
302}
303
304fn encode_inline(obj: &serde_json::Map<String, serde_json::Value>) -> Result<String> {
305 let parts: Vec<String> = obj.iter().map(|(k, v)| format_inline_pair(k, v)).collect();
306 Ok(parts.join(" "))
307}
308
309fn format_inline_pair(key: &str, value: &serde_json::Value) -> String {
310 match value {
311 serde_json::Value::String(s) => format!("{}={}", key, s.replace(' ', "_")),
312 serde_json::Value::Bool(b) => format!("{}:{}", key, if *b { "y" } else { "n" }),
313 serde_json::Value::Number(n) => format!("{}:{}", key, n),
314 serde_json::Value::Null => format!("{}:~", key),
315 serde_json::Value::Object(obj) => {
316 let inner = encode_inline(obj).unwrap_or_default();
317 format!("{}:{{{}}}", key, inner)
318 }
319 serde_json::Value::Array(_) => format!("{}:[...]", key),
320 }
321}
322
323fn serialize_value(value: &serde_json::Value) -> String {
324 match value {
325 serde_json::Value::String(s) => s.replace(' ', "_"),
326 serde_json::Value::Number(n) => n.to_string(),
327 serde_json::Value::Bool(b) => if *b { "1".into() } else { "0".into() },
328 serde_json::Value::Null => "~".into(),
329 serde_json::Value::Object(obj) => {
330 let inner = encode_inline(obj).unwrap_or_default();
331 format!("{{{}}}", inner)
332 }
333 serde_json::Value::Array(_) => "[...]".into(),
334 }
335}
336
/// Per-column data gathered while encoding a table; consumed by `infer_type`.
#[derive(Default)]
struct ColumnStats {
    /// Serialized cell text of the column, one entry per row, in row order.
    values: Vec<String>,
    /// The distinct serialized cell texts of the column.
    unique_vals: HashSet<String>,
    /// Set when the column is named `id` (case-insensitively) and at least
    /// one of its cells is a JSON number.
    is_seq: bool,
    /// Not read or written in the visible code - purpose to be confirmed.
    indexed: bool,
    /// Not read or written in the visible code - purpose to be confirmed.
    enum_keys: Vec<String>,
    /// Not read or written in the visible code - purpose to be confirmed.
    is_text: bool,
}
346
347fn infer_type(stat: &ColumnStats, arr_len: usize, key: &str) -> String {
348 if key.to_lowercase() == "id" && stat.is_seq && check_sequence(&stat.values) {
349 return "i+".into();
350 }
351
352 let all_nums = stat.values.iter().all(|v| v.parse::<i64>().is_ok() || v == "~");
353 if all_nums && !stat.values.iter().all(|v| v == "~") {
354 return "i".into();
355 }
356
357 let all_bools = stat.values.iter().all(|v| v == "0" || v == "1" || v == "~");
358 if all_bools {
359 return "b".into();
360 }
361
362 if stat.unique_vals.len() <= 10 && stat.unique_vals.len() < arr_len {
363 let mut vals: Vec<_> = stat.unique_vals.iter().filter(|v| *v != "~").cloned().collect();
364 vals.sort();
365 if !vals.is_empty() {
366 if vals.len() >= 3 {
367 let avg_len: usize = vals.iter().map(|v| v.len()).sum::<usize>() / vals.len();
368 let literal_cost = avg_len * arr_len;
369 let index_cost = vals.join("|").len() + arr_len * 2;
370 if literal_cost > index_cost {
371 return format!("!{}", vals.join("|"));
372 }
373 }
374 return format!("={}", vals.join("|"));
375 }
376 }
377
378 let total_len: usize = stat.values.iter().map(|v| v.len()).sum();
379 if !stat.values.is_empty() && total_len / stat.values.len() > 30 {
380 return "t".into();
381 }
382
383 "s".into()
384}
385
/// Returns `true` when `values` is exactly `"1"`, `"2"`, `"3"`, ... in order.
///
/// The comparison is textual, so `"01"` or `"+1"` do not count as `1`.
/// An empty slice is accepted.
fn check_sequence(values: &[String]) -> bool {
    values
        .iter()
        .zip(1usize..)
        .all(|(actual, expected)| *actual == expected.to_string())
}