1use serde_json::Value;
46
/// Whitespace unit that `indent` writes once per nesting level.
const INDENT: &str = " ";
48
49pub fn encode(value: &Value) -> String {
51 let mut out = String::new();
52 encode_value(value, 0, &mut out);
53 while out.ends_with('\n') {
55 out.pop();
56 }
57 out
58}
59
60fn encode_value(value: &Value, depth: usize, out: &mut String) {
61 match value {
62 Value::Null => out.push_str("null"),
63 Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
64 Value::Number(n) => out.push_str(&canonical_number(n)),
65 Value::String(s) => encode_string(s, ',', out),
66 Value::Array(arr) => encode_array(arr, depth, out),
67 Value::Object(obj) => encode_object_fields(obj, depth, out),
68 }
69}
70
71fn canonical_number(n: &serde_json::Number) -> String {
72 if let Some(i) = n.as_i64() {
73 return i.to_string();
74 }
75 if let Some(u) = n.as_u64() {
76 return u.to_string();
77 }
78 if let Some(f) = n.as_f64() {
79 if f.is_nan() || f.is_infinite() {
80 return "null".to_string();
81 }
82 if f == 0.0 {
83 return "0".to_string();
84 }
85 if f.fract() == 0.0 && f.abs() < (i64::MAX as f64) {
87 return (f as i64).to_string();
88 }
89 let s = format!("{}", f);
91 s
92 } else {
93 "null".to_string()
94 }
95}
96
97fn needs_quoting(s: &str, delimiter: char) -> bool {
99 if s.is_empty() {
100 return true;
101 }
102 if s.starts_with(' ') || s.ends_with(' ') {
103 return true;
104 }
105 if s == "true" || s == "false" || s == "null" {
106 return true;
107 }
108 if s.starts_with('-') {
109 return true;
110 }
111 if looks_numeric(s) {
113 return true;
114 }
115 for c in s.chars() {
117 if c == ':'
118 || c == '"'
119 || c == '\\'
120 || c == '['
121 || c == ']'
122 || c == '{'
123 || c == '}'
124 || c == '\n'
125 || c == '\r'
126 || c == '\t'
127 {
128 return true;
129 }
130 if c == delimiter {
131 return true;
132 }
133 }
134 false
135}
136
/// Reports whether `s` could be mistaken for a numeric literal.
///
/// After one optional leading `-`, the text matches when it is either a `0`
/// immediately followed by another digit (whatever comes after), or digits
/// with an optional `.digits` fraction and an optional `e`/`E` exponent
/// (optionally signed, at least one digit) and nothing else.
fn looks_numeric(s: &str) -> bool {
    /// Drops the leading run of ASCII digits from `bytes`.
    fn skip_digits(bytes: &[u8]) -> &[u8] {
        let run = bytes.iter().take_while(|b| b.is_ascii_digit()).count();
        &bytes[run..]
    }

    let unsigned = s.strip_prefix('-').unwrap_or(s).as_bytes();
    match unsigned {
        [] => return false,
        // Leading-zero forms such as "05" count as numeric-looking outright.
        [b'0', second, ..] if second.is_ascii_digit() => return true,
        [first, ..] if !first.is_ascii_digit() => return false,
        _ => {}
    }

    let mut rest = skip_digits(unsigned);

    if let [b'.', fraction @ ..] = rest {
        let after = skip_digits(fraction);
        if after.len() == fraction.len() {
            // A dot must be followed by at least one digit.
            return false;
        }
        rest = after;
    }

    if matches!(rest.first(), Some(b'e') | Some(b'E')) {
        let mut exponent = &rest[1..];
        if matches!(exponent.first(), Some(b'+') | Some(b'-')) {
            exponent = &exponent[1..];
        }
        let after = skip_digits(exponent);
        if after.len() == exponent.len() {
            // The exponent needs at least one digit.
            return false;
        }
        rest = after;
    }

    rest.is_empty()
}
187
/// Returns `s` with backslash, double quote, newline, carriage return and tab
/// replaced by their two-character backslash escapes; all other characters
/// are copied unchanged.
fn escape_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let sequence = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        match sequence {
            Some(seq) => escaped.push_str(seq),
            None => escaped.push(ch),
        }
    }
    escaped
}
202
203fn encode_string(s: &str, delimiter: char, out: &mut String) {
204 if needs_quoting(s, delimiter) {
205 out.push('"');
206 out.push_str(&escape_string(s));
207 out.push('"');
208 } else {
209 out.push_str(s);
210 }
211}
212
/// Reports whether an object key must be quoted.
///
/// A key may stay bare only when it starts with an ASCII letter or `_` and
/// every following character is an ASCII letter, digit, `_` or `.`.
/// Empty keys always need quotes.
fn key_needs_quoting(k: &str) -> bool {
    let mut rest = k.chars();
    match rest.next() {
        Some(head) if head.is_ascii_alphabetic() || head == '_' => {
            !rest.all(|ch| ch.is_ascii_alphanumeric() || ch == '_' || ch == '.')
        }
        // Empty key, or a first character that cannot start a bare key.
        _ => true,
    }
}
230
231fn encode_key(k: &str, out: &mut String) {
232 if key_needs_quoting(k) {
233 out.push('"');
234 out.push_str(&escape_string(k));
235 out.push('"');
236 } else {
237 out.push_str(k);
238 }
239}
240
241fn indent(depth: usize, out: &mut String) {
242 for _ in 0..depth {
243 out.push_str(INDENT);
244 }
245}
246
247fn encode_object_fields(obj: &serde_json::Map<String, Value>, depth: usize, out: &mut String) {
248 for (key, val) in obj {
249 indent(depth, out);
250 encode_key(key, out);
251
252 match val {
253 Value::Object(inner) if !inner.is_empty() => {
254 out.push_str(":\n");
255 encode_object_fields(inner, depth + 1, out);
256 }
257 Value::Array(arr) => {
258 encode_array_header(key, arr, out);
259 encode_array_body(arr, depth, out);
260 }
261 _ => {
262 out.push_str(": ");
263 encode_value(val, depth + 1, out);
264 out.push('\n');
265 }
266 }
267 }
268}
269
270fn try_tabular(arr: &[Value]) -> Option<Vec<String>> {
272 if arr.is_empty() {
273 return None;
274 }
275 let mut fields: Option<Vec<String>> = None;
276 for item in arr {
277 let obj = item.as_object()?;
278 let keys: Vec<String> = obj.keys().cloned().collect();
279 for val in obj.values() {
281 if val.is_object() || val.is_array() {
282 return None;
283 }
284 }
285 match &fields {
286 None => fields = Some(keys),
287 Some(f) => {
288 if keys.len() != f.len() || keys.iter().zip(f.iter()).any(|(a, b)| a != b) {
289 return None;
290 }
291 }
292 }
293 }
294 fields
295}
296
297fn encode_array_header(_key: &str, arr: &[Value], out: &mut String) {
298 let len = arr.len();
302
303 if let Some(fields) = try_tabular(arr) {
304 out.push_str(&format!("[{}]{{", len));
305 for (i, f) in fields.iter().enumerate() {
306 if i > 0 {
307 out.push(',');
308 }
309 encode_key(f, out);
310 }
311 out.push_str("}:\n");
312 } else if all_primitives(arr) {
313 out.push_str(&format!("[{}]: ", len));
315 for (i, val) in arr.iter().enumerate() {
316 if i > 0 {
317 out.push(',');
318 }
319 encode_primitive_value(val, ',', out);
320 }
321 out.push('\n');
322 } else {
323 out.push_str(&format!("[{}]:\n", len));
324 }
325}
326
327fn all_primitives(arr: &[Value]) -> bool {
328 arr.iter().all(|v| !v.is_object() && !v.is_array())
329}
330
331fn encode_primitive_value(val: &Value, delimiter: char, out: &mut String) {
332 match val {
333 Value::Null => out.push_str("null"),
334 Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
335 Value::Number(n) => out.push_str(&canonical_number(n)),
336 Value::String(s) => encode_string(s, delimiter, out),
337 _ => {}
338 }
339}
340
341fn encode_array_body(arr: &[Value], depth: usize, out: &mut String) {
342 if let Some(fields) = try_tabular(arr) {
343 for item in arr {
345 let obj = item.as_object().unwrap();
346 indent(depth + 1, out);
347 for (i, f) in fields.iter().enumerate() {
348 if i > 0 {
349 out.push(',');
350 }
351 let val = obj.get(f).unwrap_or(&Value::Null);
352 encode_primitive_value(val, ',', out);
353 }
354 out.push('\n');
355 }
356 } else if all_primitives(arr) {
357 } else {
359 for item in arr {
361 indent(depth + 1, out);
362 out.push_str("- ");
363 match item {
364 Value::Object(obj) if !obj.is_empty() => {
365 let mut iter = obj.iter();
367 if let Some((k, v)) = iter.next() {
368 encode_key(k, out);
369 match v {
370 Value::Object(inner) if !inner.is_empty() => {
371 out.push_str(":\n");
372 encode_object_fields(inner, depth + 2, out);
373 }
374 _ => {
375 out.push_str(": ");
376 encode_value(v, depth + 2, out);
377 out.push('\n');
378 }
379 }
380 for (k, v) in iter {
382 indent(depth + 2, out);
383 encode_key(k, out);
384 match v {
385 Value::Object(inner) if !inner.is_empty() => {
386 out.push_str(":\n");
387 encode_object_fields(inner, depth + 3, out);
388 }
389 _ => {
390 out.push_str(": ");
391 encode_value(v, depth + 3, out);
392 out.push('\n');
393 }
394 }
395 }
396 }
397 }
398 _ => {
399 encode_value(item, depth + 2, out);
400 out.push('\n');
401 }
402 }
403 }
404 }
405}
406
407fn encode_array(arr: &[Value], depth: usize, out: &mut String) {
408 let len = arr.len();
410
411 if let Some(fields) = try_tabular(arr) {
412 out.push_str(&format!("[{}]{{", len));
413 for (i, f) in fields.iter().enumerate() {
414 if i > 0 {
415 out.push(',');
416 }
417 encode_key(f, out);
418 }
419 out.push_str("}:\n");
420 for item in arr {
421 let obj = item.as_object().unwrap();
422 indent(depth + 1, out);
423 for (i, f) in fields.iter().enumerate() {
424 if i > 0 {
425 out.push(',');
426 }
427 let val = obj.get(f).unwrap_or(&Value::Null);
428 encode_primitive_value(val, ',', out);
429 }
430 out.push('\n');
431 }
432 } else if all_primitives(arr) {
433 out.push_str(&format!("[{}]: ", len));
434 for (i, val) in arr.iter().enumerate() {
435 if i > 0 {
436 out.push(',');
437 }
438 encode_primitive_value(val, ',', out);
439 }
440 out.push('\n');
441 } else {
442 out.push_str(&format!("[{}]:\n", len));
443 encode_array_body(arr, depth, out);
444 }
445}
446
/// Compresses chatty prose into a terse form.
///
/// Steps, in order:
/// 1. every phrase in `STRIP_PHRASES` is deleted;
/// 2. every phrase in `REPLACE_PHRASES` is swapped for its short form;
/// 3. the text is split on whitespace, words that match `FILLER_WORDS` once
///    surrounding ASCII punctuation is trimmed are dropped (together with
///    that punctuation), and the rest is rejoined with single spaces;
/// 4. spaces before `.` / `,` and doubled punctuation are tidied, and leading
///    `.`, `,`, `!` characters are removed.
///
/// Phrase matching ignores ASCII case and applies to every occurrence, not
/// just the first one.
pub fn caveman(text: &str) -> String {
    let mut result = text.to_string();

    for phrase in STRIP_PHRASES {
        result = replace_ascii_ci(&result, phrase, "");
    }

    for (verbose, short) in REPLACE_PHRASES {
        result = replace_ascii_ci(&result, verbose, short);
    }

    let kept: Vec<&str> = result
        .split_whitespace()
        .filter(|word| {
            let core = word
                .trim_matches(|c: char| c.is_ascii_punctuation())
                .to_lowercase();
            !FILLER_WORDS.contains(&core.as_str())
        })
        .collect();
    // Joining with one space already normalizes every whitespace run, so no
    // separate collapsing pass is needed afterwards.
    result = kept.join(" ");

    result = result.replace(" .", ".").replace(" ,", ",");
    result = result.replace(". .", ".").replace(",,", ",");

    // Deleted phrases can leave orphaned punctuation at the front.
    result
        .trim()
        .trim_start_matches(|c: char| matches!(c, '.' | ',' | '!') || c.is_whitespace())
        .to_string()
}

/// Replaces every non-overlapping occurrence of `needle` in `haystack` with
/// `replacement`, comparing ASCII letters case-insensitively.
///
/// ASCII lowercasing never changes byte lengths, so offsets found in the
/// lowered copy are valid char boundaries in the original text. Full Unicode
/// lowercasing can change lengths, which would make such offsets point at the
/// wrong bytes or into the middle of a character.
fn replace_ascii_ci(haystack: &str, needle: &str, replacement: &str) -> String {
    if needle.is_empty() {
        return haystack.to_string();
    }

    let lowered_haystack = haystack.to_ascii_lowercase();
    let lowered_needle = needle.to_ascii_lowercase();

    let mut rebuilt = String::with_capacity(haystack.len());
    let mut cursor = 0;
    while let Some(offset) = lowered_haystack[cursor..].find(&lowered_needle) {
        let start = cursor + offset;
        rebuilt.push_str(&haystack[cursor..start]);
        rebuilt.push_str(replacement);
        cursor = start + needle.len();
    }
    rebuilt.push_str(&haystack[cursor..]);
    rebuilt
}

/// Pleasantries and sign-offs that `caveman` deletes outright.
///
/// Order matters: phrases are applied top to bottom, so longer phrases are
/// listed before the shorter ones they contain.
const STRIP_PHRASES: &[&str] = &[
    "I would be happy to help you with that.",
    "I'd be happy to help you with that.",
    "I'd be happy to help with that.",
    "I would be happy to help with that.",
    "I'd be happy to help!",
    "I'd be happy to help.",
    "Sure! I'd be happy to",
    "Sure, I'd be happy to",
    "Sure! I can help with that.",
    "Sure, I can help with that.",
    "Sure thing!",
    "Sure!",
    "Sure,",
    "Of course!",
    "Of course,",
    "Absolutely!",
    "Absolutely,",
    "Let me help you with that.",
    "I'll help you with that.",
    "Great question!",
    "That's a great question.",
    "Good question!",
    "Here's what I found:",
    "Here is what I found:",
    "Let me explain.",
    "Let me break this down.",
    "I hope this helps!",
    "I hope that helps!",
    "Hope this helps!",
    "Let me know if you have any questions.",
    "Let me know if you need anything else.",
    "Feel free to ask if you have any questions.",
    "Don't hesitate to ask.",
    "Happy to help further!",
    "Is there anything else I can help with?",
    "Is there anything else you need?",
];

/// `(verbose, short)` pairs: `caveman` rewrites each verbose phrase to its
/// short form (an empty short form deletes the phrase).
///
/// Applied top to bottom, so longer variants precede the phrases they contain.
const REPLACE_PHRASES: &[(&str, &str)] = &[
    ("in order to", "to"),
    ("due to the fact that", "because"),
    ("for the purpose of", "for"),
    ("in the event that", "if"),
    ("at this point in time", "now"),
    ("at the present time", "now"),
    ("on the other hand", "but"),
    ("in addition to", "plus"),
    ("as a result of", "from"),
    ("with regard to", "re"),
    ("with respect to", "re"),
    ("in terms of", "for"),
    ("a large number of", "many"),
    ("a significant amount of", "much"),
    ("it is important to note that", "note:"),
    ("it should be noted that", "note:"),
    ("it is worth mentioning that", "note:"),
    ("please note that", "note:"),
    ("as you can see", ""),
    ("as mentioned above", ""),
    ("as previously mentioned", ""),
    ("is currently not working", "fails"),
    ("is not working", "fails"),
    ("is currently failing", "fails"),
    ("does not work", "fails"),
    ("has not been", "wasn't"),
    ("have not been", "weren't"),
    ("is not able to", "can't"),
    ("are not able to", "can't"),
    ("was not able to", "couldn't"),
    ("it appears that", ""),
    ("it seems that", ""),
    ("it looks like", ""),
    ("I believe that", ""),
    ("I think that", ""),
    ("in my opinion", ""),
    ("basically what happens is", ""),
    ("what's happening here is", ""),
    ("the reason for this is", "reason:"),
    ("the issue here is that", "issue:"),
    ("the problem is that", "problem:"),
    ("make sure to", "must"),
    ("you need to make sure", "must"),
    ("you'll want to", ""),
    ("you might want to", ""),
    ("you should consider", "consider"),
    ("it would be a good idea to", "should"),
    ("properly initialized", "initialized"),
    ("correctly configured", "configured"),
    ("successfully completed", "completed"),
];

/// Lowercase words that `caveman` drops when they appear as standalone,
/// whitespace-separated tokens.
const FILLER_WORDS: &[&str] = &[
    "the",
    "a",
    "an",
    "just",
    "really",
    "very",
    "quite",
    "rather",
    "fairly",
    "somewhat",
    "actually",
    "basically",
    "essentially",
    "literally",
    "obviously",
    "clearly",
    "simply",
    "merely",
    "certainly",
    "definitely",
    "perhaps",
    "maybe",
    "possibly",
    "potentially",
    "presumably",
    "however",
    "furthermore",
    "moreover",
    "additionally",
    "consequently",
    "therefore",
    "thus",
    "hence",
    "accordingly",
    "please",
    "kindly",
    "respective",
    "corresponding",
];
657
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Flat objects render as one `key: value` line per field.
    #[test]
    fn test_simple_object() {
        let encoded = encode(&json!({"name": "aura", "version": "0.4.0", "active": true}));
        for line in &["name: aura", "version: 0.4.0", "active: true"] {
            assert!(encoded.contains(*line));
        }
    }

    /// Nested objects open with `key:` and indent their fields.
    #[test]
    fn test_nested_object() {
        let encoded = encode(&json!({"server": {"name": "aura-vcs", "version": "1.0"}}));
        assert!(encoded.contains("server:\n"));
        assert!(encoded.contains(" name: aura-vcs"));
    }

    /// Uniform arrays of flat objects collapse into a header plus rows.
    #[test]
    fn test_tabular_array() {
        let document = json!({
            "snapshots": [
                {"file": "main.rs", "trigger": "watcher", "ts": 123},
                {"file": "lib.rs", "trigger": "mcp", "ts": 456}
            ]
        });
        let encoded = encode(&document);
        assert!(encoded.contains("snapshots[2]{file,trigger,ts}:"));
        assert!(encoded.contains("main.rs,watcher,123"));
    }

    /// Strings containing a colon are emitted in double quotes.
    #[test]
    fn test_quoting() {
        let encoded = encode(&json!({"msg": "hello world: test"}));
        assert!(encoded.contains("\"hello world: test\""));
    }

    /// An empty root object encodes to the empty string.
    #[test]
    fn test_empty_object() {
        assert_eq!(encode(&json!({})), "");
    }

    /// Arrays of scalars are written inline on the header line.
    #[test]
    fn test_primitive_array() {
        let encoded = encode(&json!({"tags": ["rust", "git", "ai"]}));
        assert!(encoded.contains("tags[3]: rust,git,ai"));
    }

    /// Opening pleasantries disappear while the substance stays.
    #[test]
    fn test_caveman_strips_pleasantries() {
        let terse = caveman("Sure! I'd be happy to help you with that. The function fails.");
        for gone in &["Sure", "happy"] {
            assert!(!terse.contains(*gone));
        }
        assert!(terse.contains("function fails"));
    }

    /// Wordy connectives are swapped for their short forms.
    #[test]
    fn test_caveman_replaces_verbose_phrases() {
        let terse = caveman("In order to fix the bug, due to the fact that the config is wrong.");
        for kept in &["to fix", "because"] {
            assert!(terse.contains(*kept));
        }
        for gone in &["in order to", "due to the fact that"] {
            assert!(!terse.contains(*gone));
        }
    }

    /// Standalone filler words are removed.
    #[test]
    fn test_caveman_strips_filler_words() {
        let terse = caveman("The variable is actually just really not initialized.");
        for gone in &["actually", "just", "really"] {
            assert!(!terse.contains(*gone));
        }
        for kept in &["variable", "not initialized"] {
            assert!(terse.contains(*kept));
        }
    }

    /// Type names and generics pass through untouched.
    #[test]
    fn test_caveman_preserves_technical_content() {
        let terse = caveman("HashMap<String, Vec<u8>> implements Clone and Send.");
        for kept in &["HashMap<String,", "Clone", "Send"] {
            assert!(terse.contains(*kept));
        }
    }

    /// Empty input yields empty output.
    #[test]
    fn test_caveman_empty_input() {
        assert_eq!(caveman(""), "");
    }

    /// A typical verbose reply shrinks by more than a quarter while keeping
    /// its key nouns.
    #[test]
    fn test_caveman_token_reduction() {
        let original = "Sure! I'd be happy to help you with that. The function is currently \
                        not working because the variable has not been properly initialized \
                        in the constructor. In order to fix this, you need to make sure that \
                        the value is correctly configured before calling the method.";
        let compressed = caveman(original);
        let budget = original.len() * 3 / 4;
        assert!(
            compressed.len() < budget,
            "Expected >25% reduction. Original: {} chars, caveman: {} chars",
            original.len(),
            compressed.len()
        );
        for kept in &["function", "variable", "constructor"] {
            assert!(compressed.contains(*kept));
        }
    }
}