1use std::collections::HashSet;
16use std::time::Instant;
17use tracing::debug;
18
19pub mod adaptive;
20pub mod cache;
21pub mod profiles;
22
23pub use adaptive::{recommend_mode_for_content, AdaptiveRecommendation};
24pub use cache::{cache_policy_summary, effective_ttl_with_hysteresis, CacheTtlResult};
25pub use profiles::{
26 list_builtin_profiles, resolve_builtin_profile, suggest_profile_id_for_project,
27 CompressionProfile, BUILTIN_PROFILES,
28};
29
/// How aggressively prompt text is compressed.
///
/// The default is [`EfficientMode::Off`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(
    feature = "graph-telemetry",
    derive(serde::Serialize, serde::Deserialize)
)]
pub enum EfficientMode {
    /// No compression (retain fraction `1.0`).
    #[default]
    Off,
    /// Moderate compression (retain fraction `0.55`).
    Balanced,
    /// Strong compression (retain fraction `0.35`).
    Aggressive,
}

impl EfficientMode {
    /// Parses a configuration value into a mode.
    ///
    /// Matching ignores case and surrounding whitespace, so values such as
    /// `"Balanced\n"` or `" aggressive "` are recognised instead of silently
    /// falling back to [`EfficientMode::Off`].
    ///
    /// * `"balanced"` or `"adaptive"` → [`EfficientMode::Balanced`]
    /// * `"aggressive"` → [`EfficientMode::Aggressive`]
    /// * anything else (including the empty string) → [`EfficientMode::Off`]
    pub fn parse_config(s: &str) -> Self {
        match s.trim().to_lowercase().as_str() {
            "balanced" | "adaptive" => Self::Balanced,
            "aggressive" => Self::Aggressive,
            _ => Self::Off,
        }
    }

    /// Derives a mode from a free-form, human-written hint.
    ///
    /// The hint is ASCII-lowercased and searched for known phrases. Groups are
    /// checked in the order *off*, *aggressive*, *balanced*; the first group
    /// with a matching phrase wins. When no phrase matches, the lowercased hint
    /// is handed to [`EfficientMode::parse_config`].
    pub fn parse_natural_language(s: &str) -> Self {
        const OFF_PHRASES: &[&str] = &[
            "disable compression",
            "no compression",
            "compression off",
            "eco off",
            "turn off eco",
            "off mode",
        ];
        const AGGRESSIVE_PHRASES: &[&str] = &[
            "aggressive",
            "max savings",
            "highest savings",
            "ultra eco",
            "eco aggressive",
        ];
        const BALANCED_PHRASES: &[&str] = &[
            "balanced",
            "default eco",
            "eco balanced",
            "enable eco",
            "compression on",
        ];

        let lowered = s.to_ascii_lowercase();
        let mentions_any =
            |phrases: &[&str]| phrases.iter().any(|phrase| lowered.contains(*phrase));

        // Explicit opt-outs are checked first so they beat any other keyword.
        if mentions_any(OFF_PHRASES) {
            Self::Off
        } else if mentions_any(AGGRESSIVE_PHRASES) {
            Self::Aggressive
        } else if mentions_any(BALANCED_PHRASES) {
            Self::Balanced
        } else {
            Self::parse_config(&lowered)
        }
    }

    /// Fraction of the estimated original tokens this mode aims to retain.
    fn retain(self) -> f32 {
        match self {
            Self::Off => 1.0,
            Self::Balanced => 0.55,
            Self::Aggressive => 0.35,
        }
    }
}
107
108#[derive(Debug, Clone)]
110#[cfg_attr(
111 feature = "graph-telemetry",
112 derive(serde::Serialize, serde::Deserialize)
113)]
114pub struct CompressionMetrics {
115 pub mode: EfficientMode,
116 pub original_chars: usize,
117 pub compressed_chars: usize,
118 pub original_tokens: usize,
119 pub compressed_tokens: usize,
120 pub tokens_saved: usize,
121 pub savings_ratio_pct: f32,
123 pub semantic_preservation_score: Option<f32>,
125 pub elapsed_ms: u64,
126}
127
128impl CompressionMetrics {
129 #[must_use]
130 pub fn from_result(
131 mode: EfficientMode,
132 original_text: &str,
133 compressed: &Compressed,
134 semantic_preservation_score: Option<f32>,
135 elapsed_ms: u64,
136 ) -> Self {
137 let tokens_saved = compressed.tokens_saved();
138 let savings_ratio_pct = if compressed.original_tokens == 0 {
139 0.0
140 } else {
141 (tokens_saved as f32 * 100.0) / compressed.original_tokens as f32
142 };
143 Self {
144 mode,
145 original_chars: original_text.len(),
146 compressed_chars: compressed.text.len(),
147 original_tokens: compressed.original_tokens,
148 compressed_tokens: compressed.compressed_tokens,
149 tokens_saved,
150 savings_ratio_pct,
151 semantic_preservation_score,
152 elapsed_ms,
153 }
154 }
155}
156
157pub trait CompressionTelemetrySink: Send + Sync {
159 fn emit(&self, metrics: CompressionMetrics);
160}
161
162pub struct PromptCompressor {
167 mode: EfficientMode,
168 emit_telemetry: Option<Box<dyn Fn(CompressionMetrics) + Send + Sync>>,
169}
170
171impl PromptCompressor {
172 #[must_use]
173 pub fn new(mode: EfficientMode) -> Self {
174 Self {
175 mode,
176 emit_telemetry: None,
177 }
178 }
179
180 #[must_use]
181 pub fn from_natural_language(mode_hint: &str) -> Self {
182 Self::new(EfficientMode::parse_natural_language(mode_hint))
183 }
184
185 #[must_use]
186 pub fn with_telemetry_callback(
187 mode: EfficientMode,
188 emit_telemetry: Option<Box<dyn Fn(CompressionMetrics) + Send + Sync>>,
189 ) -> Self {
190 Self {
191 mode,
192 emit_telemetry,
193 }
194 }
195
196 pub fn compress(&self, text: &str) -> Compressed {
197 self.compress_with_semantic_score(text, None)
198 }
199
200 pub fn compress_with_semantic_score(
201 &self,
202 text: &str,
203 semantic_preservation_score: Option<f32>,
204 ) -> Compressed {
205 let t0 = Instant::now();
206 let result = compress(text, self.mode);
207 if let Some(cb) = &self.emit_telemetry {
208 cb(CompressionMetrics::from_result(
209 self.mode,
210 text,
211 &result,
212 semantic_preservation_score,
213 t0.elapsed().as_millis() as u64,
214 ));
215 }
216 result
217 }
218}
219
/// Outcome of a compression call: the resulting text plus token estimates.
pub struct Compressed {
    /// Text after compression.
    pub text: String,
    /// Estimated token count of the input.
    pub original_tokens: usize,
    /// Estimated token count of `text`.
    pub compressed_tokens: usize,
}

impl Compressed {
    /// Estimated tokens removed by compression.
    ///
    /// Never underflows: yields `0` when `compressed_tokens` is not smaller
    /// than `original_tokens`.
    pub fn tokens_saved(&self) -> usize {
        let Self {
            original_tokens,
            compressed_tokens,
            ..
        } = self;
        original_tokens.saturating_sub(*compressed_tokens)
    }
}
236
237#[inline]
244#[must_use]
245pub fn tokenize_estimate(s: &str) -> usize {
246 tok(s)
247}
248
/// Rough token estimate: one token per four bytes of UTF-8, plus one.
///
/// The empty string therefore estimates to `1`.
fn tok(s: &str) -> usize {
    let byte_len = s.len();
    1 + byte_len / 4
}
252
/// Filler phrases removed from compressed prose.
///
/// Each entry is matched exactly (case-sensitive, including its leading or
/// trailing space) and replaced by a single space. Entries are applied in the
/// order listed.
const FILLERS: &[&str] = &[
    // Sentence-opening hedges and courtesies.
    "I think ",
    "I believe ",
    "Basically, ",
    "Essentially, ",
    "Of course, ",
    "Please note that ",
    "It is worth noting that ",
    "It's worth noting that ",
    "I would like to ",
    "I'd like to ",
    "Don't hesitate to ",
    "Feel free to ",
    "As you know, ",
    "As mentioned earlier, ",
    "That being said, ",
    "To be honest, ",
    "Needless to say, ",
    // Mid-sentence intensifiers; the surrounding spaces keep whole words only.
    " basically ",
    " essentially ",
    " simply ",
    " just ",
    " very ",
    " really ",
];
279
/// Markers that force a sentence to be kept in every mode.
///
/// `hard_keep` lowercases both the sentence and each marker before testing
/// for a substring match, so matching is case-insensitive.
const HARD_PRESERVE: &[&str] = &[
    // Wording that signals precise or already-attempted actions.
    "exact",
    "steps",
    "already tried",
    "already restarted",
    "already checked",
    "restart",
    "daemon",
    "error",
    // URLs.
    "http://",
    "https://",
    // Identifier- and syntax-like fragments.
    "R http",
    "R web",
    "L_",
    "->",
    "::",
    ".ainl",
    "opcode",
    "R queue",
    "R llm",
    "R core",
    "R solana",
    "R postgres",
    "R redis",
    // Code fence delimiter.
    "```",
];
308
/// Markers that protect a sentence in every mode except
/// [`EfficientMode::Aggressive`], where a match only raises the sentence's
/// score.
///
/// `soft_match` compares case-insensitively by lowercasing both sides.
const SOFT_PRESERVE: &[&str] = &[
    "##",
    " ms",
    " kb",
    " mb",
    " gb",
    " %",
    "openfang",
    "armaraos",
    "manifest",
];
316
317fn hard_keep(s: &str) -> bool {
318 let lo = s.to_lowercase();
319 HARD_PRESERVE.iter().any(|p| lo.contains(&p.to_lowercase()))
320}
321
322fn soft_match(s: &str) -> bool {
323 let lo = s.to_lowercase();
324 SOFT_PRESERVE.iter().any(|p| lo.contains(&p.to_lowercase()))
325}
326
327fn must_keep(s: &str, mode: EfficientMode) -> bool {
329 hard_keep(s) || (mode != EfficientMode::Aggressive && soft_match(s))
330}
331
332pub fn compress(text: &str, mode: EfficientMode) -> Compressed {
337 let orig = tok(text);
338 if mode == EfficientMode::Off || orig < 80 {
339 return Compressed {
340 text: text.to_string(),
341 original_tokens: orig,
342 compressed_tokens: orig,
343 };
344 }
345 let budget = ((orig as f32 * mode.retain()) as usize).max(orig / 4);
350
351 let mut blocks: Vec<(bool, String)> = Vec::new();
353 let mut rest = text;
354 while let Some(f) = rest.find("```") {
355 if f > 0 {
356 blocks.push((false, rest[..f].to_string()));
357 }
358 rest = &rest[f + 3..];
359 if let Some(e) = rest.find("```") {
360 blocks.push((true, format!("```{}```", &rest[..e])));
361 rest = &rest[e + 3..];
362 } else {
363 blocks.push((true, format!("```{rest}")));
364 rest = "";
365 break;
366 }
367 }
368 if !rest.is_empty() {
369 blocks.push((false, rest.to_string()));
370 }
371
372 let code_tok: usize = blocks.iter().filter(|(c, _)| *c).map(|(_, t)| tok(t)).sum();
373 let mut prose_budget = budget.saturating_sub(code_tok);
374 let mut out: Vec<String> = Vec::new();
375
376 for (is_code, block) in &blocks {
377 if *is_code {
378 out.push(block.clone());
379 continue;
380 }
381 let prose = compress_prose(block, prose_budget, mode);
382 prose_budget = prose_budget.saturating_sub(tok(&prose));
383 out.push(prose);
384 }
385
386 let result = out.join("\n\n").trim().to_string();
387 let c = tok(&result);
388 if c >= orig {
390 debug!(orig_tok = orig, "prompt_compressor: no gain — passthrough");
391 Compressed {
392 text: text.to_string(),
393 original_tokens: orig,
394 compressed_tokens: orig,
395 }
396 } else {
397 debug!(
398 orig_tok = orig,
399 compressed_tok = c,
400 savings_pct = 100u64.saturating_sub((c as u64 * 100) / orig.max(1) as u64),
401 original_text = %text,
402 compressed_text = %result,
403 "prompt_compressor: compressed"
404 );
405 Compressed {
406 text: result,
407 original_tokens: orig,
408 compressed_tokens: c,
409 }
410 }
411}
412
413pub fn compress_with_metrics(
415 text: &str,
416 mode: EfficientMode,
417 semantic_preservation_score: Option<f32>,
418) -> (Compressed, CompressionMetrics) {
419 let t0 = Instant::now();
420 let result = compress(text, mode);
421 let semantic_preservation_score = semantic_preservation_score
422 .or_else(|| Some(estimate_semantic_preservation_score(text, &result.text)));
423 let metrics = CompressionMetrics::from_result(
424 mode,
425 text,
426 &result,
427 semantic_preservation_score,
428 t0.elapsed().as_millis() as u64,
429 );
430 (result, metrics)
431}
432
/// Estimates how much of `original`'s vocabulary survives in `compressed`.
///
/// Both texts are split on every character that is not alphanumeric, `_` or
/// `-`. Pieces shorter than four bytes are ignored and the rest are
/// ASCII-lowercased and de-duplicated. The score is the share of `original`'s
/// distinct terms that also occur in `compressed`, in `0.0..=1.0`.
///
/// Returns `1.0` when `original` yields no terms at all.
#[must_use]
pub fn estimate_semantic_preservation_score(original: &str, compressed: &str) -> f32 {
    let significant_terms = |text: &str| {
        let mut found = std::collections::HashSet::new();
        for piece in text.split(|c: char| !(c.is_alphanumeric() || c == '_' || c == '-')) {
            if piece.len() >= 4 {
                found.insert(piece.to_ascii_lowercase());
            }
        }
        found
    };

    let source_terms = significant_terms(original);
    if source_terms.is_empty() {
        return 1.0;
    }
    let surviving_terms = significant_terms(compressed);
    let shared = source_terms.intersection(&surviving_terms).count();
    let ratio = shared as f32 / source_terms.len() as f32;
    ratio.clamp(0.0, 1.0)
}
450
451fn compress_prose(text: &str, budget: usize, mode: EfficientMode) -> String {
452 let sents: Vec<&str> = text
453 .split(". ")
454 .flat_map(|l| l.split('\n'))
455 .filter(|s| !s.trim().is_empty())
456 .collect();
457 if sents.len() <= 2 {
458 return text.to_string();
459 }
460
461 let intent: HashSet<&str> = sents
463 .iter()
464 .take(2)
465 .flat_map(|s| s.split_whitespace())
466 .filter(|w| w.len() > 3)
467 .collect();
468 let n = sents.len();
469
470 let mut scored: Vec<(usize, f32)> = sents
471 .iter()
472 .enumerate()
473 .map(|(i, &s)| {
474 if must_keep(s, mode) {
475 return (i, f32::MAX);
476 }
477 let words: Vec<&str> = s.split_whitespace().collect();
478 let wc = words.len().max(1) as f32;
479 let overlap = words.iter().filter(|w| intent.contains(*w)).count() as f32;
480 let pos = if i == 0 {
481 2.5
482 } else if i < n / 4 {
483 1.5
484 } else if i > n * 4 / 5 {
485 1.2
486 } else {
487 1.0
488 };
489 let ent = if words
490 .iter()
491 .any(|w| w.parse::<f64>().is_ok() || w.starts_with("http"))
492 {
493 1.4
494 } else {
495 1.0
496 };
497 let (soft_boost, trailing_pen) = if mode == EfficientMode::Aggressive {
501 let boost = if soft_match(s) { 1.3 } else { 1.0 };
502 let t = s.trim();
503 let pen = if t.starts_with("This ")
504 || t.starts_with("These ")
505 || t.starts_with("It ")
506 || t.starts_with("Which ")
507 {
508 0.65
509 } else {
510 1.0
511 };
512 (boost, pen)
513 } else {
514 (1.0, 1.0)
515 };
516 (
517 i,
518 (overlap / wc + 0.2) * pos * ent * soft_boost * trailing_pen,
519 )
520 })
521 .collect();
522
523 scored.sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
524 let mut kept: HashSet<usize> = HashSet::new();
525 let mut used = 0usize;
526 for &(idx, score) in &scored {
527 let s = sents[idx];
528 if score == f32::MAX || used + tok(s) <= budget {
529 kept.insert(idx);
530 used += tok(s);
531 }
532 if used >= budget && score != f32::MAX {
533 break;
534 }
535 }
536
537 let mut joined: String = (0..n)
538 .filter(|i| kept.contains(i))
539 .map(|i| sents[i].trim())
540 .collect::<Vec<_>>()
541 .join(". ");
542 for filler in FILLERS {
545 joined = joined.replace(filler, " ");
546 }
547 while joined.contains(" ") {
549 joined = joined.replace(" ", " ");
550 }
551 let joined = joined.trim();
553 let mut chars = joined.chars();
554 match chars.next() {
555 None => String::new(),
556 Some(c) => c.to_uppercase().collect::<String>() + chars.as_str(),
557 }
558}
559
#[cfg(test)]
mod tests {
    use super::*;

    /// Everyday question about a React re-render loop; shared by several tests.
    const REACT_PROSE: &str =
        "I am working on a React component and experiencing a problem with state management. \
         The component re-renders multiple times when it should only render once. \
         I have tried using useMemo but it does not seem to work as expected. \
         Basically the error says too many re-renders and I believe the issue might be related \
         to the useEffect dependency array. \
         I think I need help understanding what is going wrong and how to resolve the problem. \
         I would like to know if there is a standard approach for fixing infinite render loops. \
         Please provide a clear explanation and I'd like step-by-step guidance if possible.";

    /// Changelog-style text containing several soft-preserve markers.
    const CHANGELOG_PROSE: &str =
        "The ArmaraOS kernel now injects efficient_mode into each scheduled run. \
         This makes the list self-documenting and more robust for real dashboard status messages. \
         The openfang runtime resolves the manifest field at startup. \
         It is worth noting that the latency is under 30 ms for most prompts. \
         These changes improve the armaraos agent scheduling pipeline significantly. \
         Which means users can expect 20 % fewer API calls on high-volume deployments. \
         The openfang kernel also now exposes a new manifest key for efficient_mode override. \
         This ensures per-agent configuration always wins over the global config value.";

    /// Verbose dashboard troubleshooting question.
    const DASHBOARD_PROSE: &str =
        "I think I would like to understand basically why the dashboard is showing me \
         a red error badge on the agents page. Essentially, it seems like the agent is not \
         responding and I am not sure what steps I should take to investigate this issue. \
         Please note that I have already tried restarting the daemon. To be honest, I am not \
         really sure where to look next.";

    /// Whole-percent token savings of `c`; never negative.
    fn savings_pct(c: &Compressed) -> usize {
        100usize.saturating_sub((c.compressed_tokens * 100) / c.original_tokens.max(1))
    }

    #[test]
    fn short_prompt_passthrough() {
        let short = "Hello, please help me.";
        let r = compress(short, EfficientMode::Balanced);
        assert_eq!(r.text, short);
        assert_eq!(r.tokens_saved(), 0);
    }

    #[test]
    fn code_block_preserved_verbatim() {
        let msg = "Fix my code:\n```rust\nfn add(a: i32, b: i32) -> i32 { a + b }\n```\nIt panics.";
        let r = compress(msg, EfficientMode::Balanced);
        assert!(
            r.text.contains("fn add(a: i32"),
            "code block must survive compression"
        );
    }

    #[test]
    fn off_mode_is_identity() {
        let text = "word ".repeat(100);
        let r = compress(&text, EfficientMode::Off);
        assert_eq!(r.text, text);
        assert_eq!(r.tokens_saved(), 0);
    }

    #[test]
    fn balanced_reduces_long_prose() {
        let r = compress(REACT_PROSE, EfficientMode::Balanced);
        let ratio = r.compressed_tokens as f32 / r.original_tokens as f32;
        assert!(
            ratio < 0.85,
            "expected >15 % compression on long prose, got {ratio:.2}"
        );
        assert!(r.text.contains("React"), "intent keywords must survive");
    }

    #[test]
    fn parse_config_roundtrip() {
        assert_eq!(
            EfficientMode::parse_config("balanced"),
            EfficientMode::Balanced
        );
        assert_eq!(
            EfficientMode::parse_config("AGGRESSIVE"),
            EfficientMode::Aggressive
        );
        assert_eq!(EfficientMode::parse_config("off"), EfficientMode::Off);
        assert_eq!(
            EfficientMode::parse_config("adaptive"),
            EfficientMode::Balanced
        );
        assert_eq!(EfficientMode::parse_config("unknown"), EfficientMode::Off);
    }

    #[test]
    fn parse_natural_language_roundtrip() {
        assert_eq!(
            EfficientMode::parse_natural_language("use aggressive eco mode for max savings"),
            EfficientMode::Aggressive
        );
        assert_eq!(
            EfficientMode::parse_natural_language("balanced mode please"),
            EfficientMode::Balanced
        );
        assert_eq!(
            EfficientMode::parse_natural_language("disable compression for this turn"),
            EfficientMode::Off
        );
    }

    #[test]
    fn telemetry_callback_emits_metrics() {
        use std::sync::{Arc, Mutex};
        let captured: Arc<Mutex<Vec<CompressionMetrics>>> = Arc::new(Mutex::new(Vec::new()));
        let sink = Arc::clone(&captured);
        let compressor = PromptCompressor::with_telemetry_callback(
            EfficientMode::Balanced,
            Some(Box::new(move |m| {
                sink.lock().expect("lock").push(m);
            })),
        );
        let _ = compressor.compress_with_semantic_score(
            "I think I would like to understand basically why the dashboard is showing a red error badge. \
             Please note that I already restarted the daemon and still see the issue.",
            Some(0.91),
        );
        let rows = captured.lock().expect("lock");
        assert_eq!(rows.len(), 1);
        let m = &rows[0];
        assert_eq!(m.mode, EfficientMode::Balanced);
        assert!(m.original_tokens >= m.compressed_tokens);
        assert!(m.savings_ratio_pct >= 0.0);
        assert_eq!(m.semantic_preservation_score, Some(0.91));
    }

    #[test]
    fn semantic_preservation_score_reasonable_range() {
        let original = "Please restart the daemon and check the red error badge in dashboard logs";
        let compressed = "Restart daemon; check red error badge in dashboard logs";
        let score = estimate_semantic_preservation_score(original, compressed);
        assert!((0.0..=1.0).contains(&score));
        assert!(score > 0.5, "expected high overlap score, got {score}");
    }

    #[test]
    fn smoke_complex_ainl_workflow_question() {
        // The query string uses a literal `&region=`; an earlier copy of this
        // fixture had it mangled into a registered-trademark sign.
        let input = "\
            I am really trying to understand basically why my AINL workflow is failing at the R http.GET step. \
            I think the issue might be related to the timeout setting or the URL format that I am passing to the adapter. \
            Essentially, the workflow looks like this: I start with L_start, then I call R http.GET https://api.example.com/data?key=abc&region=us-east-1 ->result, \
            and after that I do R core.GET result body ->body. \
            I have already tried increasing the timeout to 30 seconds by passing a third positional argument, but it does not seem to help. \
            To be honest, I am not really sure whether the problem is the URL query string encoding, \
            or whether the -> result binding is somehow not resolving the value correctly in the next step. \
            Please note that I have already checked the adapter docs and the http adapter section of AGENTS.md. \
            I would really appreciate a step-by-step explanation of what might be going wrong and what exact steps I should take to debug this. \
            It would also be helpful if you could show me the correct opcode syntax for a GET request with headers and timeout.";
        let r = compress(input, EfficientMode::Balanced);
        let savings = savings_pct(&r);
        assert!(
            r.text.contains("R http.GET") || r.text.contains("http.GET"),
            "http.GET must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("https://") || r.text.contains("api.example.com"),
            "URL must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("->"),
            "-> binding must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("steps") || r.text.contains("step"),
            "steps/step must survive: got: {}",
            r.text
        );
        assert!(
            savings >= 10,
            "expected ≥10 % savings on complex AINL question ({}→{} tok), got {}%: [{}]",
            r.original_tokens,
            r.compressed_tokens,
            savings,
            r.text
        );
    }

    #[test]
    fn aggressive_vs_balanced_gap() {
        let bal_pct = savings_pct(&compress(REACT_PROSE, EfficientMode::Balanced));
        let agg_pct = savings_pct(&compress(REACT_PROSE, EfficientMode::Aggressive));

        let bal_cl_pct = savings_pct(&compress(CHANGELOG_PROSE, EfficientMode::Balanced));
        let agg_cl_pct = savings_pct(&compress(CHANGELOG_PROSE, EfficientMode::Aggressive));

        assert!(
            agg_pct > bal_pct + 10,
            "Aggressive should beat Balanced by >10% on everyday prose; Bal={}% Agg={}%",
            bal_pct,
            agg_pct
        );
        assert!(
            agg_cl_pct > bal_cl_pct + 8,
            "Aggressive should beat Balanced by >8% on soft-identifier changelog; Bal={}% Agg={}%",
            bal_cl_pct,
            agg_cl_pct
        );
    }

    #[test]
    fn preserve_marker_forces_keep() {
        let msg = "I want help. Please do not drop the exact steps required for this. ".repeat(20);
        let r = compress(&msg, EfficientMode::Aggressive);
        assert!(
            r.text.contains("exact steps"),
            "preserve marker must survive aggressive mode"
        );
    }

    #[test]
    fn readme_dashboard_example_ratio() {
        let r = compress(DASHBOARD_PROSE, EfficientMode::Balanced);
        let savings = savings_pct(&r);
        assert!(
            r.text.contains("red error badge") || r.text.contains("error badge"),
            "error badge context must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("daemon"),
            "daemon restart context must survive"
        );
        assert!(
            savings >= 30,
            "expected ≥30 % savings on verbose dashboard question, got {}%: [{}]",
            savings,
            r.text
        );
    }

    #[test]
    fn http_adapter_prompt_preserves_technical_terms() {
        let input =
            "Can you help me understand why the R http.GET call is failing with a timeout? \
             I am using the URL https://example.com/api?key=abc and getting a connection error. \
             The adapter seems to not be working and I am not sure if it is the timeout setting \
             or the URL format that is causing issues with the -> result binding.";
        let r = compress(input, EfficientMode::Balanced);
        assert!(
            r.text.contains("R http.GET") || r.text.contains("http.GET"),
            "R http.GET must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("https://") || r.text.contains("http"),
            "URL must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("->"),
            "-> binding must survive: got: {}",
            r.text
        );
    }

    #[test]
    fn benchmark_mode_savings_corpus() {
        let corpus = vec![
            ("dashboard-verbose", DASHBOARD_PROSE),
            (
                "ainl-http-technical",
                "I am really trying to understand basically why my AINL workflow is failing at the R http.GET step. \
                 I think the issue might be related to the timeout setting or the URL format that I am passing to the adapter. \
                 Essentially, the workflow looks like this: I start with L_start, then I call R http.GET https://api.example.com/data?key=abc&region=us-east-1 ->result, \
                 and after that I do R core.GET result body ->body. \
                 I have already tried increasing the timeout to 30 seconds by passing a third positional argument, but it does not seem to help. \
                 To be honest, I am not really sure whether the problem is the URL query string encoding, \
                 or whether the -> result binding is somehow not resolving the value correctly in the next step.",
            ),
            ("everyday-prose", REACT_PROSE),
            ("changelog-soft-identifiers", CHANGELOG_PROSE),
        ];

        let mut balanced_pcts: Vec<usize> = Vec::new();
        let mut aggressive_pcts: Vec<usize> = Vec::new();

        for (name, input) in corpus {
            let off = compress(input, EfficientMode::Off);
            let bal = compress(input, EfficientMode::Balanced);
            let agg = compress(input, EfficientMode::Aggressive);

            let bal_pct = savings_pct(&bal);
            let agg_pct = savings_pct(&agg);

            balanced_pcts.push(bal_pct);
            aggressive_pcts.push(agg_pct);

            eprintln!(
                "[bench] {name}: off={}tok, balanced={}tok (↓{}%), aggressive={}tok (↓{}%), delta=+{}%",
                off.compressed_tokens,
                bal.compressed_tokens,
                bal_pct,
                agg.compressed_tokens,
                agg_pct,
                agg_pct.saturating_sub(bal_pct)
            );
        }

        balanced_pcts.sort_unstable();
        aggressive_pcts.sort_unstable();
        let mid = balanced_pcts.len() / 2;
        let bal_median = balanced_pcts[mid];
        let agg_median = aggressive_pcts[mid];
        let bal_mean = balanced_pcts.iter().sum::<usize>() as f64 / balanced_pcts.len() as f64;
        let agg_mean =
            aggressive_pcts.iter().sum::<usize>() as f64 / aggressive_pcts.len() as f64;

        eprintln!(
            "[bench-summary] balanced median={}%, mean={:.1}% | aggressive median={}%, mean={:.1}% | delta median=+{}%",
            bal_median,
            bal_mean,
            agg_median,
            agg_mean,
            agg_median.saturating_sub(bal_median)
        );

        assert!(
            agg_median >= bal_median,
            "aggressive should not underperform balanced median"
        );
    }
}