1use std::collections::HashSet;
11use std::time::Instant;
12use tracing::debug;
13
/// Prompt-compression aggressiveness level. `Off` is the default and leaves
/// input text untouched.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(
    feature = "graph-telemetry",
    derive(serde::Serialize, serde::Deserialize)
)]
pub enum EfficientMode {
    /// No compression; `compress` passes input through unchanged.
    #[default]
    Off,
    /// Moderate compression — targets retaining ~55% of the token budget.
    Balanced,
    /// Strongest compression — targets retaining ~35% of the token budget.
    Aggressive,
}
29
30impl EfficientMode {
31 pub fn parse_config(s: &str) -> Self {
33 match s.to_lowercase().as_str() {
34 "balanced" => Self::Balanced,
35 "aggressive" => Self::Aggressive,
36 _ => Self::Off,
37 }
38 }
39
40 pub fn parse_natural_language(s: &str) -> Self {
47 let lo = s.to_ascii_lowercase();
48 let has = |needle: &str| lo.contains(needle);
49 if has("disable compression")
50 || has("no compression")
51 || has("compression off")
52 || has("eco off")
53 || has("turn off eco")
54 || has("off mode")
55 {
56 return Self::Off;
57 }
58 if has("aggressive")
59 || has("max savings")
60 || has("highest savings")
61 || has("ultra eco")
62 || has("eco aggressive")
63 {
64 return Self::Aggressive;
65 }
66 if has("balanced")
67 || has("default eco")
68 || has("eco balanced")
69 || has("enable eco")
70 || has("compression on")
71 {
72 return Self::Balanced;
73 }
74 Self::parse_config(&lo)
75 }
76
77 fn retain(self) -> f32 {
84 match self {
85 Self::Balanced => 0.55,
86 Self::Aggressive => 0.35,
87 Self::Off => 1.0,
88 }
89 }
90}
91
/// Telemetry record describing a single compression pass.
#[derive(Debug, Clone)]
#[cfg_attr(
    feature = "graph-telemetry",
    derive(serde::Serialize, serde::Deserialize)
)]
pub struct CompressionMetrics {
    /// Mode the pass ran under.
    pub mode: EfficientMode,
    /// Byte length of the original input text.
    pub original_chars: usize,
    /// Byte length of the compressed output text.
    pub compressed_chars: usize,
    /// Estimated token count before compression.
    pub original_tokens: usize,
    /// Estimated token count after compression.
    pub compressed_tokens: usize,
    /// Tokens removed (`original_tokens - compressed_tokens`, saturating).
    pub tokens_saved: usize,
    /// Tokens saved as a percentage of the original (0 when input was empty).
    pub savings_ratio_pct: f32,
    /// Optional semantic-overlap estimate in `0.0..=1.0`; `None` when not computed.
    pub semantic_preservation_score: Option<f32>,
    /// Wall-clock duration of the pass in milliseconds.
    pub elapsed_ms: u64,
}
111
112impl CompressionMetrics {
113 #[must_use]
114 pub fn from_result(
115 mode: EfficientMode,
116 original_text: &str,
117 compressed: &Compressed,
118 semantic_preservation_score: Option<f32>,
119 elapsed_ms: u64,
120 ) -> Self {
121 let tokens_saved = compressed.tokens_saved();
122 let savings_ratio_pct = if compressed.original_tokens == 0 {
123 0.0
124 } else {
125 (tokens_saved as f32 * 100.0) / compressed.original_tokens as f32
126 };
127 Self {
128 mode,
129 original_chars: original_text.len(),
130 compressed_chars: compressed.text.len(),
131 original_tokens: compressed.original_tokens,
132 compressed_tokens: compressed.compressed_tokens,
133 tokens_saved,
134 savings_ratio_pct,
135 semantic_preservation_score,
136 elapsed_ms,
137 }
138 }
139}
140
/// Destination for compression telemetry; implementations must be safe to
/// share across threads (`Send + Sync`).
pub trait CompressionTelemetrySink: Send + Sync {
    /// Receive one metrics record for a completed compression pass.
    fn emit(&self, metrics: CompressionMetrics);
}
145
/// Stateful wrapper around [`compress`] that carries a fixed mode and an
/// optional telemetry callback invoked after each compression pass.
pub struct PromptCompressor {
    // Mode applied to every `compress*` call on this instance.
    mode: EfficientMode,
    // Optional per-call metrics callback; `None` disables telemetry entirely.
    emit_telemetry: Option<Box<dyn Fn(CompressionMetrics) + Send + Sync>>,
}
154
155impl PromptCompressor {
156 #[must_use]
157 pub fn new(mode: EfficientMode) -> Self {
158 Self {
159 mode,
160 emit_telemetry: None,
161 }
162 }
163
164 #[must_use]
165 pub fn from_natural_language(mode_hint: &str) -> Self {
166 Self::new(EfficientMode::parse_natural_language(mode_hint))
167 }
168
169 #[must_use]
170 pub fn with_telemetry_callback(
171 mode: EfficientMode,
172 emit_telemetry: Option<Box<dyn Fn(CompressionMetrics) + Send + Sync>>,
173 ) -> Self {
174 Self {
175 mode,
176 emit_telemetry,
177 }
178 }
179
180 pub fn compress(&self, text: &str) -> Compressed {
181 self.compress_with_semantic_score(text, None)
182 }
183
184 pub fn compress_with_semantic_score(
185 &self,
186 text: &str,
187 semantic_preservation_score: Option<f32>,
188 ) -> Compressed {
189 let t0 = Instant::now();
190 let result = compress(text, self.mode);
191 if let Some(cb) = &self.emit_telemetry {
192 cb(CompressionMetrics::from_result(
193 self.mode,
194 text,
195 &result,
196 semantic_preservation_score,
197 t0.elapsed().as_millis() as u64,
198 ));
199 }
200 result
201 }
202}
203
/// Result of a compression pass: the (possibly unchanged) text plus
/// estimated token counts before and after.
///
/// Derives `Debug` and `Clone` for consistency with [`CompressionMetrics`];
/// public result types should be debuggable and copyable by callers.
#[derive(Debug, Clone)]
pub struct Compressed {
    /// Compressed (or passthrough) output text.
    pub text: String,
    /// Estimated token count of the original input.
    pub original_tokens: usize,
    /// Estimated token count of `text`.
    pub compressed_tokens: usize,
}
213
214impl Compressed {
215 pub fn tokens_saved(&self) -> usize {
217 self.original_tokens.saturating_sub(self.compressed_tokens)
218 }
219}
220
/// Crude token estimate: ~4 bytes per token, with a floor of 1 token so
/// even an empty string counts as one.
fn tok(s: &str) -> usize {
    let quarters = s.len() / 4;
    quarters + 1
}
224
/// Filler phrases stripped from compressed prose after sentence selection.
/// Sentence-initial phrases carry a trailing space; mid-sentence words are
/// padded with spaces on both sides so only whole words are removed.
const FILLERS: &[&str] = &[
    "I think ",
    "I believe ",
    "Basically, ",
    "Essentially, ",
    "Of course, ",
    "Please note that ",
    "It is worth noting that ",
    "It's worth noting that ",
    "I would like to ",
    "I'd like to ",
    "Don't hesitate to ",
    "Feel free to ",
    "As you know, ",
    "As mentioned earlier, ",
    "That being said, ",
    "To be honest, ",
    "Needless to say, ",
    " basically ",
    " essentially ",
    " simply ",
    " just ",
    " very ",
    " really ",
];
251
/// Case-insensitive markers that force a sentence to be kept in any mode:
/// user-intent phrases ("exact", "already tried"), technical tokens (URLs,
/// `->` bindings, `::` paths, opcodes, adapter names) and code fences.
const HARD_PRESERVE: &[&str] = &[
    "exact",
    "steps",
    "already tried",
    "already restarted",
    "already checked",
    "restart",
    "daemon",
    "error",
    "http://",
    "https://",
    "R http",
    "R web",
    "L_",
    "->",
    "::",
    ".ainl",
    "opcode",
    "R queue",
    "R llm",
    "R core",
    "R solana",
    "R postgres",
    "R redis",
    "```",
];
280
/// Weaker markers (headings, units, product identifiers) that force a keep
/// only outside `Aggressive` mode; in `Aggressive` mode they merely boost
/// the sentence score (see `compress_prose`).
const SOFT_PRESERVE: &[&str] = &[
    "##", " ms", " kb", " mb", " gb", " %", "openfang", "armaraos", "manifest",
];
288
289fn hard_keep(s: &str) -> bool {
290 let lo = s.to_lowercase();
291 HARD_PRESERVE.iter().any(|p| lo.contains(&p.to_lowercase()))
292}
293
294fn soft_match(s: &str) -> bool {
295 let lo = s.to_lowercase();
296 SOFT_PRESERVE.iter().any(|p| lo.contains(&p.to_lowercase()))
297}
298
299fn must_keep(s: &str, mode: EfficientMode) -> bool {
301 hard_keep(s) || (mode != EfficientMode::Aggressive && soft_match(s))
302}
303
/// Compress `text` under `mode`, preserving fenced code blocks verbatim.
///
/// Short inputs (< 80 estimated tokens) and `Off` mode pass through
/// unchanged. Otherwise the text is split on ``` fences; code segments are
/// copied as-is, prose segments are extractively compressed against the
/// remaining token budget. If the result would not be smaller than the
/// input, the original text is returned untouched.
pub fn compress(text: &str, mode: EfficientMode) -> Compressed {
    let orig = tok(text);
    if mode == EfficientMode::Off || orig < 80 {
        return Compressed {
            text: text.to_string(),
            original_tokens: orig,
            compressed_tokens: orig,
        };
    }
    // Target size from the mode's retain ratio, floored at 25% of the input.
    let budget = ((orig as f32 * mode.retain()) as usize).max(orig / 4);

    // Partition into (is_code, segment) pairs on ``` fences. An unterminated
    // fence swallows the remainder of the text as code.
    let mut blocks: Vec<(bool, String)> = Vec::new();
    let mut rest = text;
    while let Some(f) = rest.find("```") {
        if f > 0 {
            blocks.push((false, rest[..f].to_string()));
        }
        rest = &rest[f + 3..];
        if let Some(e) = rest.find("```") {
            blocks.push((true, format!("```{}```", &rest[..e])));
            rest = &rest[e + 3..];
        } else {
            blocks.push((true, format!("```{rest}")));
            rest = "";
            break;
        }
    }
    if !rest.is_empty() {
        blocks.push((false, rest.to_string()));
    }

    // Code is kept verbatim, so its token cost comes off the top; prose
    // segments share whatever budget remains, consumed front to back.
    let code_tok: usize = blocks.iter().filter(|(c, _)| *c).map(|(_, t)| tok(t)).sum();
    let mut prose_budget = budget.saturating_sub(code_tok);
    let mut out: Vec<String> = Vec::new();

    for (is_code, block) in &blocks {
        if *is_code {
            out.push(block.clone());
            continue;
        }
        let prose = compress_prose(block, prose_budget, mode);
        prose_budget = prose_budget.saturating_sub(tok(&prose));
        out.push(prose);
    }

    let result = out.join("\n\n").trim().to_string();
    let c = tok(&result);
    if c >= orig {
        // Compression did not help (e.g. fence reassembly overhead) — return
        // the original so we never make the prompt longer.
        debug!(orig_tok = orig, "prompt_compressor: no gain — passthrough");
        Compressed {
            text: text.to_string(),
            original_tokens: orig,
            compressed_tokens: orig,
        }
    } else {
        debug!(
            orig_tok = orig,
            compressed_tok = c,
            savings_pct = 100u64.saturating_sub((c as u64 * 100) / orig.max(1) as u64),
            original_text = %text,
            compressed_text = %result,
            "prompt_compressor: compressed"
        );
        Compressed {
            text: result,
            original_tokens: orig,
            compressed_tokens: c,
        }
    }
}
384
385pub fn compress_with_metrics(
387 text: &str,
388 mode: EfficientMode,
389 semantic_preservation_score: Option<f32>,
390) -> (Compressed, CompressionMetrics) {
391 let t0 = Instant::now();
392 let result = compress(text, mode);
393 let semantic_preservation_score = semantic_preservation_score
394 .or_else(|| Some(estimate_semantic_preservation_score(text, &result.text)));
395 let metrics = CompressionMetrics::from_result(
396 mode,
397 text,
398 &result,
399 semantic_preservation_score,
400 t0.elapsed().as_millis() as u64,
401 );
402 (result, metrics)
403}
404
/// Estimate how much of the original's vocabulary survived compression.
///
/// Both texts are tokenized into lowercase terms of length >= 4 (splitting on
/// anything that is not alphanumeric, `_`, or `-`); the score is the fraction
/// of original terms also present in the compressed text, clamped to
/// `0.0..=1.0`. An original with no qualifying terms scores `1.0`.
#[must_use]
pub fn estimate_semantic_preservation_score(original: &str, compressed: &str) -> f32 {
    let split_terms = |s: &str| -> HashSet<String> {
        s.split(|c: char| !(c.is_alphanumeric() || c == '_' || c == '-'))
            .filter_map(|raw| {
                let term = raw.trim().to_ascii_lowercase();
                (term.len() >= 4).then_some(term)
            })
            .collect()
    };
    let source_terms = split_terms(original);
    if source_terms.is_empty() {
        return 1.0;
    }
    let kept_terms = split_terms(compressed);
    let preserved = source_terms.intersection(&kept_terms).count();
    (preserved as f32 / source_terms.len() as f32).clamp(0.0, 1.0)
}
422
423fn compress_prose(text: &str, budget: usize, mode: EfficientMode) -> String {
424 let sents: Vec<&str> = text
425 .split(". ")
426 .flat_map(|l| l.split('\n'))
427 .filter(|s| !s.trim().is_empty())
428 .collect();
429 if sents.len() <= 2 {
430 return text.to_string();
431 }
432
433 let intent: HashSet<&str> = sents
435 .iter()
436 .take(2)
437 .flat_map(|s| s.split_whitespace())
438 .filter(|w| w.len() > 3)
439 .collect();
440 let n = sents.len();
441
442 let mut scored: Vec<(usize, f32)> = sents
443 .iter()
444 .enumerate()
445 .map(|(i, &s)| {
446 if must_keep(s, mode) {
447 return (i, f32::MAX);
448 }
449 let words: Vec<&str> = s.split_whitespace().collect();
450 let wc = words.len().max(1) as f32;
451 let overlap = words.iter().filter(|w| intent.contains(*w)).count() as f32;
452 let pos = if i == 0 {
453 2.5
454 } else if i < n / 4 {
455 1.5
456 } else if i > n * 4 / 5 {
457 1.2
458 } else {
459 1.0
460 };
461 let ent = if words
462 .iter()
463 .any(|w| w.parse::<f64>().is_ok() || w.starts_with("http"))
464 {
465 1.4
466 } else {
467 1.0
468 };
469 let (soft_boost, trailing_pen) = if mode == EfficientMode::Aggressive {
473 let boost = if soft_match(s) { 1.3 } else { 1.0 };
474 let t = s.trim();
475 let pen = if t.starts_with("This ")
476 || t.starts_with("These ")
477 || t.starts_with("It ")
478 || t.starts_with("Which ")
479 {
480 0.65
481 } else {
482 1.0
483 };
484 (boost, pen)
485 } else {
486 (1.0, 1.0)
487 };
488 (
489 i,
490 (overlap / wc + 0.2) * pos * ent * soft_boost * trailing_pen,
491 )
492 })
493 .collect();
494
495 scored.sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
496 let mut kept: HashSet<usize> = HashSet::new();
497 let mut used = 0usize;
498 for &(idx, score) in &scored {
499 let s = sents[idx];
500 if score == f32::MAX || used + tok(s) <= budget {
501 kept.insert(idx);
502 used += tok(s);
503 }
504 if used >= budget && score != f32::MAX {
505 break;
506 }
507 }
508
509 let mut joined: String = (0..n)
510 .filter(|i| kept.contains(i))
511 .map(|i| sents[i].trim())
512 .collect::<Vec<_>>()
513 .join(". ");
514 for filler in FILLERS {
517 joined = joined.replace(filler, " ");
518 }
519 while joined.contains(" ") {
521 joined = joined.replace(" ", " ");
522 }
523 let joined = joined.trim();
525 let mut chars = joined.chars();
526 match chars.next() {
527 None => String::new(),
528 Some(c) => c.to_uppercase().collect::<String>() + chars.as_str(),
529 }
530}
531
#[cfg(test)]
mod tests {
    use super::*;

    // Inputs under the 80-token threshold must pass through unchanged.
    #[test]
    fn short_prompt_passthrough() {
        let short = "Hello, please help me.";
        let r = compress(short, EfficientMode::Balanced);
        assert_eq!(r.text, short);
        assert_eq!(r.tokens_saved(), 0);
    }

    // Fenced code segments are copied verbatim, never compressed.
    #[test]
    fn code_block_preserved_verbatim() {
        let msg = "Fix my code:\n```rust\nfn add(a: i32, b: i32) -> i32 { a + b }\n```\nIt panics.";
        let r = compress(msg, EfficientMode::Balanced);
        assert!(
            r.text.contains("fn add(a: i32"),
            "code block must survive compression"
        );
    }

    // Off mode is the identity transform even on long input.
    #[test]
    fn off_mode_is_identity() {
        let text = "word ".repeat(100);
        let r = compress(&text, EfficientMode::Off);
        assert_eq!(r.text, text);
        assert_eq!(r.tokens_saved(), 0);
    }

    // Balanced mode should achieve >15% token savings on verbose prose while
    // keeping the intent keywords.
    #[test]
    fn balanced_reduces_long_prose() {
        let msg =
            "I am working on a React component and experiencing a problem with state management. \
            The component re-renders multiple times when it should only render once. \
            I have tried using useMemo but it does not seem to work as expected. \
            Basically the error says too many re-renders and I believe the issue might be related \
            to the useEffect dependency array. \
            I think I need help understanding what is going wrong and how to resolve the problem. \
            I would like to know if there is a standard approach for fixing infinite render loops. \
            Please provide a clear explanation and I'd like step-by-step guidance if possible.";
        let r = compress(msg, EfficientMode::Balanced);
        let ratio = r.compressed_tokens as f32 / r.original_tokens as f32;
        assert!(
            ratio < 0.85,
            "expected >15 % compression on long prose, got {ratio:.2}"
        );
        assert!(r.text.contains("React"), "intent keywords must survive");
    }

    // Config parsing is case-insensitive; unknown values map to Off.
    #[test]
    fn parse_config_roundtrip() {
        assert_eq!(
            EfficientMode::parse_config("balanced"),
            EfficientMode::Balanced
        );
        assert_eq!(
            EfficientMode::parse_config("AGGRESSIVE"),
            EfficientMode::Aggressive
        );
        assert_eq!(EfficientMode::parse_config("off"), EfficientMode::Off);
        assert_eq!(EfficientMode::parse_config("unknown"), EfficientMode::Off);
    }

    // Natural-language hints resolve in Off > Aggressive > Balanced priority.
    #[test]
    fn parse_natural_language_roundtrip() {
        assert_eq!(
            EfficientMode::parse_natural_language("use aggressive eco mode for max savings"),
            EfficientMode::Aggressive
        );
        assert_eq!(
            EfficientMode::parse_natural_language("balanced mode please"),
            EfficientMode::Balanced
        );
        assert_eq!(
            EfficientMode::parse_natural_language("disable compression for this turn"),
            EfficientMode::Off
        );
    }

    // The telemetry callback receives exactly one metrics record per call,
    // carrying the caller-supplied semantic score unchanged.
    #[test]
    fn telemetry_callback_emits_metrics() {
        use std::sync::{Arc, Mutex};
        let captured: Arc<Mutex<Vec<CompressionMetrics>>> = Arc::new(Mutex::new(Vec::new()));
        let sink = Arc::clone(&captured);
        let compressor = PromptCompressor::with_telemetry_callback(
            EfficientMode::Balanced,
            Some(Box::new(move |m| {
                sink.lock().expect("lock").push(m);
            })),
        );
        let _ = compressor.compress_with_semantic_score(
            "I think I would like to understand basically why the dashboard is showing a red error badge. \
            Please note that I already restarted the daemon and still see the issue.",
            Some(0.91),
        );
        let rows = captured.lock().expect("lock");
        assert_eq!(rows.len(), 1);
        let m = &rows[0];
        assert_eq!(m.mode, EfficientMode::Balanced);
        assert!(m.original_tokens >= m.compressed_tokens);
        assert!(m.savings_ratio_pct >= 0.0);
        assert_eq!(m.semantic_preservation_score, Some(0.91));
    }

    // The overlap estimator stays in 0..=1 and scores paraphrases highly.
    #[test]
    fn semantic_preservation_score_reasonable_range() {
        let original = "Please restart the daemon and check the red error badge in dashboard logs";
        let compressed = "Restart daemon; check red error badge in dashboard logs";
        let score = estimate_semantic_preservation_score(original, compressed);
        assert!((0.0..=1.0).contains(&score));
        assert!(score > 0.5, "expected high overlap score, got {score}");
    }

    // End-to-end: a realistic technical question must keep all hard-preserve
    // tokens (opcodes, URLs, bindings) while still saving >= 10% of tokens.
    #[test]
    fn smoke_complex_ainl_workflow_question() {
        let input = "\
            I am really trying to understand basically why my AINL workflow is failing at the R http.GET step. \
            I think the issue might be related to the timeout setting or the URL format that I am passing to the adapter. \
            Essentially, the workflow looks like this: I start with L_start, then I call R http.GET https://api.example.com/data?key=abc®ion=us-east-1 ->result, \
            and after that I do R core.GET result body ->body. \
            I have already tried increasing the timeout to 30 seconds by passing a third positional argument, but it does not seem to help. \
            To be honest, I am not really sure whether the problem is the URL query string encoding, \
            or whether the -> result binding is somehow not resolving the value correctly in the next step. \
            Please note that I have already checked the adapter docs and the http adapter section of AGENTS.md. \
            I would really appreciate a step-by-step explanation of what might be going wrong and what exact steps I should take to debug this. \
            It would also be helpful if you could show me the correct opcode syntax for a GET request with headers and timeout.";
        let r = compress(input, EfficientMode::Balanced);
        let savings =
            100usize.saturating_sub((r.compressed_tokens * 100) / r.original_tokens.max(1));
        assert!(
            r.text.contains("R http.GET") || r.text.contains("http.GET"),
            "http.GET must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("https://") || r.text.contains("api.example.com"),
            "URL must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("->"),
            "-> binding must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("steps") || r.text.contains("step"),
            "steps/step must survive: got: {}",
            r.text
        );
        assert!(
            savings >= 10,
            "expected ≥10 % savings on complex AINL question ({}→{} tok), got {}%: [{}]",
            r.original_tokens,
            r.compressed_tokens,
            savings,
            r.text
        );
    }

    // Aggressive must meaningfully outperform Balanced on both everyday prose
    // and soft-identifier-heavy changelog text.
    #[test]
    fn aggressive_vs_balanced_gap() {
        let everyday =
            "I am working on a React component and experiencing a problem with state management. \
            The component re-renders multiple times when it should only render once. \
            I have tried using useMemo but it does not seem to work as expected. \
            Basically the error says too many re-renders and I believe the issue might be related \
            to the useEffect dependency array. \
            I think I need help understanding what is going wrong and how to resolve the problem. \
            I would like to know if there is a standard approach for fixing infinite render loops. \
            Please provide a clear explanation and I'd like step-by-step guidance if possible.";
        let bal = compress(everyday, EfficientMode::Balanced);
        let agg = compress(everyday, EfficientMode::Aggressive);
        let bal_pct =
            100usize.saturating_sub((bal.compressed_tokens * 100) / bal.original_tokens.max(1));
        let agg_pct =
            100usize.saturating_sub((agg.compressed_tokens * 100) / agg.original_tokens.max(1));

        let changelog = "The ArmaraOS kernel now injects efficient_mode into each scheduled run. \
            This makes the list self-documenting and more robust for real dashboard status messages. \
            The openfang runtime resolves the manifest field at startup. \
            It is worth noting that the latency is under 30 ms for most prompts. \
            These changes improve the armaraos agent scheduling pipeline significantly. \
            Which means users can expect 20 % fewer API calls on high-volume deployments. \
            The openfang kernel also now exposes a new manifest key for efficient_mode override. \
            This ensures per-agent configuration always wins over the global config value.";
        let bal_cl = compress(changelog, EfficientMode::Balanced);
        let agg_cl = compress(changelog, EfficientMode::Aggressive);
        let bal_cl_pct = 100usize
            .saturating_sub((bal_cl.compressed_tokens * 100) / bal_cl.original_tokens.max(1));
        let agg_cl_pct = 100usize
            .saturating_sub((agg_cl.compressed_tokens * 100) / agg_cl.original_tokens.max(1));

        assert!(
            agg_pct > bal_pct + 10,
            "Aggressive should beat Balanced by >10% on everyday prose; Bal={}% Agg={}%",
            bal_pct,
            agg_pct
        );
        assert!(
            agg_cl_pct > bal_cl_pct + 8,
            "Aggressive should beat Balanced by >8% on soft-identifier changelog; Bal={}% Agg={}%",
            bal_cl_pct,
            agg_cl_pct
        );
    }

    // Hard-preserve markers ("exact steps") must survive even Aggressive mode.
    #[test]
    fn preserve_marker_forces_keep() {
        let msg = "I want help. Please do not drop the exact steps required for this. ".repeat(20);
        let r = compress(&msg, EfficientMode::Aggressive);
        assert!(
            r.text.contains("exact steps"),
            "preserve marker must survive aggressive mode"
        );
    }

    // The README example: filler-heavy dashboard question should compress by
    // >= 30% while keeping the diagnostic context.
    #[test]
    fn readme_dashboard_example_ratio() {
        let input = "I think I would like to understand basically why the dashboard is showing me \
            a red error badge on the agents page. Essentially, it seems like the agent is not \
            responding and I am not sure what steps I should take to investigate this issue. \
            Please note that I have already tried restarting the daemon. To be honest, I am not \
            really sure where to look next.";
        let r = compress(input, EfficientMode::Balanced);
        let savings =
            100usize.saturating_sub((r.compressed_tokens * 100) / r.original_tokens.max(1));
        assert!(
            r.text.contains("red error badge") || r.text.contains("error badge"),
            "error badge context must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("daemon"),
            "daemon restart context must survive"
        );
        assert!(
            savings >= 30,
            "expected ≥30 % savings on verbose dashboard question, got {}%: [{}]",
            savings,
            r.text
        );
    }

    // Technical tokens (opcode, URL, -> binding) in an HTTP-adapter question
    // must all survive Balanced compression.
    #[test]
    fn http_adapter_prompt_preserves_technical_terms() {
        let input =
            "Can you help me understand why the R http.GET call is failing with a timeout? \
            I am using the URL https://example.com/api?key=abc and getting a connection error. \
            The adapter seems to not be working and I am not sure if it is the timeout setting \
            or the URL format that is causing issues with the -> result binding.";
        let r = compress(input, EfficientMode::Balanced);
        assert!(
            r.text.contains("R http.GET") || r.text.contains("http.GET"),
            "R http.GET must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("https://") || r.text.contains("http"),
            "URL must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("->"),
            "-> binding must survive: got: {}",
            r.text
        );
    }

    // Savings benchmark across a small corpus; prints per-input and summary
    // stats, and asserts only that Aggressive never underperforms Balanced
    // at the median (loose invariant, not a threshold per input).
    #[test]
    fn benchmark_mode_savings_corpus() {
        let corpus = vec![
            (
                "dashboard-verbose",
                "I think I would like to understand basically why the dashboard is showing me \
                a red error badge on the agents page. Essentially, it seems like the agent is not \
                responding and I am not sure what steps I should take to investigate this issue. \
                Please note that I have already tried restarting the daemon. To be honest, I am not \
                really sure where to look next.",
            ),
            (
                "ainl-http-technical",
                "I am really trying to understand basically why my AINL workflow is failing at the R http.GET step. \
                I think the issue might be related to the timeout setting or the URL format that I am passing to the adapter. \
                Essentially, the workflow looks like this: I start with L_start, then I call R http.GET https://api.example.com/data?key=abc®ion=us-east-1 ->result, \
                and after that I do R core.GET result body ->body. \
                I have already tried increasing the timeout to 30 seconds by passing a third positional argument, but it does not seem to help. \
                To be honest, I am not really sure whether the problem is the URL query string encoding, \
                or whether the -> result binding is somehow not resolving the value correctly in the next step.",
            ),
            (
                "everyday-prose",
                "I am working on a React component and experiencing a problem with state management. \
                The component re-renders multiple times when it should only render once. \
                I have tried using useMemo but it does not seem to work as expected. \
                Basically the error says too many re-renders and I believe the issue might be related \
                to the useEffect dependency array. \
                I think I need help understanding what is going wrong and how to resolve the problem. \
                I would like to know if there is a standard approach for fixing infinite render loops. \
                Please provide a clear explanation and I'd like step-by-step guidance if possible.",
            ),
            (
                "changelog-soft-identifiers",
                "The ArmaraOS kernel now injects efficient_mode into each scheduled run. \
                This makes the list self-documenting and more robust for real dashboard status messages. \
                The openfang runtime resolves the manifest field at startup. \
                It is worth noting that the latency is under 30 ms for most prompts. \
                These changes improve the armaraos agent scheduling pipeline significantly. \
                Which means users can expect 20 % fewer API calls on high-volume deployments. \
                The openfang kernel also now exposes a new manifest key for efficient_mode override. \
                This ensures per-agent configuration always wins over the global config value.",
            ),
        ];

        let mut balanced_pcts: Vec<u64> = Vec::new();
        let mut aggressive_pcts: Vec<u64> = Vec::new();

        for (name, input) in corpus {
            let off = compress(input, EfficientMode::Off);
            let bal = compress(input, EfficientMode::Balanced);
            let agg = compress(input, EfficientMode::Aggressive);

            let bal_pct = 100u64.saturating_sub(
                (bal.compressed_tokens as u64 * 100) / bal.original_tokens.max(1) as u64,
            );
            let agg_pct = 100u64.saturating_sub(
                (agg.compressed_tokens as u64 * 100) / agg.original_tokens.max(1) as u64,
            );

            balanced_pcts.push(bal_pct);
            aggressive_pcts.push(agg_pct);

            eprintln!(
                "[bench] {name}: off={}tok, balanced={}tok (↓{}%), aggressive={}tok (↓{}%), delta=+{}%",
                off.compressed_tokens,
                bal.compressed_tokens,
                bal_pct,
                agg.compressed_tokens,
                agg_pct,
                agg_pct.saturating_sub(bal_pct)
            );
        }

        balanced_pcts.sort_unstable();
        aggressive_pcts.sort_unstable();
        let mid = balanced_pcts.len() / 2;
        let bal_median = balanced_pcts[mid];
        let agg_median = aggressive_pcts[mid];
        let bal_mean = balanced_pcts.iter().sum::<u64>() as f64 / balanced_pcts.len() as f64;
        let agg_mean = aggressive_pcts.iter().sum::<u64>() as f64 / aggressive_pcts.len() as f64;

        eprintln!(
            "[bench-summary] balanced median={}%, mean={:.1}% | aggressive median={}%, mean={:.1}% | delta median=+{}%",
            bal_median,
            bal_mean,
            agg_median,
            agg_mean,
            agg_median.saturating_sub(bal_median)
        );

        assert!(
            agg_median >= bal_median,
            "aggressive should not underperform balanced median"
        );
    }
}