// ainl_compression/lib.rs

1//! Heuristic compression primitives for AINL hosts.
2//!
3//! This crate currently exposes the ArmaraOS "Ultra Cost-Efficient Mode"
4//! input compressor. It is intentionally embedding-free and dependency-light
5//! so it can be reused across hosts without shipping local ML models.
6//!
7//! Set `RUST_LOG=ainl_compression=debug` to enable full before/after text
8//! logging per call (useful for tuning preserve lists and retention ratios).
9
10use std::collections::HashSet;
11use std::time::Instant;
12use tracing::debug;
13
/// Input compression aggressiveness.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(
    feature = "graph-telemetry",
    derive(serde::Serialize, serde::Deserialize)
)]
pub enum EfficientMode {
    /// Pass through without modification. This is the crate-wide default
    /// (per `#[default]`); compression is strictly opt-in.
    #[default]
    Off,
    /// ~55 % token retention — sweet-spot for most prompts.
    Balanced,
    /// ~35 % token retention — opt-in for high-volume / cost-sensitive paths.
    Aggressive,
}

impl EfficientMode {
    /// Parse from a config string; unknown values → `Off`.
    pub fn parse_config(s: &str) -> Self {
        let lowered = s.to_lowercase();
        if lowered == "balanced" {
            Self::Balanced
        } else if lowered == "aggressive" {
            Self::Aggressive
        } else {
            Self::Off
        }
    }

    /// Parse from free-form natural language intent.
    ///
    /// Hints are checked in priority order (Off → Aggressive → Balanced) so an
    /// explicit "disable"-style request always wins; anything unmatched falls
    /// back to [`Self::parse_config`].
    ///
    /// Examples:
    /// - "use aggressive eco mode" -> `Aggressive`
    /// - "balanced mode please" -> `Balanced`
    /// - "disable compression" -> `Off`
    pub fn parse_natural_language(s: &str) -> Self {
        const OFF_HINTS: &[&str] = &[
            "disable compression",
            "no compression",
            "compression off",
            "eco off",
            "turn off eco",
            "off mode",
        ];
        const AGGRESSIVE_HINTS: &[&str] = &[
            "aggressive",
            "max savings",
            "highest savings",
            "ultra eco",
            "eco aggressive",
        ];
        const BALANCED_HINTS: &[&str] = &[
            "balanced",
            "default eco",
            "eco balanced",
            "enable eco",
            "compression on",
        ];
        let lo = s.to_ascii_lowercase();
        let matches_any = |hints: &[&str]| hints.iter().any(|&h| lo.contains(h));
        if matches_any(OFF_HINTS) {
            return Self::Off;
        }
        if matches_any(AGGRESSIVE_HINTS) {
            return Self::Aggressive;
        }
        if matches_any(BALANCED_HINTS) {
            return Self::Balanced;
        }
        Self::parse_config(&lo)
    }

    /// Token retention ratio used to size the compression budget.
    ///
    /// `Balanced` targets ~55 % retention (40–50 % reduction) — sweet-spot for most prompts.
    /// `Aggressive` targets ~35 % retention (55–70 % reduction) — meaningfully wider gap vs
    /// Balanced; soft-preserve terms become score-boosts rather than force-keeps, and
    /// trailing-explanation sentences get a score penalty to prune meta-commentary.
    fn retain(self) -> f32 {
        match self {
            Self::Off => 1.0,
            Self::Balanced => 0.55,
            Self::Aggressive => 0.35,
        }
    }
}
91
/// Structured telemetry emitted for each compression operation.
#[derive(Debug, Clone)]
#[cfg_attr(
    feature = "graph-telemetry",
    derive(serde::Serialize, serde::Deserialize)
)]
pub struct CompressionMetrics {
    /// Mode the compressor ran in for this call.
    pub mode: EfficientMode,
    /// Byte length of the input text (`str::len`, so UTF-8 bytes — not Unicode chars).
    pub original_chars: usize,
    /// Byte length of the compressed text.
    pub compressed_chars: usize,
    /// Estimated token count before compression (chars/4 heuristic — see `tok`).
    pub original_tokens: usize,
    /// Estimated token count after compression.
    pub compressed_tokens: usize,
    /// `original_tokens - compressed_tokens` (saturating; 0 on a no-op pass).
    pub tokens_saved: usize,
    /// Percentage saved, range 0.0..100.0.
    pub savings_ratio_pct: f32,
    /// Optional caller-provided semantic preservation score.
    pub semantic_preservation_score: Option<f32>,
    /// Wall-clock duration of the compression call, in milliseconds.
    pub elapsed_ms: u64,
}
111
112impl CompressionMetrics {
113    #[must_use]
114    pub fn from_result(
115        mode: EfficientMode,
116        original_text: &str,
117        compressed: &Compressed,
118        semantic_preservation_score: Option<f32>,
119        elapsed_ms: u64,
120    ) -> Self {
121        let tokens_saved = compressed.tokens_saved();
122        let savings_ratio_pct = if compressed.original_tokens == 0 {
123            0.0
124        } else {
125            (tokens_saved as f32 * 100.0) / compressed.original_tokens as f32
126        };
127        Self {
128            mode,
129            original_chars: original_text.len(),
130            compressed_chars: compressed.text.len(),
131            original_tokens: compressed.original_tokens,
132            compressed_tokens: compressed.compressed_tokens,
133            tokens_saved,
134            savings_ratio_pct,
135            semantic_preservation_score,
136            elapsed_ms,
137        }
138    }
139}
140
/// Optional telemetry sink for compression metrics.
///
/// `Send + Sync` so a single sink instance can be shared across host threads.
pub trait CompressionTelemetrySink: Send + Sync {
    /// Consume one metrics record for a completed compression operation.
    fn emit(&self, metrics: CompressionMetrics);
}
145
/// Standalone input prompt compressor.
///
/// This is the intended public API for external agents that want to adopt
/// AINL eco-mode compression without pulling runtime-specific crates.
pub struct PromptCompressor {
    // Aggressiveness applied to every `compress*` call.
    mode: EfficientMode,
    // Optional callback invoked with a metrics record after each compression.
    emit_telemetry: Option<Box<dyn Fn(CompressionMetrics) + Send + Sync>>,
}
154
155impl PromptCompressor {
156    #[must_use]
157    pub fn new(mode: EfficientMode) -> Self {
158        Self {
159            mode,
160            emit_telemetry: None,
161        }
162    }
163
164    #[must_use]
165    pub fn from_natural_language(mode_hint: &str) -> Self {
166        Self::new(EfficientMode::parse_natural_language(mode_hint))
167    }
168
169    #[must_use]
170    pub fn with_telemetry_callback(
171        mode: EfficientMode,
172        emit_telemetry: Option<Box<dyn Fn(CompressionMetrics) + Send + Sync>>,
173    ) -> Self {
174        Self {
175            mode,
176            emit_telemetry,
177        }
178    }
179
180    pub fn compress(&self, text: &str) -> Compressed {
181        self.compress_with_semantic_score(text, None)
182    }
183
184    pub fn compress_with_semantic_score(
185        &self,
186        text: &str,
187        semantic_preservation_score: Option<f32>,
188    ) -> Compressed {
189        let t0 = Instant::now();
190        let result = compress(text, self.mode);
191        if let Some(cb) = &self.emit_telemetry {
192            cb(CompressionMetrics::from_result(
193                self.mode,
194                text,
195                &result,
196                semantic_preservation_score,
197                t0.elapsed().as_millis() as u64,
198            ));
199        }
200        result
201    }
202}
203
/// Result of a compression pass.
pub struct Compressed {
    /// Compressed (or original, on no-op) text.
    pub text: String,
    /// Estimated original token count (chars/4).
    pub original_tokens: usize,
    /// Estimated compressed token count.
    pub compressed_tokens: usize,
}

impl Compressed {
    /// Tokens saved; 0 when compression was a no-op.
    pub fn tokens_saved(&self) -> usize {
        match self.original_tokens.checked_sub(self.compressed_tokens) {
            Some(saved) => saved,
            None => 0,
        }
    }
}
220
/// Cheap token estimate: roughly one token per four bytes, minimum 1.
fn tok(s: &str) -> usize {
    1 + s.len() / 4
}
224
/// Filler phrases stripped after sentence selection.
///
/// Leading entries target sentence openers (capitalised, trailing space); the
/// space-padded entries target mid-sentence hedging words. Each occurrence is
/// replaced with a single space and double-spaces are collapsed afterwards
/// (see the tail of `compress_prose`).
const FILLERS: &[&str] = &[
    "I think ",
    "I believe ",
    "Basically, ",
    "Essentially, ",
    "Of course, ",
    "Please note that ",
    "It is worth noting that ",
    "It's worth noting that ",
    "I would like to ",
    "I'd like to ",
    "Don't hesitate to ",
    "Feel free to ",
    "As you know, ",
    "As mentioned earlier, ",
    "That being said, ",
    "To be honest, ",
    "Needless to say, ",
    // Mid-sentence hedging words (always safe to strip)
    " basically ",
    " essentially ",
    " simply ",
    " just ",
    " very ",
    " really ",
];
251
/// Hard-preserve: force-keep in **both** Balanced and Aggressive.
/// Irreplaceable content — actual opcodes, URLs, diagnostic history, user-intent markers.
/// Matched case-insensitively as substrings by `hard_keep`.
const HARD_PRESERVE: &[&str] = &[
    // User-intent and diagnostic-history markers.
    "exact",
    "steps",
    "already tried",
    "already restarted",
    "already checked",
    "restart",
    "daemon",
    "error",
    // URLs must survive verbatim.
    "http://",
    "https://",
    // AINL opcode / syntax fragments.
    "R http",
    "R web",
    "L_",
    "->",
    "::",
    ".ainl",
    "opcode",
    "R queue",
    "R llm",
    "R core",
    "R solana",
    "R postgres",
    "R redis",
    // Code-fence marker (fenced blocks are also extracted verbatim upstream).
    "```",
];
280
/// Soft-preserve: force-keep in Balanced; **score-boost only** in Aggressive.
/// These identifiers/units are important but the LLM can reconstruct context without them
/// when the budget is tight.  Freeing them lets Aggressive prune changelog-dense text
/// where these terms would otherwise lock in nearly every sentence.
/// Matched case-insensitively as substrings by `soft_match`.
const SOFT_PRESERVE: &[&str] = &[
    "##", " ms", " kb", " mb", " gb", " %", "openfang", "armaraos", "manifest",
];
288
289fn hard_keep(s: &str) -> bool {
290    let lo = s.to_lowercase();
291    HARD_PRESERVE.iter().any(|p| lo.contains(&p.to_lowercase()))
292}
293
294fn soft_match(s: &str) -> bool {
295    let lo = s.to_lowercase();
296    SOFT_PRESERVE.iter().any(|p| lo.contains(&p.to_lowercase()))
297}
298
299/// Returns `true` when `s` must be included regardless of budget.
300fn must_keep(s: &str, mode: EfficientMode) -> bool {
301    hard_keep(s) || (mode != EfficientMode::Aggressive && soft_match(s))
302}
303
/// Compress `text` toward `mode.retain()` of its original token budget.
///
/// Prompts shorter than 80 tokens, or `Off` mode, pass through unchanged.
/// Code fences (` ``` `) are extracted and re-inserted verbatim.
pub fn compress(text: &str, mode: EfficientMode) -> Compressed {
    let orig = tok(text);
    if mode == EfficientMode::Off || orig < 80 {
        // No-op pass: compressed_tokens == original_tokens, so tokens_saved() is 0.
        return Compressed {
            text: text.to_string(),
            original_tokens: orig,
            compressed_tokens: orig,
        };
    }
    // Floor: never go below 25 % of original (prevents total context loss on short messages),
    // but keep it relative so both modes stay distinct on moderate-length inputs.
    // The old fixed `.max(80)` floor was equalising Balanced and Aggressive on ~100–200 token
    // messages because both natural budgets fell below 80, producing identical outputs.
    let budget = ((orig as f32 * mode.retain()) as usize).max(orig / 4);

    // Split at code fences; preserve code blocks verbatim.
    // `blocks` holds (is_code, text) pairs in original document order.
    let mut blocks: Vec<(bool, String)> = Vec::new();
    let mut rest = text;
    while let Some(f) = rest.find("```") {
        if f > 0 {
            blocks.push((false, rest[..f].to_string()));
        }
        rest = &rest[f + 3..];
        if let Some(e) = rest.find("```") {
            // Matched fence pair: re-wrap the interior with its backticks.
            blocks.push((true, format!("```{}```", &rest[..e])));
            rest = &rest[e + 3..];
        } else {
            // Unterminated fence: treat everything after the opener as code.
            blocks.push((true, format!("```{rest}")));
            rest = "";
            break;
        }
    }
    if !rest.is_empty() {
        blocks.push((false, rest.to_string()));
    }

    // Code tokens are charged against the budget first; prose splits the remainder.
    let code_tok: usize = blocks.iter().filter(|(c, _)| *c).map(|(_, t)| tok(t)).sum();
    let mut prose_budget = budget.saturating_sub(code_tok);
    let mut out: Vec<String> = Vec::new();

    for (is_code, block) in &blocks {
        if *is_code {
            out.push(block.clone());
            continue;
        }
        // Each prose block consumes from the shared remaining budget, so earlier
        // blocks get first claim on the token allowance.
        let prose = compress_prose(block, prose_budget, mode);
        prose_budget = prose_budget.saturating_sub(tok(&prose));
        out.push(prose);
    }

    let result = out.join("\n\n").trim().to_string();
    let c = tok(&result);
    // Safety: never return longer than original.
    if c >= orig {
        debug!(orig_tok = orig, "prompt_compressor: no gain — passthrough");
        Compressed {
            text: text.to_string(),
            original_tokens: orig,
            compressed_tokens: orig,
        }
    } else {
        debug!(
            orig_tok = orig,
            compressed_tok = c,
            savings_pct = 100u64.saturating_sub((c as u64 * 100) / orig.max(1) as u64),
            original_text = %text,
            compressed_text = %result,
            "prompt_compressor: compressed"
        );
        Compressed {
            text: result,
            original_tokens: orig,
            compressed_tokens: c,
        }
    }
}
384
385/// Compress and return structured telemetry metrics in one call.
386pub fn compress_with_metrics(
387    text: &str,
388    mode: EfficientMode,
389    semantic_preservation_score: Option<f32>,
390) -> (Compressed, CompressionMetrics) {
391    let t0 = Instant::now();
392    let result = compress(text, mode);
393    let semantic_preservation_score = semantic_preservation_score
394        .or_else(|| Some(estimate_semantic_preservation_score(text, &result.text)));
395    let metrics = CompressionMetrics::from_result(
396        mode,
397        text,
398        &result,
399        semantic_preservation_score,
400        t0.elapsed().as_millis() as u64,
401    );
402    (result, metrics)
403}
404
/// Lightweight lexical semantic-preservation heuristic in range 0.0..1.0.
///
/// Tokenises both texts into lowercase terms of length ≥ 4 (keeping `_` and
/// `-` inside words) and reports the fraction of the original's term set that
/// survives in the compressed text. An original with no qualifying terms
/// scores a perfect 1.0.
#[must_use]
pub fn estimate_semantic_preservation_score(original: &str, compressed: &str) -> f32 {
    fn terms(s: &str) -> std::collections::HashSet<String> {
        s.split(|c: char| !(c.is_alphanumeric() || c == '_' || c == '-'))
            .filter_map(|raw| {
                let term = raw.trim().to_ascii_lowercase();
                (term.len() >= 4).then_some(term)
            })
            .collect()
    }
    let before = terms(original);
    if before.is_empty() {
        return 1.0;
    }
    let after = terms(compressed);
    let shared = before.intersection(&after).count();
    (shared as f32 / before.len() as f32).clamp(0.0, 1.0)
}
422
/// Sentence-level extractive compression of one prose block toward `budget` tokens.
///
/// Sentences are scored by overlap with "intent" vocabulary drawn from the first
/// two sentences, a position prior, and an entity boost (numbers/URLs); the top
/// scorers are re-emitted in original order, then filler phrases are stripped.
fn compress_prose(text: &str, budget: usize, mode: EfficientMode) -> String {
    // Heuristic sentence split on ". " and newlines — not a full sentence splitter.
    let sents: Vec<&str> = text
        .split(". ")
        .flat_map(|l| l.split('\n'))
        .filter(|s| !s.trim().is_empty())
        .collect();
    if sents.len() <= 2 {
        // Too little structure to rank — return the block unchanged.
        return text.to_string();
    }

    // Intent vocabulary from the first two sentences (position-biased TF-IDF proxy).
    let intent: HashSet<&str> = sents
        .iter()
        .take(2)
        .flat_map(|s| s.split_whitespace())
        .filter(|w| w.len() > 3)
        .collect();
    let n = sents.len();

    let mut scored: Vec<(usize, f32)> = sents
        .iter()
        .enumerate()
        .map(|(i, &s)| {
            // Hard (and, outside Aggressive, soft) preserve hits are force-kept
            // via a sentinel score.
            if must_keep(s, mode) {
                return (i, f32::MAX);
            }
            let words: Vec<&str> = s.split_whitespace().collect();
            let wc = words.len().max(1) as f32;
            let overlap = words.iter().filter(|w| intent.contains(*w)).count() as f32;
            // Position prior: opening sentence strongest, then early quarter,
            // then trailing fifth, then body.
            let pos = if i == 0 {
                2.5
            } else if i < n / 4 {
                1.5
            } else if i > n * 4 / 5 {
                1.2
            } else {
                1.0
            };
            // Entity boost: sentences carrying numbers or URLs hold hard facts.
            let ent = if words
                .iter()
                .any(|w| w.parse::<f64>().is_ok() || w.starts_with("http"))
            {
                1.4
            } else {
                1.0
            };
            // Aggressive-only modifiers: boost soft-preserve sentences; penalise trailing-explanation
            // clauses that typically start with "This ", "These ", "It " or "Which " and carry
            // low new information (they rephrase or justify what came before).
            let (soft_boost, trailing_pen) = if mode == EfficientMode::Aggressive {
                let boost = if soft_match(s) { 1.3 } else { 1.0 };
                let t = s.trim();
                let pen = if t.starts_with("This ")
                    || t.starts_with("These ")
                    || t.starts_with("It ")
                    || t.starts_with("Which ")
                {
                    0.65
                } else {
                    1.0
                };
                (boost, pen)
            } else {
                (1.0, 1.0)
            };
            (
                i,
                // Base relevance (+0.2 floor so empty-overlap sentences still rank).
                (overlap / wc + 0.2) * pos * ent * soft_boost * trailing_pen,
            )
        })
        .collect();

    // Highest score first; force-keeps (f32::MAX) sort to the front.
    scored.sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
    let mut kept: HashSet<usize> = HashSet::new();
    let mut used = 0usize;
    for &(idx, score) in &scored {
        let s = sents[idx];
        // Force-keeps land regardless of budget but still count against it;
        // the break only fires once the budget is spent on a non-forced entry.
        if score == f32::MAX || used + tok(s) <= budget {
            kept.insert(idx);
            used += tok(s);
        }
        if used >= budget && score != f32::MAX {
            break;
        }
    }

    // Re-emit kept sentences in their original document order.
    let mut joined: String = (0..n)
        .filter(|i| kept.contains(i))
        .map(|i| sents[i].trim())
        .collect::<Vec<_>>()
        .join(". ");
    // Replace fillers with a single space so adjacent words don't fuse,
    // then collapse any resulting double-spaces.
    for filler in FILLERS {
        joined = joined.replace(filler, " ");
    }
    // Collapse "word  word" → "word word" after filler removal.
    while joined.contains("  ") {
        joined = joined.replace("  ", " ");
    }
    // Re-capitalize the first character in case filler stripping left a lowercase fragment.
    let joined = joined.trim();
    let mut chars = joined.chars();
    match chars.next() {
        None => String::new(),
        Some(c) => c.to_uppercase().collect::<String>() + chars.as_str(),
    }
}
531
532#[cfg(test)]
533mod tests {
534    use super::*;
535
    // Sub-80-token inputs must pass through untouched.
    #[test]
    fn short_prompt_passthrough() {
        let short = "Hello, please help me.";
        let r = compress(short, EfficientMode::Balanced);
        assert_eq!(r.text, short);
        assert_eq!(r.tokens_saved(), 0);
    }

    // Fenced code must be re-inserted verbatim by the fence-splitting pass.
    #[test]
    fn code_block_preserved_verbatim() {
        let msg = "Fix my code:\n```rust\nfn add(a: i32, b: i32) -> i32 { a + b }\n```\nIt panics.";
        let r = compress(msg, EfficientMode::Balanced);
        assert!(
            r.text.contains("fn add(a: i32"),
            "code block must survive compression"
        );
    }

    // Off mode is an identity transform even on long inputs.
    #[test]
    fn off_mode_is_identity() {
        let text = "word ".repeat(100);
        let r = compress(&text, EfficientMode::Off);
        assert_eq!(r.text, text);
        assert_eq!(r.tokens_saved(), 0);
    }

    // Balanced should achieve meaningful savings on filler-heavy prose while
    // keeping the intent keywords from the opening sentences.
    #[test]
    fn balanced_reduces_long_prose() {
        let msg =
            "I am working on a React component and experiencing a problem with state management. \
            The component re-renders multiple times when it should only render once. \
            I have tried using useMemo but it does not seem to work as expected. \
            Basically the error says too many re-renders and I believe the issue might be related \
            to the useEffect dependency array. \
            I think I need help understanding what is going wrong and how to resolve the problem. \
            I would like to know if there is a standard approach for fixing infinite render loops. \
            Please provide a clear explanation and I'd like step-by-step guidance if possible.";
        let r = compress(msg, EfficientMode::Balanced);
        let ratio = r.compressed_tokens as f32 / r.original_tokens as f32;
        assert!(
            ratio < 0.85,
            "expected >15 % compression on long prose, got {ratio:.2}"
        );
        assert!(r.text.contains("React"), "intent keywords must survive");
    }

    // Config parsing is case-insensitive; unknown strings map to Off.
    #[test]
    fn parse_config_roundtrip() {
        assert_eq!(
            EfficientMode::parse_config("balanced"),
            EfficientMode::Balanced
        );
        assert_eq!(
            EfficientMode::parse_config("AGGRESSIVE"),
            EfficientMode::Aggressive
        );
        assert_eq!(EfficientMode::parse_config("off"), EfficientMode::Off);
        assert_eq!(EfficientMode::parse_config("unknown"), EfficientMode::Off);
    }

    // Free-form intent phrases resolve to the expected modes.
    #[test]
    fn parse_natural_language_roundtrip() {
        assert_eq!(
            EfficientMode::parse_natural_language("use aggressive eco mode for max savings"),
            EfficientMode::Aggressive
        );
        assert_eq!(
            EfficientMode::parse_natural_language("balanced mode please"),
            EfficientMode::Balanced
        );
        assert_eq!(
            EfficientMode::parse_natural_language("disable compression for this turn"),
            EfficientMode::Off
        );
    }
611
    // The telemetry callback must fire exactly once per compression call and
    // carry the caller-supplied semantic score through unchanged.
    #[test]
    fn telemetry_callback_emits_metrics() {
        use std::sync::{Arc, Mutex};
        let captured: Arc<Mutex<Vec<CompressionMetrics>>> = Arc::new(Mutex::new(Vec::new()));
        let sink = Arc::clone(&captured);
        let compressor = PromptCompressor::with_telemetry_callback(
            EfficientMode::Balanced,
            Some(Box::new(move |m| {
                sink.lock().expect("lock").push(m);
            })),
        );
        let _ = compressor.compress_with_semantic_score(
            "I think I would like to understand basically why the dashboard is showing a red error badge. \
            Please note that I already restarted the daemon and still see the issue.",
            Some(0.91),
        );
        let rows = captured.lock().expect("lock");
        assert_eq!(rows.len(), 1);
        let m = &rows[0];
        assert_eq!(m.mode, EfficientMode::Balanced);
        assert!(m.original_tokens >= m.compressed_tokens);
        assert!(m.savings_ratio_pct >= 0.0);
        assert_eq!(m.semantic_preservation_score, Some(0.91));
    }

    // The lexical heuristic stays in [0, 1] and scores high on near-identical text.
    #[test]
    fn semantic_preservation_score_reasonable_range() {
        let original = "Please restart the daemon and check the red error badge in dashboard logs";
        let compressed = "Restart daemon; check red error badge in dashboard logs";
        let score = estimate_semantic_preservation_score(original, compressed);
        assert!((0.0..=1.0).contains(&score));
        assert!(score > 0.5, "expected high overlap score, got {score}");
    }

    // End-to-end smoke test: opcodes, URLs, bindings and intent markers from
    // the hard-preserve list must all survive Balanced compression.
    #[test]
    fn smoke_complex_ainl_workflow_question() {
        let input = "\
            I am really trying to understand basically why my AINL workflow is failing at the R http.GET step. \
            I think the issue might be related to the timeout setting or the URL format that I am passing to the adapter. \
            Essentially, the workflow looks like this: I start with L_start, then I call R http.GET https://api.example.com/data?key=abc&region=us-east-1 ->result, \
            and after that I do R core.GET result body ->body. \
            I have already tried increasing the timeout to 30 seconds by passing a third positional argument, but it does not seem to help. \
            To be honest, I am not really sure whether the problem is the URL query string encoding, \
            or whether the -> result binding is somehow not resolving the value correctly in the next step. \
            Please note that I have already checked the adapter docs and the http adapter section of AGENTS.md. \
            I would really appreciate a step-by-step explanation of what might be going wrong and what exact steps I should take to debug this. \
            It would also be helpful if you could show me the correct opcode syntax for a GET request with headers and timeout.";
        let r = compress(input, EfficientMode::Balanced);
        let savings =
            100usize.saturating_sub((r.compressed_tokens * 100) / r.original_tokens.max(1));
        assert!(
            r.text.contains("R http.GET") || r.text.contains("http.GET"),
            "http.GET must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("https://") || r.text.contains("api.example.com"),
            "URL must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("->"),
            "-> binding must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("steps") || r.text.contains("step"),
            "steps/step must survive: got: {}",
            r.text
        );
        assert!(
            savings >= 10,
            "expected ≥10 % savings on complex AINL question ({}→{} tok), got {}%: [{}]",
            r.original_tokens,
            r.compressed_tokens,
            savings,
            r.text
        );
    }
691
    // Aggressive must produce measurably more savings than Balanced, both on
    // everyday prose and on changelog text dense with soft-preserve identifiers
    // (where Aggressive demotes soft-preserves from force-keep to score-boost).
    #[test]
    fn aggressive_vs_balanced_gap() {
        let everyday =
            "I am working on a React component and experiencing a problem with state management. \
            The component re-renders multiple times when it should only render once. \
            I have tried using useMemo but it does not seem to work as expected. \
            Basically the error says too many re-renders and I believe the issue might be related \
            to the useEffect dependency array. \
            I think I need help understanding what is going wrong and how to resolve the problem. \
            I would like to know if there is a standard approach for fixing infinite render loops. \
            Please provide a clear explanation and I'd like step-by-step guidance if possible.";
        let bal = compress(everyday, EfficientMode::Balanced);
        let agg = compress(everyday, EfficientMode::Aggressive);
        let bal_pct =
            100usize.saturating_sub((bal.compressed_tokens * 100) / bal.original_tokens.max(1));
        let agg_pct =
            100usize.saturating_sub((agg.compressed_tokens * 100) / agg.original_tokens.max(1));

        let changelog = "The ArmaraOS kernel now injects efficient_mode into each scheduled run. \
            This makes the list self-documenting and more robust for real dashboard status messages. \
            The openfang runtime resolves the manifest field at startup. \
            It is worth noting that the latency is under 30 ms for most prompts. \
            These changes improve the armaraos agent scheduling pipeline significantly. \
            Which means users can expect 20 % fewer API calls on high-volume deployments. \
            The openfang kernel also now exposes a new manifest key for efficient_mode override. \
            This ensures per-agent configuration always wins over the global config value.";
        let bal_cl = compress(changelog, EfficientMode::Balanced);
        let agg_cl = compress(changelog, EfficientMode::Aggressive);
        let bal_cl_pct = 100usize
            .saturating_sub((bal_cl.compressed_tokens * 100) / bal_cl.original_tokens.max(1));
        let agg_cl_pct = 100usize
            .saturating_sub((agg_cl.compressed_tokens * 100) / agg_cl.original_tokens.max(1));

        assert!(
            agg_pct > bal_pct + 10,
            "Aggressive should beat Balanced by >10% on everyday prose; Bal={}% Agg={}%",
            bal_pct,
            agg_pct
        );
        assert!(
            agg_cl_pct > bal_cl_pct + 8,
            "Aggressive should beat Balanced by >8% on soft-identifier changelog; Bal={}% Agg={}%",
            bal_cl_pct,
            agg_cl_pct
        );
    }

    // Hard-preserve markers ("exact steps") must survive even Aggressive mode.
    #[test]
    fn preserve_marker_forces_keep() {
        let msg = "I want help. Please do not drop the exact steps required for this. ".repeat(20);
        let r = compress(&msg, EfficientMode::Aggressive);
        assert!(
            r.text.contains("exact steps"),
            "preserve marker must survive aggressive mode"
        );
    }

    // The README's dashboard example should keep its diagnostic context while
    // still hitting the advertised savings level.
    #[test]
    fn readme_dashboard_example_ratio() {
        let input = "I think I would like to understand basically why the dashboard is showing me \
            a red error badge on the agents page. Essentially, it seems like the agent is not \
            responding and I am not sure what steps I should take to investigate this issue. \
            Please note that I have already tried restarting the daemon. To be honest, I am not \
            really sure where to look next.";
        let r = compress(input, EfficientMode::Balanced);
        let savings =
            100usize.saturating_sub((r.compressed_tokens * 100) / r.original_tokens.max(1));
        assert!(
            r.text.contains("red error badge") || r.text.contains("error badge"),
            "error badge context must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("daemon"),
            "daemon restart context must survive"
        );
        assert!(
            savings >= 30,
            "expected ≥30 % savings on verbose dashboard question, got {}%: [{}]",
            savings,
            r.text
        );
    }

    // Technical opcode/URL/binding fragments must survive Balanced compression.
    #[test]
    fn http_adapter_prompt_preserves_technical_terms() {
        let input =
            "Can you help me understand why the R http.GET call is failing with a timeout? \
            I am using the URL https://example.com/api?key=abc and getting a connection error. \
            The adapter seems to not be working and I am not sure if it is the timeout setting \
            or the URL format that is causing issues with the -> result binding.";
        let r = compress(input, EfficientMode::Balanced);
        assert!(
            r.text.contains("R http.GET") || r.text.contains("http.GET"),
            "R http.GET must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("https://") || r.text.contains("http"),
            "URL must survive: got: {}",
            r.text
        );
        assert!(
            r.text.contains("->"),
            "-> binding must survive: got: {}",
            r.text
        );
    }
800
801    #[test]
802    fn benchmark_mode_savings_corpus() {
803        let corpus = vec![
804            (
805                "dashboard-verbose",
806                "I think I would like to understand basically why the dashboard is showing me \
807                a red error badge on the agents page. Essentially, it seems like the agent is not \
808                responding and I am not sure what steps I should take to investigate this issue. \
809                Please note that I have already tried restarting the daemon. To be honest, I am not \
810                really sure where to look next.",
811            ),
812            (
813                "ainl-http-technical",
814                "I am really trying to understand basically why my AINL workflow is failing at the R http.GET step. \
815                I think the issue might be related to the timeout setting or the URL format that I am passing to the adapter. \
816                Essentially, the workflow looks like this: I start with L_start, then I call R http.GET https://api.example.com/data?key=abc&region=us-east-1 ->result, \
817                and after that I do R core.GET result body ->body. \
818                I have already tried increasing the timeout to 30 seconds by passing a third positional argument, but it does not seem to help. \
819                To be honest, I am not really sure whether the problem is the URL query string encoding, \
820                or whether the -> result binding is somehow not resolving the value correctly in the next step.",
821            ),
822            (
823                "everyday-prose",
824                "I am working on a React component and experiencing a problem with state management. \
825                The component re-renders multiple times when it should only render once. \
826                I have tried using useMemo but it does not seem to work as expected. \
827                Basically the error says too many re-renders and I believe the issue might be related \
828                to the useEffect dependency array. \
829                I think I need help understanding what is going wrong and how to resolve the problem. \
830                I would like to know if there is a standard approach for fixing infinite render loops. \
831                Please provide a clear explanation and I'd like step-by-step guidance if possible.",
832            ),
833            (
834                "changelog-soft-identifiers",
835                "The ArmaraOS kernel now injects efficient_mode into each scheduled run. \
836                This makes the list self-documenting and more robust for real dashboard status messages. \
837                The openfang runtime resolves the manifest field at startup. \
838                It is worth noting that the latency is under 30 ms for most prompts. \
839                These changes improve the armaraos agent scheduling pipeline significantly. \
840                Which means users can expect 20 % fewer API calls on high-volume deployments. \
841                The openfang kernel also now exposes a new manifest key for efficient_mode override. \
842                This ensures per-agent configuration always wins over the global config value.",
843            ),
844        ];
845
846        let mut balanced_pcts: Vec<u64> = Vec::new();
847        let mut aggressive_pcts: Vec<u64> = Vec::new();
848
849        for (name, input) in corpus {
850            let off = compress(input, EfficientMode::Off);
851            let bal = compress(input, EfficientMode::Balanced);
852            let agg = compress(input, EfficientMode::Aggressive);
853
854            let bal_pct = 100u64.saturating_sub(
855                (bal.compressed_tokens as u64 * 100) / bal.original_tokens.max(1) as u64,
856            );
857            let agg_pct = 100u64.saturating_sub(
858                (agg.compressed_tokens as u64 * 100) / agg.original_tokens.max(1) as u64,
859            );
860
861            balanced_pcts.push(bal_pct);
862            aggressive_pcts.push(agg_pct);
863
864            eprintln!(
865                "[bench] {name}: off={}tok, balanced={}tok (↓{}%), aggressive={}tok (↓{}%), delta=+{}%",
866                off.compressed_tokens,
867                bal.compressed_tokens,
868                bal_pct,
869                agg.compressed_tokens,
870                agg_pct,
871                agg_pct.saturating_sub(bal_pct)
872            );
873        }
874
875        balanced_pcts.sort_unstable();
876        aggressive_pcts.sort_unstable();
877        let mid = balanced_pcts.len() / 2;
878        let bal_median = balanced_pcts[mid];
879        let agg_median = aggressive_pcts[mid];
880        let bal_mean = balanced_pcts.iter().sum::<u64>() as f64 / balanced_pcts.len() as f64;
881        let agg_mean = aggressive_pcts.iter().sum::<u64>() as f64 / aggressive_pcts.len() as f64;
882
883        eprintln!(
884            "[bench-summary] balanced median={}%, mean={:.1}% | aggressive median={}%, mean={:.1}% | delta median=+{}%",
885            bal_median,
886            bal_mean,
887            agg_median,
888            agg_mean,
889            agg_median.saturating_sub(bal_median)
890        );
891
892        assert!(
893            agg_median >= bal_median,
894            "aggressive should not underperform balanced median"
895        );
896    }
897}