Skip to main content

ainl_compression/
lib.rs

1//! Heuristic compression primitives for AINL hosts.
2//!
3//! This crate currently exposes the ArmaraOS "Ultra Cost-Efficient Mode"
4//! input compressor. It is intentionally embedding-free and dependency-light
5//! so it can be reused across hosts without shipping local ML models.
6//!
7//! Companion modules (no I/O; host wiring stays out-of-crate):
8//! - [`profiles`] — built-in **compression profiles** and project→profile hints
9//! - [`adaptive`] — **content-shaped** `EfficientMode` recommendations
10//! - [`cache`] — **TTL hysteresis** for cache-aware coordination
11//!
12//! Set `RUST_LOG=ainl_compression=debug` to enable full before/after text
13//! logging per call (useful for tuning preserve lists and retention ratios).
14
15use std::collections::HashSet;
16use std::time::Instant;
17use tracing::debug;
18
19pub mod adaptive;
20pub mod cache;
21pub mod profiles;
22
23pub use adaptive::{recommend_mode_for_content, AdaptiveRecommendation};
24pub use cache::{cache_policy_summary, effective_ttl_with_hysteresis, CacheTtlResult};
25pub use profiles::{
26    list_builtin_profiles, resolve_builtin_profile, suggest_profile_id_for_project,
27    CompressionProfile, BUILTIN_PROFILES,
28};
29
/// Input compression aggressiveness.
///
/// The [`Default`] variant is [`EfficientMode::Off`]; compression is always opt-in.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[cfg_attr(
    feature = "graph-telemetry",
    derive(serde::Serialize, serde::Deserialize)
)]
pub enum EfficientMode {
    /// Pass through without modification. This is the `Default` variant.
    #[default]
    Off,
    /// ~55 % token retention (~45 % reduction) — the sweet-spot for most prompts.
    Balanced,
    /// ~35 % token retention (~65 % reduction) — opt-in for high-volume / cost-sensitive paths.
    Aggressive,
}

impl EfficientMode {
    /// Parse from a config string; unknown values → `Off`.
    ///
    /// Matching is ASCII case-insensitive and ignores surrounding whitespace, so values
    /// read from config files or environment variables (e.g. `"Balanced\n"`) resolve as
    /// intended instead of silently falling back to `Off`. `"adaptive"` is accepted as an
    /// alias for `Balanced`.
    pub fn parse_config(s: &str) -> Self {
        let key = s.trim();
        if key.eq_ignore_ascii_case("balanced") || key.eq_ignore_ascii_case("adaptive") {
            Self::Balanced
        } else if key.eq_ignore_ascii_case("aggressive") {
            Self::Aggressive
        } else {
            Self::Off
        }
    }

    /// Parse from free-form natural language intent.
    ///
    /// Phrase groups are checked in a fixed order — "off" phrases first, then
    /// "aggressive" phrases, then "balanced" phrases — and the first group with a
    /// substring match wins. Input that matches no phrase is handed to
    /// [`EfficientMode::parse_config`].
    ///
    /// Examples:
    /// - "use aggressive eco mode" -> `Aggressive`
    /// - "balanced mode please" -> `Balanced`
    /// - "disable compression" -> `Off`
    pub fn parse_natural_language(s: &str) -> Self {
        const OFF_PHRASES: [&str; 6] = [
            "disable compression",
            "no compression",
            "compression off",
            "eco off",
            "turn off eco",
            "off mode",
        ];
        const AGGRESSIVE_PHRASES: [&str; 5] = [
            "aggressive",
            "max savings",
            "highest savings",
            "ultra eco",
            "eco aggressive",
        ];
        const BALANCED_PHRASES: [&str; 5] = [
            "balanced",
            "default eco",
            "eco balanced",
            "enable eco",
            "compression on",
        ];

        let lo = s.to_ascii_lowercase();
        let mentions = |phrases: &[&str]| phrases.iter().any(|p| lo.contains(*p));

        if mentions(&OFF_PHRASES) {
            Self::Off
        } else if mentions(&AGGRESSIVE_PHRASES) {
            Self::Aggressive
        } else if mentions(&BALANCED_PHRASES) {
            Self::Balanced
        } else {
            Self::parse_config(&lo)
        }
    }

    /// Token retention ratio.
    ///
    /// `Balanced` targets ~55 % retention (~45 % reduction) — sweet-spot for most prompts.
    /// `Aggressive` targets ~35 % retention (~65 % reduction) — meaningfully wider gap vs
    /// Balanced; soft-preserve terms become score-boosts rather than force-keeps, and
    /// trailing-explanation sentences get a score penalty to prune meta-commentary.
    /// `Off` retains everything (ratio 1.0).
    fn retain(self) -> f32 {
        match self {
            Self::Balanced => 0.55,
            Self::Aggressive => 0.35,
            Self::Off => 1.0,
        }
    }
}
107
108/// Structured telemetry emitted for each compression operation.
109#[derive(Debug, Clone)]
110#[cfg_attr(
111    feature = "graph-telemetry",
112    derive(serde::Serialize, serde::Deserialize)
113)]
114pub struct CompressionMetrics {
115    pub mode: EfficientMode,
116    pub original_chars: usize,
117    pub compressed_chars: usize,
118    pub original_tokens: usize,
119    pub compressed_tokens: usize,
120    pub tokens_saved: usize,
121    /// Percentage saved, range 0.0..100.0.
122    pub savings_ratio_pct: f32,
123    /// Optional caller-provided semantic preservation score.
124    pub semantic_preservation_score: Option<f32>,
125    pub elapsed_ms: u64,
126}
127
128impl CompressionMetrics {
129    #[must_use]
130    pub fn from_result(
131        mode: EfficientMode,
132        original_text: &str,
133        compressed: &Compressed,
134        semantic_preservation_score: Option<f32>,
135        elapsed_ms: u64,
136    ) -> Self {
137        let tokens_saved = compressed.tokens_saved();
138        let savings_ratio_pct = if compressed.original_tokens == 0 {
139            0.0
140        } else {
141            (tokens_saved as f32 * 100.0) / compressed.original_tokens as f32
142        };
143        Self {
144            mode,
145            original_chars: original_text.len(),
146            compressed_chars: compressed.text.len(),
147            original_tokens: compressed.original_tokens,
148            compressed_tokens: compressed.compressed_tokens,
149            tokens_saved,
150            savings_ratio_pct,
151            semantic_preservation_score,
152            elapsed_ms,
153        }
154    }
155}
156
/// Optional telemetry sink for compression metrics.
///
/// Implementors receive one [`CompressionMetrics`] value per `emit` call. The
/// `Send + Sync` supertraits are required of every implementor.
// NOTE(review): nothing in the visible part of this file calls `emit`; `PromptCompressor`
// takes a boxed closure instead — confirm where this trait is consumed.
pub trait CompressionTelemetrySink: Send + Sync {
    /// Receives the metrics of one compression operation, taking ownership of them.
    fn emit(&self, metrics: CompressionMetrics);
}
161
162/// Standalone input prompt compressor.
163///
164/// This is the intended public API for external agents that want to adopt
165/// AINL eco-mode compression without pulling runtime-specific crates.
166pub struct PromptCompressor {
167    mode: EfficientMode,
168    emit_telemetry: Option<Box<dyn Fn(CompressionMetrics) + Send + Sync>>,
169}
170
171impl PromptCompressor {
172    #[must_use]
173    pub fn new(mode: EfficientMode) -> Self {
174        Self {
175            mode,
176            emit_telemetry: None,
177        }
178    }
179
180    #[must_use]
181    pub fn from_natural_language(mode_hint: &str) -> Self {
182        Self::new(EfficientMode::parse_natural_language(mode_hint))
183    }
184
185    #[must_use]
186    pub fn with_telemetry_callback(
187        mode: EfficientMode,
188        emit_telemetry: Option<Box<dyn Fn(CompressionMetrics) + Send + Sync>>,
189    ) -> Self {
190        Self {
191            mode,
192            emit_telemetry,
193        }
194    }
195
196    pub fn compress(&self, text: &str) -> Compressed {
197        self.compress_with_semantic_score(text, None)
198    }
199
200    pub fn compress_with_semantic_score(
201        &self,
202        text: &str,
203        semantic_preservation_score: Option<f32>,
204    ) -> Compressed {
205        let t0 = Instant::now();
206        let result = compress(text, self.mode);
207        if let Some(cb) = &self.emit_telemetry {
208            cb(CompressionMetrics::from_result(
209                self.mode,
210                text,
211                &result,
212                semantic_preservation_score,
213                t0.elapsed().as_millis() as u64,
214            ));
215        }
216        result
217    }
218}
219
/// Result of a compression pass.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so results can be logged, stored
/// and compared directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Compressed {
    /// Compressed (or original, on no-op) text.
    pub text: String,
    /// Estimated original token count (heuristic: byte length / 4 + 1).
    pub original_tokens: usize,
    /// Estimated compressed token count (same heuristic).
    pub compressed_tokens: usize,
}

impl Compressed {
    /// Tokens saved; 0 when compression was a no-op.
    ///
    /// Uses a saturating subtraction, so a hand-built value whose `compressed_tokens`
    /// exceeds `original_tokens` also yields 0 rather than underflowing.
    #[must_use]
    pub fn tokens_saved(&self) -> usize {
        self.original_tokens.saturating_sub(self.compressed_tokens)
    }
}
236
/// Heuristic token estimate: one token per four bytes of UTF-8, plus one.
///
/// Public alias of the internal `tok` helper so other AINL crates
/// (notably [`ainl-context-compiler`](https://docs.rs/ainl-context-compiler)) can reuse
/// the heuristic instead of copying it. Embedding-free and dependency-light by design —
/// for a real tokenizer count, use a tiktoken/bpe crate at the host layer.
#[inline]
#[must_use]
pub fn tokenize_estimate(s: &str) -> usize {
    tok(s)
}

/// Internal estimator: `str::len` (a byte count) divided by four, plus one, so the
/// result is never zero — the empty string estimates to 1.
fn tok(s: &str) -> usize {
    let byte_len = s.len();
    1 + byte_len / 4
}
252
/// Filler phrases removed from the prose that survives sentence selection.
///
/// `compress_prose` replaces every occurrence with a single space via `str::replace`,
/// so matching is case-sensitive and the leading/trailing spaces in each entry are
/// part of the pattern.
const FILLERS: &[&str] = &[
    "I think ",
    "I believe ",
    "Basically, ",
    "Essentially, ",
    "Of course, ",
    "Please note that ",
    "It is worth noting that ",
    "It's worth noting that ",
    "I would like to ",
    "I'd like to ",
    "Don't hesitate to ",
    "Feel free to ",
    "As you know, ",
    "As mentioned earlier, ",
    "That being said, ",
    "To be honest, ",
    "Needless to say, ",
    // Mid-sentence hedging words (always safe to strip)
    " basically ",
    " essentially ",
    " simply ",
    " just ",
    " very ",
    " really ",
];
279
/// Hard-preserve: force-keep in **both** Balanced and Aggressive.
/// Irreplaceable content — actual opcodes, URLs, diagnostic history, user-intent markers.
///
/// `hard_keep` lowercases both the sentence and each entry before a substring test,
/// so entries match case-insensitively anywhere in a sentence (e.g. `"R http"` also
/// matches the lowercase sequence `"r http"`).
const HARD_PRESERVE: &[&str] = &[
    "exact",
    "steps",
    "already tried",
    "already restarted",
    "already checked",
    "restart",
    "daemon",
    "error",
    "http://",
    "https://",
    "R http",
    "R web",
    "L_",
    "->",
    "::",
    ".ainl",
    "opcode",
    "R queue",
    "R llm",
    "R core",
    "R solana",
    "R postgres",
    "R redis",
    "```",
];
308
/// Soft-preserve: force-keep in Balanced; **score-boost only** in Aggressive.
/// These identifiers/units are important but the LLM can reconstruct context without them
/// when the budget is tight.  Freeing them lets Aggressive prune changelog-dense text
/// where these terms would otherwise lock in nearly every sentence.
///
/// `soft_match` tests these as case-insensitive substrings; the leading space on the
/// unit entries (`" ms"`, `" %"`, …) is part of the pattern.
const SOFT_PRESERVE: &[&str] = &[
    "##", " ms", " kb", " mb", " gb", " %", "openfang", "armaraos", "manifest",
];
316
317fn hard_keep(s: &str) -> bool {
318    let lo = s.to_lowercase();
319    HARD_PRESERVE.iter().any(|p| lo.contains(&p.to_lowercase()))
320}
321
322fn soft_match(s: &str) -> bool {
323    let lo = s.to_lowercase();
324    SOFT_PRESERVE.iter().any(|p| lo.contains(&p.to_lowercase()))
325}
326
327/// Returns `true` when `s` must be included regardless of budget.
328fn must_keep(s: &str, mode: EfficientMode) -> bool {
329    hard_keep(s) || (mode != EfficientMode::Aggressive && soft_match(s))
330}
331
332/// Compress `text` toward `mode.retain()` of its original token budget.
333///
334/// Prompts shorter than 80 tokens, or `Off` mode, pass through unchanged.
335/// Code fences (` ``` `) are extracted and re-inserted verbatim.
336pub fn compress(text: &str, mode: EfficientMode) -> Compressed {
337    let orig = tok(text);
338    if mode == EfficientMode::Off || orig < 80 {
339        return Compressed {
340            text: text.to_string(),
341            original_tokens: orig,
342            compressed_tokens: orig,
343        };
344    }
345    // Floor: never go below 25 % of original (prevents total context loss on short messages),
346    // but keep it relative so both modes stay distinct on moderate-length inputs.
347    // The old fixed `.max(80)` floor was equalising Balanced and Aggressive on ~100–200 token
348    // messages because both natural budgets fell below 80, producing identical outputs.
349    let budget = ((orig as f32 * mode.retain()) as usize).max(orig / 4);
350
351    // Split at code fences; preserve code blocks verbatim.
352    let mut blocks: Vec<(bool, String)> = Vec::new();
353    let mut rest = text;
354    while let Some(f) = rest.find("```") {
355        if f > 0 {
356            blocks.push((false, rest[..f].to_string()));
357        }
358        rest = &rest[f + 3..];
359        if let Some(e) = rest.find("```") {
360            blocks.push((true, format!("```{}```", &rest[..e])));
361            rest = &rest[e + 3..];
362        } else {
363            blocks.push((true, format!("```{rest}")));
364            rest = "";
365            break;
366        }
367    }
368    if !rest.is_empty() {
369        blocks.push((false, rest.to_string()));
370    }
371
372    let code_tok: usize = blocks.iter().filter(|(c, _)| *c).map(|(_, t)| tok(t)).sum();
373    let mut prose_budget = budget.saturating_sub(code_tok);
374    let mut out: Vec<String> = Vec::new();
375
376    for (is_code, block) in &blocks {
377        if *is_code {
378            out.push(block.clone());
379            continue;
380        }
381        let prose = compress_prose(block, prose_budget, mode);
382        prose_budget = prose_budget.saturating_sub(tok(&prose));
383        out.push(prose);
384    }
385
386    let result = out.join("\n\n").trim().to_string();
387    let c = tok(&result);
388    // Safety: never return longer than original.
389    if c >= orig {
390        debug!(orig_tok = orig, "prompt_compressor: no gain — passthrough");
391        Compressed {
392            text: text.to_string(),
393            original_tokens: orig,
394            compressed_tokens: orig,
395        }
396    } else {
397        debug!(
398            orig_tok = orig,
399            compressed_tok = c,
400            savings_pct = 100u64.saturating_sub((c as u64 * 100) / orig.max(1) as u64),
401            original_text = %text,
402            compressed_text = %result,
403            "prompt_compressor: compressed"
404        );
405        Compressed {
406            text: result,
407            original_tokens: orig,
408            compressed_tokens: c,
409        }
410    }
411}
412
413/// Compress and return structured telemetry metrics in one call.
414pub fn compress_with_metrics(
415    text: &str,
416    mode: EfficientMode,
417    semantic_preservation_score: Option<f32>,
418) -> (Compressed, CompressionMetrics) {
419    let t0 = Instant::now();
420    let result = compress(text, mode);
421    let semantic_preservation_score = semantic_preservation_score
422        .or_else(|| Some(estimate_semantic_preservation_score(text, &result.text)));
423    let metrics = CompressionMetrics::from_result(
424        mode,
425        text,
426        &result,
427        semantic_preservation_score,
428        t0.elapsed().as_millis() as u64,
429    );
430    (result, metrics)
431}
432
/// Lightweight lexical semantic-preservation heuristic in range 0.0..1.0.
///
/// Both inputs are reduced to a set of lowercased terms (runs of alphanumerics, `_`
/// and `-` that are at least four bytes long). The score is the fraction of the
/// original's terms that also occur in the compressed text. An original with no
/// qualifying terms scores `1.0`.
#[must_use]
pub fn estimate_semantic_preservation_score(original: &str, compressed: &str) -> f32 {
    /// Distinct lowercased terms of `text` with a byte length of four or more.
    fn vocabulary(text: &str) -> HashSet<String> {
        let is_separator = |c: char| !(c.is_alphanumeric() || c == '_' || c == '-');
        text.split(is_separator)
            .filter(|piece| piece.len() >= 4)
            .map(str::to_ascii_lowercase)
            .collect()
    }

    let source_terms = vocabulary(original);
    if source_terms.is_empty() {
        // Nothing to lose, so nothing was lost.
        return 1.0;
    }
    let kept_terms = vocabulary(compressed);
    let shared = source_terms.intersection(&kept_terms).count();
    (shared as f32 / source_terms.len() as f32).clamp(0.0, 1.0)
}
450
/// Prunes one prose block down toward `budget` estimated tokens.
///
/// The block is split into sentence fragments, each fragment is scored, and fragments
/// are kept greedily from highest score down until the budget is used up. Fragments
/// flagged by `must_keep` are always kept, even past the budget. Survivors are
/// re-joined in their original order, filler phrases are stripped, and the first
/// character is upper-cased.
///
/// Blocks with two or fewer fragments are returned unchanged (no filler stripping).
fn compress_prose(text: &str, budget: usize, mode: EfficientMode) -> String {
    // Fragment boundaries are ". " and newlines; whitespace-only fragments are dropped.
    let sents: Vec<&str> = text
        .split(". ")
        .flat_map(|l| l.split('\n'))
        .filter(|s| !s.trim().is_empty())
        .collect();
    if sents.len() <= 2 {
        return text.to_string();
    }

    // Intent vocabulary from the first two sentences (position-biased TF-IDF proxy).
    // Only whitespace-separated words longer than three bytes are recorded, verbatim
    // (case and attached punctuation included).
    let intent: HashSet<&str> = sents
        .iter()
        .take(2)
        .flat_map(|s| s.split_whitespace())
        .filter(|w| w.len() > 3)
        .collect();
    let n = sents.len();

    let mut scored: Vec<(usize, f32)> = sents
        .iter()
        .enumerate()
        .map(|(i, &s)| {
            // `f32::MAX` is the sentinel for "forced keep"; it sorts ahead of every
            // computed score and is recognised again in the selection loop below.
            if must_keep(s, mode) {
                return (i, f32::MAX);
            }
            let words: Vec<&str> = s.split_whitespace().collect();
            let wc = words.len().max(1) as f32;
            let overlap = words.iter().filter(|w| intent.contains(*w)).count() as f32;
            // Position weight: the opening sentence counts most, then the first quarter,
            // then the final fifth; everything in between is neutral.
            let pos = if i == 0 {
                2.5
            } else if i < n / 4 {
                1.5
            } else if i > n * 4 / 5 {
                1.2
            } else {
                1.0
            };
            // Entity weight: a bare number or a word starting with "http" earns a boost.
            let ent = if words
                .iter()
                .any(|w| w.parse::<f64>().is_ok() || w.starts_with("http"))
            {
                1.4
            } else {
                1.0
            };
            // Aggressive-only modifiers: boost soft-preserve sentences; penalise trailing-explanation
            // clauses that typically start with "This ", "These ", "It " or "Which " and carry
            // low new information (they rephrase or justify what came before).
            let (soft_boost, trailing_pen) = if mode == EfficientMode::Aggressive {
                let boost = if soft_match(s) { 1.3 } else { 1.0 };
                let t = s.trim();
                let pen = if t.starts_with("This ")
                    || t.starts_with("These ")
                    || t.starts_with("It ")
                    || t.starts_with("Which ")
                {
                    0.65
                } else {
                    1.0
                };
                (boost, pen)
            } else {
                (1.0, 1.0)
            };
            // Base score is intent-word density plus a 0.2 floor, scaled by all weights.
            (
                i,
                (overlap / wc + 0.2) * pos * ent * soft_boost * trailing_pen,
            )
        })
        .collect();

    // Highest score first. The sort is unstable, so the relative order of fragments
    // with equal scores is unspecified.
    scored.sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
    let mut kept: HashSet<usize> = HashSet::new();
    let mut used = 0usize;
    for &(idx, score) in &scored {
        let s = sents[idx];
        // Forced fragments are kept unconditionally; others only if they still fit.
        if score == f32::MAX || used + tok(s) <= budget {
            kept.insert(idx);
            used += tok(s);
        }
        // Stop once the budget is spent — but never while still walking the forced
        // fragments at the front of the list.
        if used >= budget && score != f32::MAX {
            break;
        }
    }

    // Re-assemble survivors in their original order, separated by ". ".
    let mut joined: String = (0..n)
        .filter(|i| kept.contains(i))
        .map(|i| sents[i].trim())
        .collect::<Vec<_>>()
        .join(". ");
    // Replace fillers with a single space so adjacent words don't fuse,
    // then collapse any resulting double-spaces.
    for filler in FILLERS {
        joined = joined.replace(filler, " ");
    }
    // Collapse "word  word" → "word word" after filler removal.
    while joined.contains("  ") {
        joined = joined.replace("  ", " ");
    }
    // Re-capitalize the first character in case filler stripping left a lowercase fragment.
    let joined = joined.trim();
    let mut chars = joined.chars();
    match chars.next() {
        None => String::new(),
        Some(c) => c.to_uppercase().collect::<String>() + chars.as_str(),
    }
}
559
560#[cfg(test)]
561mod tests {
562    use super::*;
563
564    #[test]
565    fn short_prompt_passthrough() {
566        let short = "Hello, please help me.";
567        let r = compress(short, EfficientMode::Balanced);
568        assert_eq!(r.text, short);
569        assert_eq!(r.tokens_saved(), 0);
570    }
571
572    #[test]
573    fn code_block_preserved_verbatim() {
574        let msg = "Fix my code:\n```rust\nfn add(a: i32, b: i32) -> i32 { a + b }\n```\nIt panics.";
575        let r = compress(msg, EfficientMode::Balanced);
576        assert!(
577            r.text.contains("fn add(a: i32"),
578            "code block must survive compression"
579        );
580    }
581
582    #[test]
583    fn off_mode_is_identity() {
584        let text = "word ".repeat(100);
585        let r = compress(&text, EfficientMode::Off);
586        assert_eq!(r.text, text);
587        assert_eq!(r.tokens_saved(), 0);
588    }
589
590    #[test]
591    fn balanced_reduces_long_prose() {
592        let msg =
593            "I am working on a React component and experiencing a problem with state management. \
594            The component re-renders multiple times when it should only render once. \
595            I have tried using useMemo but it does not seem to work as expected. \
596            Basically the error says too many re-renders and I believe the issue might be related \
597            to the useEffect dependency array. \
598            I think I need help understanding what is going wrong and how to resolve the problem. \
599            I would like to know if there is a standard approach for fixing infinite render loops. \
600            Please provide a clear explanation and I'd like step-by-step guidance if possible.";
601        let r = compress(msg, EfficientMode::Balanced);
602        let ratio = r.compressed_tokens as f32 / r.original_tokens as f32;
603        assert!(
604            ratio < 0.85,
605            "expected >15 % compression on long prose, got {ratio:.2}"
606        );
607        assert!(r.text.contains("React"), "intent keywords must survive");
608    }
609
610    #[test]
611    fn parse_config_roundtrip() {
612        assert_eq!(
613            EfficientMode::parse_config("balanced"),
614            EfficientMode::Balanced
615        );
616        assert_eq!(
617            EfficientMode::parse_config("AGGRESSIVE"),
618            EfficientMode::Aggressive
619        );
620        assert_eq!(EfficientMode::parse_config("off"), EfficientMode::Off);
621        assert_eq!(
622            EfficientMode::parse_config("adaptive"),
623            EfficientMode::Balanced
624        );
625        assert_eq!(EfficientMode::parse_config("unknown"), EfficientMode::Off);
626    }
627
628    #[test]
629    fn parse_natural_language_roundtrip() {
630        assert_eq!(
631            EfficientMode::parse_natural_language("use aggressive eco mode for max savings"),
632            EfficientMode::Aggressive
633        );
634        assert_eq!(
635            EfficientMode::parse_natural_language("balanced mode please"),
636            EfficientMode::Balanced
637        );
638        assert_eq!(
639            EfficientMode::parse_natural_language("disable compression for this turn"),
640            EfficientMode::Off
641        );
642    }
643
644    #[test]
645    fn telemetry_callback_emits_metrics() {
646        use std::sync::{Arc, Mutex};
647        let captured: Arc<Mutex<Vec<CompressionMetrics>>> = Arc::new(Mutex::new(Vec::new()));
648        let sink = Arc::clone(&captured);
649        let compressor = PromptCompressor::with_telemetry_callback(
650            EfficientMode::Balanced,
651            Some(Box::new(move |m| {
652                sink.lock().expect("lock").push(m);
653            })),
654        );
655        let _ = compressor.compress_with_semantic_score(
656            "I think I would like to understand basically why the dashboard is showing a red error badge. \
657            Please note that I already restarted the daemon and still see the issue.",
658            Some(0.91),
659        );
660        let rows = captured.lock().expect("lock");
661        assert_eq!(rows.len(), 1);
662        let m = &rows[0];
663        assert_eq!(m.mode, EfficientMode::Balanced);
664        assert!(m.original_tokens >= m.compressed_tokens);
665        assert!(m.savings_ratio_pct >= 0.0);
666        assert_eq!(m.semantic_preservation_score, Some(0.91));
667    }
668
669    #[test]
670    fn semantic_preservation_score_reasonable_range() {
671        let original = "Please restart the daemon and check the red error badge in dashboard logs";
672        let compressed = "Restart daemon; check red error badge in dashboard logs";
673        let score = estimate_semantic_preservation_score(original, compressed);
674        assert!((0.0..=1.0).contains(&score));
675        assert!(score > 0.5, "expected high overlap score, got {score}");
676    }
677
678    #[test]
679    fn smoke_complex_ainl_workflow_question() {
680        let input = "\
681            I am really trying to understand basically why my AINL workflow is failing at the R http.GET step. \
682            I think the issue might be related to the timeout setting or the URL format that I am passing to the adapter. \
683            Essentially, the workflow looks like this: I start with L_start, then I call R http.GET https://api.example.com/data?key=abc&region=us-east-1 ->result, \
684            and after that I do R core.GET result body ->body. \
685            I have already tried increasing the timeout to 30 seconds by passing a third positional argument, but it does not seem to help. \
686            To be honest, I am not really sure whether the problem is the URL query string encoding, \
687            or whether the -> result binding is somehow not resolving the value correctly in the next step. \
688            Please note that I have already checked the adapter docs and the http adapter section of AGENTS.md. \
689            I would really appreciate a step-by-step explanation of what might be going wrong and what exact steps I should take to debug this. \
690            It would also be helpful if you could show me the correct opcode syntax for a GET request with headers and timeout.";
691        let r = compress(input, EfficientMode::Balanced);
692        let savings =
693            100usize.saturating_sub((r.compressed_tokens * 100) / r.original_tokens.max(1));
694        assert!(
695            r.text.contains("R http.GET") || r.text.contains("http.GET"),
696            "http.GET must survive: got: {}",
697            r.text
698        );
699        assert!(
700            r.text.contains("https://") || r.text.contains("api.example.com"),
701            "URL must survive: got: {}",
702            r.text
703        );
704        assert!(
705            r.text.contains("->"),
706            "-> binding must survive: got: {}",
707            r.text
708        );
709        assert!(
710            r.text.contains("steps") || r.text.contains("step"),
711            "steps/step must survive: got: {}",
712            r.text
713        );
714        assert!(
715            savings >= 10,
716            "expected ≥10 % savings on complex AINL question ({}→{} tok), got {}%: [{}]",
717            r.original_tokens,
718            r.compressed_tokens,
719            savings,
720            r.text
721        );
722    }
723
724    #[test]
725    fn aggressive_vs_balanced_gap() {
726        let everyday =
727            "I am working on a React component and experiencing a problem with state management. \
728            The component re-renders multiple times when it should only render once. \
729            I have tried using useMemo but it does not seem to work as expected. \
730            Basically the error says too many re-renders and I believe the issue might be related \
731            to the useEffect dependency array. \
732            I think I need help understanding what is going wrong and how to resolve the problem. \
733            I would like to know if there is a standard approach for fixing infinite render loops. \
734            Please provide a clear explanation and I'd like step-by-step guidance if possible.";
735        let bal = compress(everyday, EfficientMode::Balanced);
736        let agg = compress(everyday, EfficientMode::Aggressive);
737        let bal_pct =
738            100usize.saturating_sub((bal.compressed_tokens * 100) / bal.original_tokens.max(1));
739        let agg_pct =
740            100usize.saturating_sub((agg.compressed_tokens * 100) / agg.original_tokens.max(1));
741
742        let changelog = "The ArmaraOS kernel now injects efficient_mode into each scheduled run. \
743            This makes the list self-documenting and more robust for real dashboard status messages. \
744            The openfang runtime resolves the manifest field at startup. \
745            It is worth noting that the latency is under 30 ms for most prompts. \
746            These changes improve the armaraos agent scheduling pipeline significantly. \
747            Which means users can expect 20 % fewer API calls on high-volume deployments. \
748            The openfang kernel also now exposes a new manifest key for efficient_mode override. \
749            This ensures per-agent configuration always wins over the global config value.";
750        let bal_cl = compress(changelog, EfficientMode::Balanced);
751        let agg_cl = compress(changelog, EfficientMode::Aggressive);
752        let bal_cl_pct = 100usize
753            .saturating_sub((bal_cl.compressed_tokens * 100) / bal_cl.original_tokens.max(1));
754        let agg_cl_pct = 100usize
755            .saturating_sub((agg_cl.compressed_tokens * 100) / agg_cl.original_tokens.max(1));
756
757        assert!(
758            agg_pct > bal_pct + 10,
759            "Aggressive should beat Balanced by >10% on everyday prose; Bal={}% Agg={}%",
760            bal_pct,
761            agg_pct
762        );
763        assert!(
764            agg_cl_pct > bal_cl_pct + 8,
765            "Aggressive should beat Balanced by >8% on soft-identifier changelog; Bal={}% Agg={}%",
766            bal_cl_pct,
767            agg_cl_pct
768        );
769    }
770
771    #[test]
772    fn preserve_marker_forces_keep() {
773        let msg = "I want help. Please do not drop the exact steps required for this. ".repeat(20);
774        let r = compress(&msg, EfficientMode::Aggressive);
775        assert!(
776            r.text.contains("exact steps"),
777            "preserve marker must survive aggressive mode"
778        );
779    }
780
781    #[test]
782    fn readme_dashboard_example_ratio() {
783        let input = "I think I would like to understand basically why the dashboard is showing me \
784            a red error badge on the agents page. Essentially, it seems like the agent is not \
785            responding and I am not sure what steps I should take to investigate this issue. \
786            Please note that I have already tried restarting the daemon. To be honest, I am not \
787            really sure where to look next.";
788        let r = compress(input, EfficientMode::Balanced);
789        let savings =
790            100usize.saturating_sub((r.compressed_tokens * 100) / r.original_tokens.max(1));
791        assert!(
792            r.text.contains("red error badge") || r.text.contains("error badge"),
793            "error badge context must survive: got: {}",
794            r.text
795        );
796        assert!(
797            r.text.contains("daemon"),
798            "daemon restart context must survive"
799        );
800        assert!(
801            savings >= 30,
802            "expected ≥30 % savings on verbose dashboard question, got {}%: [{}]",
803            savings,
804            r.text
805        );
806    }
807
808    #[test]
809    fn http_adapter_prompt_preserves_technical_terms() {
810        let input =
811            "Can you help me understand why the R http.GET call is failing with a timeout? \
812            I am using the URL https://example.com/api?key=abc and getting a connection error. \
813            The adapter seems to not be working and I am not sure if it is the timeout setting \
814            or the URL format that is causing issues with the -> result binding.";
815        let r = compress(input, EfficientMode::Balanced);
816        assert!(
817            r.text.contains("R http.GET") || r.text.contains("http.GET"),
818            "R http.GET must survive: got: {}",
819            r.text
820        );
821        assert!(
822            r.text.contains("https://") || r.text.contains("http"),
823            "URL must survive: got: {}",
824            r.text
825        );
826        assert!(
827            r.text.contains("->"),
828            "-> binding must survive: got: {}",
829            r.text
830        );
831    }
832
833    #[test]
834    fn benchmark_mode_savings_corpus() {
835        let corpus = vec![
836            (
837                "dashboard-verbose",
838                "I think I would like to understand basically why the dashboard is showing me \
839                a red error badge on the agents page. Essentially, it seems like the agent is not \
840                responding and I am not sure what steps I should take to investigate this issue. \
841                Please note that I have already tried restarting the daemon. To be honest, I am not \
842                really sure where to look next.",
843            ),
844            (
845                "ainl-http-technical",
846                "I am really trying to understand basically why my AINL workflow is failing at the R http.GET step. \
847                I think the issue might be related to the timeout setting or the URL format that I am passing to the adapter. \
848                Essentially, the workflow looks like this: I start with L_start, then I call R http.GET https://api.example.com/data?key=abc&region=us-east-1 ->result, \
849                and after that I do R core.GET result body ->body. \
850                I have already tried increasing the timeout to 30 seconds by passing a third positional argument, but it does not seem to help. \
851                To be honest, I am not really sure whether the problem is the URL query string encoding, \
852                or whether the -> result binding is somehow not resolving the value correctly in the next step.",
853            ),
854            (
855                "everyday-prose",
856                "I am working on a React component and experiencing a problem with state management. \
857                The component re-renders multiple times when it should only render once. \
858                I have tried using useMemo but it does not seem to work as expected. \
859                Basically the error says too many re-renders and I believe the issue might be related \
860                to the useEffect dependency array. \
861                I think I need help understanding what is going wrong and how to resolve the problem. \
862                I would like to know if there is a standard approach for fixing infinite render loops. \
863                Please provide a clear explanation and I'd like step-by-step guidance if possible.",
864            ),
865            (
866                "changelog-soft-identifiers",
867                "The ArmaraOS kernel now injects efficient_mode into each scheduled run. \
868                This makes the list self-documenting and more robust for real dashboard status messages. \
869                The openfang runtime resolves the manifest field at startup. \
870                It is worth noting that the latency is under 30 ms for most prompts. \
871                These changes improve the armaraos agent scheduling pipeline significantly. \
872                Which means users can expect 20 % fewer API calls on high-volume deployments. \
873                The openfang kernel also now exposes a new manifest key for efficient_mode override. \
874                This ensures per-agent configuration always wins over the global config value.",
875            ),
876        ];
877
878        let mut balanced_pcts: Vec<u64> = Vec::new();
879        let mut aggressive_pcts: Vec<u64> = Vec::new();
880
881        for (name, input) in corpus {
882            let off = compress(input, EfficientMode::Off);
883            let bal = compress(input, EfficientMode::Balanced);
884            let agg = compress(input, EfficientMode::Aggressive);
885
886            let bal_pct = 100u64.saturating_sub(
887                (bal.compressed_tokens as u64 * 100) / bal.original_tokens.max(1) as u64,
888            );
889            let agg_pct = 100u64.saturating_sub(
890                (agg.compressed_tokens as u64 * 100) / agg.original_tokens.max(1) as u64,
891            );
892
893            balanced_pcts.push(bal_pct);
894            aggressive_pcts.push(agg_pct);
895
896            eprintln!(
897                "[bench] {name}: off={}tok, balanced={}tok (↓{}%), aggressive={}tok (↓{}%), delta=+{}%",
898                off.compressed_tokens,
899                bal.compressed_tokens,
900                bal_pct,
901                agg.compressed_tokens,
902                agg_pct,
903                agg_pct.saturating_sub(bal_pct)
904            );
905        }
906
907        balanced_pcts.sort_unstable();
908        aggressive_pcts.sort_unstable();
909        let mid = balanced_pcts.len() / 2;
910        let bal_median = balanced_pcts[mid];
911        let agg_median = aggressive_pcts[mid];
912        let bal_mean = balanced_pcts.iter().sum::<u64>() as f64 / balanced_pcts.len() as f64;
913        let agg_mean = aggressive_pcts.iter().sum::<u64>() as f64 / aggressive_pcts.len() as f64;
914
915        eprintln!(
916            "[bench-summary] balanced median={}%, mean={:.1}% | aggressive median={}%, mean={:.1}% | delta median=+{}%",
917            bal_median,
918            bal_mean,
919            agg_median,
920            agg_mean,
921            agg_median.saturating_sub(bal_median)
922        );
923
924        assert!(
925            agg_median >= bal_median,
926            "aggressive should not underperform balanced median"
927        );
928    }
929}