Skip to main content

roboticus_agent/
context.rs

1use roboticus_llm::format::UnifiedMessage;
2
3// ── Progressive compaction stages (OPENDEV pattern) ────────────────────────
4
/// How aggressively conversation history is compacted.
///
/// Variants are declared from gentlest to harshest, so the derived `Ord`
/// orders them the same way (`Verbatim` is the smallest, `Skeleton` the
/// largest).
///
/// Rationale (OPENDEV pattern): compressing in stages beats one-shot
/// summarization because every stage retains strictly more information than
/// the one after it, so the caller can settle for the *mildest* stage that
/// still fits the token budget.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum CompactionStage {
    /// Stage 0: messages are passed through untouched.
    Verbatim,
    /// Stage 1: social filler (greetings, acknowledgements) is dropped while
    /// substantive messages are kept.
    SelectiveTrim,
    /// Stage 2: entropy-based compression through `PromptCompressor`
    /// (~60% ratio).
    SemanticCompress,
    /// Stage 3: every message is cut down to its topic sentence.
    TopicExtract,
    /// Stage 4: the whole conversation is folded into a structural outline.
    Skeleton,
}

impl CompactionStage {
    /// Pick the stage that matches how far over budget the content is.
    ///
    /// `excess_ratio` is `current_tokens / target_tokens`: `1.0` means exactly
    /// on budget and anything above it means over budget. The upper bound of
    /// each band is inclusive:
    ///
    /// | ratio          | stage              |
    /// |----------------|--------------------|
    /// | `<= 1.0`       | `Verbatim`         |
    /// | `<= 1.5`       | `SelectiveTrim`    |
    /// | `<= 2.5`       | `SemanticCompress` |
    /// | `<= 4.0`       | `TopicExtract`     |
    /// | anything else  | `Skeleton`         |
    ///
    /// A NaN ratio fails every comparison and therefore lands on `Skeleton`.
    pub fn from_excess(excess_ratio: f64) -> Self {
        match excess_ratio {
            r if r <= 1.0 => Self::Verbatim,
            r if r <= 1.5 => Self::SelectiveTrim,
            r if r <= 2.5 => Self::SemanticCompress,
            r if r <= 4.0 => Self::TopicExtract,
            _ => Self::Skeleton,
        }
    }
}
44
45/// Apply progressive compaction to a slice of messages at the requested stage.
46///
47/// System messages are always preserved. Higher stages produce shorter output.
48pub fn compact_to_stage(
49    messages: &[UnifiedMessage],
50    stage: CompactionStage,
51) -> Vec<UnifiedMessage> {
52    match stage {
53        CompactionStage::Verbatim => messages.to_vec(),
54        CompactionStage::SelectiveTrim => selective_trim(messages),
55        CompactionStage::SemanticCompress => semantic_compress(messages),
56        CompactionStage::TopicExtract => topic_extract(messages),
57        CompactionStage::Skeleton => skeleton_compress(messages),
58    }
59}
60
61/// Stage 1: Drop messages that are pure social filler.
62fn selective_trim(messages: &[UnifiedMessage]) -> Vec<UnifiedMessage> {
63    const FILLER: &[&str] = &[
64        "hello",
65        "hi",
66        "hey",
67        "thanks",
68        "thank you",
69        "ok",
70        "okay",
71        "sure",
72        "got it",
73        "sounds good",
74        "no problem",
75        "np",
76        "ack",
77        "roger",
78    ];
79    messages
80        .iter()
81        .filter(|m| {
82            if m.role == "system" {
83                return true;
84            }
85            // Keep any message with substantive length
86            if m.content.len() >= 40 {
87                return true;
88            }
89            let lower = m.content.trim().to_lowercase();
90            // Exact match (not substring) so "ok, I updated the schema" isn't
91            // falsely classified as filler.
92            !FILLER.contains(&lower.as_str())
93        })
94        .cloned()
95        .collect()
96}
97
98/// Stage 2: Entropy-based compression on non-system messages ≥100 chars.
99fn semantic_compress(messages: &[UnifiedMessage]) -> Vec<UnifiedMessage> {
100    use roboticus_llm::compression::PromptCompressor;
101    let compressor = PromptCompressor::new(0.6);
102    messages
103        .iter()
104        .map(|m| {
105            if m.role == "system" || m.content.len() < 100 {
106                m.clone()
107            } else {
108                UnifiedMessage {
109                    role: m.role.clone(),
110                    content: compressor.compress(&m.content),
111                    parts: None,
112                }
113            }
114        })
115        .collect()
116}
117
118/// Stage 3: Reduce each non-system message to its topic sentence.
119fn topic_extract(messages: &[UnifiedMessage]) -> Vec<UnifiedMessage> {
120    messages
121        .iter()
122        .map(|m| {
123            if m.role == "system" {
124                m.clone()
125            } else {
126                UnifiedMessage {
127                    role: m.role.clone(),
128                    content: extract_topic_sentence(&m.content),
129                    parts: None,
130                }
131            }
132        })
133        .collect()
134}
135
136/// Stage 4: Collapse all non-system messages into a single skeleton outline.
137fn skeleton_compress(messages: &[UnifiedMessage]) -> Vec<UnifiedMessage> {
138    let topics: Vec<String> = messages
139        .iter()
140        .filter(|m| m.role != "system")
141        .map(|m| {
142            let topic = extract_topic_sentence(&m.content);
143            format!("[{}] {}", m.role, topic)
144        })
145        .filter(|line| line.len() > 10)
146        .collect();
147
148    if topics.is_empty() {
149        return messages
150            .iter()
151            .filter(|m| m.role == "system")
152            .cloned()
153            .collect();
154    }
155
156    let mut result: Vec<UnifiedMessage> = messages
157        .iter()
158        .filter(|m| m.role == "system")
159        .cloned()
160        .collect();
161    result.push(UnifiedMessage {
162        role: "assistant".into(),
163        content: format!("[Conversation Skeleton]\n{}", topics.join("\n")),
164        parts: None,
165    });
166    result
167}
168
/// Extract the leading "topic sentence" of `text`.
///
/// The cut point is the first of these rules that matches, tried in order:
/// 1. the first `". "`,
/// 2. the first `".\n"`,
/// 3. the first `'?'`,
/// 4. the first `'!'`.
///
/// The terminator character itself is kept. When no rule matches, the text
/// is capped at 120 bytes, rounded down to the nearest UTF-8 character
/// boundary so that multi-byte text can never cause a slicing panic.
/// Surrounding whitespace is trimmed from the result.
fn extract_topic_sentence(text: &str) -> String {
    // Upper bound, in bytes, applied only when no sentence terminator exists.
    const MAX_FALLBACK_BYTES: usize = 120;

    let terminator = text
        .find(". ")
        .or_else(|| text.find(".\n"))
        .or_else(|| text.find('?'))
        .or_else(|| text.find('!'));

    let end = match terminator {
        // Every terminator starts with a one-byte ASCII character, so the
        // byte right after it is always a valid character boundary.
        Some(idx) => idx + 1,
        None => {
            let mut cut = text.len().min(MAX_FALLBACK_BYTES);
            // Index 0 is always a boundary, so this loop terminates.
            while !text.is_char_boundary(cut) {
                cut -= 1;
            }
            cut
        }
    };

    text[..end].trim().to_string()
}
180
181// ── Complexity levels & context assembly ─────────────────────────────────────
182
/// Complexity tier of a request.
///
/// [`determine_level`] maps a numeric complexity score onto a tier, and
/// [`token_budget_with_config`] maps a tier onto the matching budget field
/// (`l0`..`l3`) of the budget configuration.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComplexityLevel {
    /// Lowest tier (scores below 0.3).
    L0,
    /// Second tier (scores from 0.3 up to, but excluding, 0.6).
    L1,
    /// Third tier (scores from 0.6 up to, but excluding, 0.9).
    L2,
    /// Highest tier (all remaining scores).
    L3,
}
190
191pub fn determine_level(complexity_score: f64) -> ComplexityLevel {
192    if complexity_score < 0.3 {
193        ComplexityLevel::L0
194    } else if complexity_score < 0.6 {
195        ComplexityLevel::L1
196    } else if complexity_score < 0.9 {
197        ComplexityLevel::L2
198    } else {
199        ComplexityLevel::L3
200    }
201}
202
203pub fn token_budget(level: ComplexityLevel) -> usize {
204    token_budget_with_config(level, &Default::default())
205}
206
207pub fn token_budget_with_config(
208    level: ComplexityLevel,
209    cfg: &roboticus_core::config::ContextBudgetConfig,
210) -> usize {
211    match level {
212        ComplexityLevel::L0 => cfg.l0,
213        ComplexityLevel::L1 => cfg.l1,
214        ComplexityLevel::L2 => cfg.l2,
215        ComplexityLevel::L3 => cfg.l3,
216    }
217}
218
/// Token and depth accounting for one assembled context.
///
/// All counts are estimates produced by [`estimate_tokens`]. The default
/// value has every field set to zero.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct ContextFootprint {
    /// Total token budget the context was assembled against.
    pub token_budget: usize,
    /// Estimated tokens spent on the system prompt.
    pub system_prompt_tokens: usize,
    /// Estimated tokens spent on the memory block.
    pub memory_tokens: usize,
    /// Estimated tokens spent on conversation history.
    pub history_tokens: usize,
    /// Number of history messages included.
    pub history_depth: usize,
}
227
/// Cheap token estimate for `text`.
///
/// Assumes roughly four bytes of UTF-8 per token and rounds up, so any
/// non-empty text counts as at least one token and the empty string as zero.
pub fn estimate_tokens(text: &str) -> usize {
    const BYTES_PER_TOKEN: usize = 4;
    text.len().div_ceil(BYTES_PER_TOKEN)
}
232
233/// Assembles context messages within the token budget for the given complexity level.
234pub fn build_context(
235    level: ComplexityLevel,
236    system_prompt: &str,
237    memories: &str,
238    history: &[UnifiedMessage],
239) -> Vec<UnifiedMessage> {
240    build_context_with_budget(level, system_prompt, memories, history, &Default::default())
241}
242
243/// Like [`build_context`] but uses the supplied budget configuration.
244pub fn build_context_with_budget(
245    level: ComplexityLevel,
246    system_prompt: &str,
247    memories: &str,
248    history: &[UnifiedMessage],
249    budget_cfg: &roboticus_core::config::ContextBudgetConfig,
250) -> Vec<UnifiedMessage> {
251    build_context_with_budget_footprint(level, system_prompt, memories, history, budget_cfg).0
252}
253
254/// Like [`build_context_with_budget`] but also returns token/depth accounting.
255pub fn build_context_with_budget_footprint(
256    level: ComplexityLevel,
257    system_prompt: &str,
258    memories: &str,
259    history: &[UnifiedMessage],
260    budget_cfg: &roboticus_core::config::ContextBudgetConfig,
261) -> (Vec<UnifiedMessage>, ContextFootprint) {
262    let budget = token_budget_with_config(level, budget_cfg);
263    let mut used = 0usize;
264    let mut messages = Vec::new();
265    let mut footprint = ContextFootprint {
266        token_budget: budget,
267        ..ContextFootprint::default()
268    };
269
270    // System prompt is always included — it defines the agent's identity.
271    // If the prompt exceeds the entire budget, truncate it to fit but never
272    // drop it entirely (an agent without identity is worse than one with a
273    // truncated identity).
274    let sys_tokens = estimate_tokens(system_prompt);
275    if sys_tokens <= budget {
276        messages.push(UnifiedMessage {
277            role: "system".into(),
278            content: system_prompt.to_string(),
279            parts: None,
280        });
281        used += sys_tokens;
282        footprint.system_prompt_tokens += sys_tokens;
283    } else {
284        // Truncate the system prompt to roughly fit the budget.  Each token
285        // averages ~4 chars; we leave a small margin for the token estimator's
286        // over/under-count.
287        let max_chars = budget.saturating_mul(4);
288        let truncated: String = system_prompt.chars().take(max_chars).collect();
289        let truncated_tokens = estimate_tokens(&truncated);
290        messages.push(UnifiedMessage {
291            role: "system".into(),
292            content: truncated,
293            parts: None,
294        });
295        used += truncated_tokens;
296        footprint.system_prompt_tokens += truncated_tokens;
297        tracing::warn!(
298            sys_tokens,
299            budget,
300            "system prompt exceeds budget — truncated to fit"
301        );
302    }
303
304    if !memories.is_empty() {
305        let mem_tokens = estimate_tokens(memories);
306        if used + mem_tokens <= budget {
307            messages.push(UnifiedMessage {
308                role: "system".into(),
309                content: memories.to_string(),
310                parts: None,
311            });
312            used += mem_tokens;
313            footprint.memory_tokens += mem_tokens;
314        }
315    }
316
317    let mut history_buf: Vec<&UnifiedMessage> = Vec::new();
318    let mut history_tokens = 0usize;
319
320    for msg in history.iter().rev() {
321        let msg_tokens = estimate_tokens(&msg.content);
322        if used + history_tokens + msg_tokens > budget {
323            break;
324        }
325        history_tokens += msg_tokens;
326        history_buf.push(msg);
327    }
328
329    history_buf.reverse();
330    for msg in history_buf {
331        messages.push(msg.clone());
332        footprint.history_depth += 1;
333    }
334    footprint.history_tokens = history_tokens;
335
336    // Wire pruning path: if assembled context exceeds budget, soft-trim oldest
337    // non-system messages while preserving recency.
338    let prune_cfg = PruningConfig {
339        max_tokens: budget,
340        soft_trim_ratio: 1.0,
341        ..PruningConfig::default()
342    };
343    if needs_pruning(&messages, &prune_cfg) {
344        let trimmed = soft_trim(&messages, &prune_cfg).messages;
345        let footprint = classify_context_snapshot(&trimmed, memories.is_empty());
346        return (trimmed, footprint);
347    }
348
349    (messages, footprint)
350}
351
352/// Classify token allocation from finalized messages.
353///
354/// `memories_empty` should match the memories argument used during initial
355/// assembly. When memories were non-empty, the second system message (if
356/// present) is attributed to `memory_tokens`; all other system messages are
357/// `system_prompt_tokens`.
358pub fn classify_context_snapshot(
359    messages: &[UnifiedMessage],
360    memories_empty: bool,
361) -> ContextFootprint {
362    let mut footprint = ContextFootprint::default();
363    let mut system_seen = 0usize;
364    let memory_slot = if memories_empty { None } else { Some(1usize) };
365
366    for msg in messages {
367        let tokens = estimate_tokens(&msg.content);
368        if msg.role == "system" {
369            let idx = system_seen;
370            system_seen += 1;
371            if Some(idx) == memory_slot {
372                footprint.memory_tokens += tokens;
373            } else {
374                footprint.system_prompt_tokens += tokens;
375            }
376        } else {
377            footprint.history_tokens += tokens;
378            footprint.history_depth += 1;
379        }
380    }
381
382    footprint
383}
384
385/// Inject an instruction anti-fade micro-reminder into the message list.
386///
387/// The OPENDEV paper shows that LLM instruction-following degrades as
388/// conversation length grows — the system prompt fades from the model's
389/// effective attention. This function injects a compact distillation of
390/// key directives just before the final user message when the conversation
391/// exceeds `ANTI_FADE_TURN_THRESHOLD` non-system turns.
392///
393/// The reminder uses the `user` role rather than `system` to maintain
394/// compatibility with backends (e.g. llama.cpp, vLLM) whose chat templates
395/// enforce that system messages appear only at position 0. The `[System Note]`
396/// prefix signals the model to treat the content as an instruction.
397///
398/// Returns `true` if a reminder was injected, `false` otherwise.
399pub fn inject_instruction_reminder(messages: &mut Vec<UnifiedMessage>, reminder: &str) -> bool {
400    let non_system_turns = messages.iter().filter(|m| m.role != "system").count();
401    if non_system_turns < crate::prompt::ANTI_FADE_TURN_THRESHOLD {
402        return false;
403    }
404
405    // Find the last user message and inject the reminder just before it.
406    // This puts the reminder in the "recency hotspot" where it most influences
407    // the model's next generation. Using `user` role avoids violating chat
408    // template constraints that require system messages at position 0.
409    let insert_pos = messages
410        .iter()
411        .rposition(|m| m.role == "user")
412        .unwrap_or(messages.len());
413
414    messages.insert(
415        insert_pos,
416        UnifiedMessage {
417            role: "user".into(),
418            content: format!("[System Note] {reminder}"),
419            parts: None,
420        },
421    );
422    true
423}
424
/// Thresholds that drive context pruning.
#[derive(Debug, Clone)]
pub struct PruningConfig {
    /// Token ceiling that both ratios below are relative to.
    pub max_tokens: usize,
    /// Fraction of `max_tokens` above which [`needs_pruning`] reports `true`;
    /// also the target size used by [`soft_trim`].
    pub soft_trim_ratio: f64,
    /// Fraction of `max_tokens` above which [`needs_hard_clear`] reports
    /// `true`.
    pub hard_clear_ratio: f64,
    /// How many of the most recent non-system messages are candidates for
    /// being kept by [`soft_trim`] (and are excluded by
    /// [`extract_trimmable`]).
    pub preserve_recent: usize,
}

impl Default for PruningConfig {
    /// 128k-token ceiling, soft trim at 80%, hard clear at 95%, ten recent
    /// messages preserved.
    fn default() -> Self {
        PruningConfig {
            max_tokens: 128_000,
            soft_trim_ratio: 0.8,
            hard_clear_ratio: 0.95,
            preserve_recent: 10,
        }
    }
}
443
444#[derive(Debug, Clone)]
445pub struct PruningResult {
446    pub messages: Vec<UnifiedMessage>,
447    pub trimmed_count: usize,
448    pub compaction_summary: Option<String>,
449    pub total_tokens: usize,
450}
451
452pub fn count_tokens(messages: &[UnifiedMessage]) -> usize {
453    messages.iter().map(|m| estimate_tokens(&m.content)).sum()
454}
455
456pub fn needs_pruning(messages: &[UnifiedMessage], config: &PruningConfig) -> bool {
457    let tokens = count_tokens(messages);
458    tokens > ((config.max_tokens as f64 * config.soft_trim_ratio) as usize)
459}
460
461pub fn needs_hard_clear(messages: &[UnifiedMessage], config: &PruningConfig) -> bool {
462    let tokens = count_tokens(messages);
463    tokens > ((config.max_tokens as f64 * config.hard_clear_ratio) as usize)
464}
465
466/// Soft trim: remove oldest non-system messages while preserving the most recent N.
467pub fn soft_trim(messages: &[UnifiedMessage], config: &PruningConfig) -> PruningResult {
468    let target_tokens = (config.max_tokens as f64 * config.soft_trim_ratio) as usize;
469
470    let system_msgs: Vec<_> = messages
471        .iter()
472        .filter(|m| m.role == "system")
473        .cloned()
474        .collect();
475
476    let non_system: Vec<_> = messages
477        .iter()
478        .filter(|m| m.role != "system")
479        .cloned()
480        .collect();
481
482    let preserve_count = config.preserve_recent.min(non_system.len());
483    let preserved = &non_system[non_system.len().saturating_sub(preserve_count)..];
484
485    let mut result: Vec<UnifiedMessage> = system_msgs;
486    let system_tokens = count_tokens(&result);
487
488    let mut available = target_tokens.saturating_sub(system_tokens);
489    let mut kept = Vec::new();
490
491    for msg in preserved.iter().rev() {
492        let msg_tokens = estimate_tokens(&msg.content);
493        if msg_tokens <= available {
494            kept.push(msg.clone());
495            available = available.saturating_sub(msg_tokens);
496        }
497        // Skip individual messages that exceed remaining budget rather
498        // than breaking — older, smaller messages may still fit.
499    }
500    kept.reverse();
501
502    let trimmed_count = non_system.len() - kept.len();
503    result.extend(kept);
504
505    let total_tokens = count_tokens(&result);
506
507    PruningResult {
508        messages: result,
509        trimmed_count,
510        compaction_summary: None,
511        total_tokens,
512    }
513}
514
515/// Extract messages that would be trimmed (for summarization).
516pub fn extract_trimmable(
517    messages: &[UnifiedMessage],
518    config: &PruningConfig,
519) -> Vec<UnifiedMessage> {
520    let non_system: Vec<_> = messages
521        .iter()
522        .filter(|m| m.role != "system")
523        .cloned()
524        .collect();
525
526    let preserve_count = config.preserve_recent.min(non_system.len());
527    let trim_end = non_system.len().saturating_sub(preserve_count);
528
529    non_system[..trim_end].to_vec()
530}
531
532/// Build a summarization prompt from trimmed messages.
533pub fn build_compaction_prompt(trimmed: &[UnifiedMessage]) -> String {
534    let mut prompt = String::from(
535        "Summarize the following conversation history into a concise paragraph. \
536         Capture key facts, decisions, and context. Do not include greetings or filler.\n\n",
537    );
538
539    for msg in trimmed {
540        prompt.push_str(&format!("{}: {}\n", msg.role, msg.content));
541    }
542
543    prompt
544}
545
546/// Compress assembled context messages using the `PromptCompressor`.
547///
548/// System messages (prompt, memories) and older history get compressed.
549/// The most recent user message is preserved intact so the LLM understands
550/// the current query.  Messages under 50 tokens are skipped (not worth it).
551pub fn compress_context(messages: &mut [UnifiedMessage], target_ratio: f64) {
552    use roboticus_llm::compression::PromptCompressor;
553
554    let compressor = PromptCompressor::new(target_ratio);
555
556    // Find the last user message index — preserve it intact
557    let last_user_idx = messages.iter().rposition(|m| m.role == "user");
558
559    for (i, msg) in messages.iter_mut().enumerate() {
560        if Some(i) == last_user_idx {
561            continue; // preserve current query
562        }
563        // Only compress messages with enough content to be worth it (~50 tokens ≈ 200 chars)
564        if msg.content.len() < 200 {
565            continue;
566        }
567        msg.content = compressor.compress(&msg.content);
568    }
569}
570
571/// Insert a compaction summary as a system message after the original system messages.
572pub fn insert_compaction_summary(messages: &mut Vec<UnifiedMessage>, summary: String) {
573    let insert_pos = messages
574        .iter()
575        .position(|m| m.role != "system")
576        .unwrap_or(messages.len());
577
578    messages.insert(
579        insert_pos,
580        UnifiedMessage {
581            role: "system".into(),
582            content: format!("[Conversation Summary] {summary}"),
583            parts: None,
584        },
585    );
586}
587
588#[cfg(test)]
589mod tests {
590    use super::*;
591
592    #[test]
593    fn level_determination() {
594        assert_eq!(determine_level(0.0), ComplexityLevel::L0);
595        assert_eq!(determine_level(0.29), ComplexityLevel::L0);
596        assert_eq!(determine_level(0.3), ComplexityLevel::L1);
597        assert_eq!(determine_level(0.59), ComplexityLevel::L1);
598        assert_eq!(determine_level(0.6), ComplexityLevel::L2);
599        assert_eq!(determine_level(0.89), ComplexityLevel::L2);
600        assert_eq!(determine_level(0.9), ComplexityLevel::L3);
601        assert_eq!(determine_level(1.0), ComplexityLevel::L3);
602    }
603
604    #[test]
605    fn budget_values() {
606        assert_eq!(token_budget(ComplexityLevel::L0), 4_000);
607        assert_eq!(token_budget(ComplexityLevel::L1), 8_000);
608        assert_eq!(token_budget(ComplexityLevel::L2), 16_000);
609        assert_eq!(token_budget(ComplexityLevel::L3), 32_000);
610    }
611
612    #[test]
613    fn context_assembly_respects_budget() {
614        let sys = "You are a helpful agent.";
615        let mem = "User prefers concise answers.";
616        let history = vec![
617            UnifiedMessage {
618                role: "user".into(),
619                content: "Hello".into(),
620                parts: None,
621            },
622            UnifiedMessage {
623                role: "assistant".into(),
624                content: "Hi there!".into(),
625                parts: None,
626            },
627        ];
628
629        let ctx = build_context(ComplexityLevel::L0, sys, mem, &history);
630
631        assert!(!ctx.is_empty());
632        assert_eq!(ctx[0].role, "system");
633        assert_eq!(ctx[0].content, sys);
634
635        let total_chars: usize = ctx.iter().map(|m| m.content.len()).sum();
636        let total_tokens = total_chars.div_ceil(4);
637        assert!(total_tokens <= token_budget(ComplexityLevel::L0));
638    }
639
640    #[test]
641    fn context_truncates_old_history() {
642        let sys = "System prompt";
643        let mem = "";
644        let big_msg = "x".repeat(8000);
645        let history = vec![
646            UnifiedMessage {
647                role: "user".into(),
648                content: big_msg,
649                parts: None,
650            },
651            UnifiedMessage {
652                role: "user".into(),
653                content: "recent message".into(),
654                parts: None,
655            },
656        ];
657
658        let ctx = build_context(ComplexityLevel::L0, sys, mem, &history);
659        assert!(ctx.len() >= 2);
660        assert_eq!(ctx.last().unwrap().content, "recent message");
661    }
662
663    #[test]
664    fn pruning_config_defaults() {
665        let cfg = PruningConfig::default();
666        assert_eq!(cfg.max_tokens, 128_000);
667        assert_eq!(cfg.soft_trim_ratio, 0.8);
668        assert_eq!(cfg.hard_clear_ratio, 0.95);
669        assert_eq!(cfg.preserve_recent, 10);
670    }
671
672    #[test]
673    fn count_tokens_basic() {
674        let msgs = vec![UnifiedMessage {
675            role: "user".into(),
676            content: "hello world".into(),
677            parts: None,
678        }];
679        let tokens = count_tokens(&msgs);
680        assert!(tokens > 0);
681        assert_eq!(tokens, estimate_tokens("hello world"));
682    }
683
684    #[test]
685    fn needs_pruning_under_threshold() {
686        let msgs = vec![UnifiedMessage {
687            role: "user".into(),
688            content: "short".into(),
689            parts: None,
690        }];
691        let cfg = PruningConfig::default();
692        assert!(!needs_pruning(&msgs, &cfg));
693    }
694
695    #[test]
696    fn needs_pruning_over_threshold() {
697        let big = "x".repeat(500_000);
698        let msgs = vec![UnifiedMessage {
699            role: "user".into(),
700            content: big,
701            parts: None,
702        }];
703        let cfg = PruningConfig::default();
704        assert!(needs_pruning(&msgs, &cfg));
705    }
706
707    #[test]
708    fn soft_trim_preserves_recent() {
709        let mut msgs = Vec::new();
710        msgs.push(UnifiedMessage {
711            role: "system".into(),
712            content: "sys".into(),
713            parts: None,
714        });
715        for i in 0..20 {
716            msgs.push(UnifiedMessage {
717                role: if i % 2 == 0 { "user" } else { "assistant" }.into(),
718                content: format!("message {i}"),
719                parts: None,
720            });
721        }
722
723        let cfg = PruningConfig {
724            max_tokens: 200,
725            soft_trim_ratio: 0.8,
726            preserve_recent: 5,
727            ..Default::default()
728        };
729
730        let result = soft_trim(&msgs, &cfg);
731        assert!(result.messages[0].role == "system");
732        assert!(result.trimmed_count > 0);
733        let last = result.messages.last().unwrap();
734        assert_eq!(last.content, "message 19");
735    }
736
737    #[test]
738    fn extract_trimmable_gets_old_messages() {
739        let mut msgs = Vec::new();
740        msgs.push(UnifiedMessage {
741            role: "system".into(),
742            content: "sys".into(),
743            parts: None,
744        });
745        for i in 0..10 {
746            msgs.push(UnifiedMessage {
747                role: "user".into(),
748                content: format!("msg {i}"),
749                parts: None,
750            });
751        }
752
753        let cfg = PruningConfig {
754            preserve_recent: 3,
755            ..Default::default()
756        };
757        let trimmed = extract_trimmable(&msgs, &cfg);
758        assert_eq!(trimmed.len(), 7);
759        assert_eq!(trimmed[0].content, "msg 0");
760    }
761
762    #[test]
763    fn build_compaction_prompt_format() {
764        let msgs = vec![
765            UnifiedMessage {
766                role: "user".into(),
767                content: "hi".into(),
768                parts: None,
769            },
770            UnifiedMessage {
771                role: "assistant".into(),
772                content: "hello".into(),
773                parts: None,
774            },
775        ];
776        let prompt = build_compaction_prompt(&msgs);
777        assert!(prompt.contains("Summarize"));
778        assert!(prompt.contains("user: hi"));
779        assert!(prompt.contains("assistant: hello"));
780    }
781
782    #[test]
783    fn insert_compaction_summary_placement() {
784        let mut msgs = vec![
785            UnifiedMessage {
786                role: "system".into(),
787                content: "sys".into(),
788                parts: None,
789            },
790            UnifiedMessage {
791                role: "user".into(),
792                content: "hi".into(),
793                parts: None,
794            },
795        ];
796        insert_compaction_summary(&mut msgs, "summary here".into());
797        assert_eq!(msgs.len(), 3);
798        assert_eq!(msgs[0].role, "system");
799        assert_eq!(msgs[1].role, "system");
800        assert!(msgs[1].content.contains("summary here"));
801        assert_eq!(msgs[2].role, "user");
802    }
803
804    #[test]
805    fn needs_hard_clear_under_threshold() {
806        let msgs = vec![UnifiedMessage {
807            role: "user".into(),
808            content: "short".into(),
809            parts: None,
810        }];
811        let cfg = PruningConfig::default();
812        assert!(!needs_hard_clear(&msgs, &cfg));
813    }
814
815    #[test]
816    fn needs_hard_clear_over_threshold() {
817        // 128_000 * 0.95 = 121_600 tokens; each char ~0.25 tokens => 486_400 chars
818        let big = "y".repeat(500_000);
819        let msgs = vec![UnifiedMessage {
820            role: "user".into(),
821            content: big,
822            parts: None,
823        }];
824        let cfg = PruningConfig::default();
825        assert!(needs_hard_clear(&msgs, &cfg));
826    }
827
828    #[test]
829    fn insert_compaction_summary_no_system_messages() {
830        // When there are no system messages, summary should be inserted at position 0
831        let mut msgs = vec![
832            UnifiedMessage {
833                role: "user".into(),
834                content: "hello".into(),
835                parts: None,
836            },
837            UnifiedMessage {
838                role: "assistant".into(),
839                content: "hi".into(),
840                parts: None,
841            },
842        ];
843        insert_compaction_summary(&mut msgs, "compacted info".into());
844        assert_eq!(msgs.len(), 3);
845        assert_eq!(msgs[0].role, "system");
846        assert!(msgs[0].content.contains("compacted info"));
847        assert_eq!(msgs[1].role, "user");
848    }
849
850    #[test]
851    fn insert_compaction_summary_all_system_messages() {
852        // When all messages are system messages, summary is appended at the end
853        let mut msgs = vec![
854            UnifiedMessage {
855                role: "system".into(),
856                content: "sys1".into(),
857                parts: None,
858            },
859            UnifiedMessage {
860                role: "system".into(),
861                content: "sys2".into(),
862                parts: None,
863            },
864        ];
865        insert_compaction_summary(&mut msgs, "final summary".into());
866        assert_eq!(msgs.len(), 3);
867        // Insert at position 2 (len since no non-system found)
868        assert_eq!(msgs[2].role, "system");
869        assert!(msgs[2].content.contains("final summary"));
870    }
871
872    #[test]
873    fn build_context_sys_prompt_exceeds_budget() {
874        // System prompt is enormous relative to L0 budget (4000 tokens ~ 16000 chars)
875        let big_sys = "z".repeat(20_000);
876        let mem = "";
877        let history = vec![UnifiedMessage {
878            role: "user".into(),
879            content: "hi".into(),
880            parts: None,
881        }];
882
883        let ctx = build_context(ComplexityLevel::L0, &big_sys, mem, &history);
884        // System prompt is truncated to fit, never dropped entirely.
885        assert!(!ctx.is_empty());
886        assert_eq!(ctx[0].role, "system");
887        // The truncated content must be shorter than the original.
888        assert!(ctx[0].content.len() < big_sys.len());
889        // But still non-empty — agent always gets some identity.
890        assert!(!ctx[0].content.is_empty());
891    }
892
893    #[test]
894    fn build_context_empty_history() {
895        let sys = "Agent prompt";
896        let mem = "Memory info";
897        let history: Vec<UnifiedMessage> = vec![];
898
899        let ctx = build_context(ComplexityLevel::L1, sys, mem, &history);
900        assert_eq!(ctx.len(), 2); // system + memories
901        assert_eq!(ctx[0].content, sys);
902        assert_eq!(ctx[1].content, mem);
903    }
904
/// The footprint returned next to the context reports the per-section token
/// split, and re-classifying the built context reproduces the same numbers.
#[test]
fn build_context_returns_footprint_with_expected_split() {
    let (sys, mem) = ("system prompt", "memory block");
    let turn = |role: &str, content: &str| UnifiedMessage {
        role: role.into(),
        content: content.into(),
        parts: None,
    };
    let history = vec![turn("user", "hello"), turn("assistant", "world")];

    let (ctx, fp) = build_context_with_budget_footprint(
        ComplexityLevel::L1,
        sys,
        mem,
        &history,
        &Default::default(),
    );

    // Footprint fields match independently computed estimates.
    assert_eq!(ctx.len(), 4);
    assert_eq!(fp.token_budget, token_budget(ComplexityLevel::L1));
    assert_eq!(fp.system_prompt_tokens, estimate_tokens(sys));
    assert_eq!(fp.memory_tokens, estimate_tokens(mem));
    let expected_history_tokens = estimate_tokens("hello") + estimate_tokens("world");
    assert_eq!(fp.history_tokens, expected_history_tokens);
    assert_eq!(fp.history_depth, 2);

    // Classifying the produced context must agree with the footprint.
    let reclassified = classify_context_snapshot(&ctx, false);
    assert_eq!(reclassified.system_prompt_tokens, fp.system_prompt_tokens);
    assert_eq!(reclassified.memory_tokens, fp.memory_tokens);
    assert_eq!(reclassified.history_tokens, fp.history_tokens);
    assert_eq!(reclassified.history_depth, fp.history_depth);
}
946
/// A conversation made of a single system message passes through `soft_trim` untouched.
#[test]
fn soft_trim_no_non_system_messages() {
    let cfg = PruningConfig {
        max_tokens: 200,
        preserve_recent: 5,
        ..Default::default()
    };
    let system_only = vec![UnifiedMessage {
        role: "system".into(),
        content: "sys".into(),
        parts: None,
    }];

    let outcome = soft_trim(&system_only, &cfg);

    // The lone message is kept and nothing is counted as trimmed.
    assert_eq!(outcome.messages.len(), 1);
    assert_eq!(outcome.trimmed_count, 0);
}
963
/// When the conversation is shorter than `preserve_recent`, no message is trimmable.
#[test]
fn extract_trimmable_fewer_than_preserve() {
    let cfg = PruningConfig {
        preserve_recent: 5,
        ..Default::default()
    };
    // One message against a preserve window of five.
    let lone_turn = vec![UnifiedMessage {
        role: "user".into(),
        content: "only one".into(),
        parts: None,
    }];

    let candidates = extract_trimmable(&lone_turn, &cfg);

    assert!(
        candidates.is_empty(),
        "nothing to trim if fewer than preserve_recent"
    );
}
981
/// Counting tokens over an empty slice yields zero.
#[test]
fn count_tokens_empty() {
    let total = count_tokens(&[]);
    assert_eq!(total, 0);
}
986
987    // ── CompactionStage tests ──────────────────────────────────────────
988
/// `CompactionStage::from_excess` picks the expected stage on, just below, and
/// just above each threshold (1.0, 1.5, 2.5, 4.0).
#[test]
fn compaction_stage_from_excess_boundaries() {
    // (excess ratio, stage it must map to), in ascending ratio order.
    let cases = [
        (0.5, CompactionStage::Verbatim),
        (1.0, CompactionStage::Verbatim),
        (1.01, CompactionStage::SelectiveTrim),
        (1.5, CompactionStage::SelectiveTrim),
        (1.51, CompactionStage::SemanticCompress),
        (2.5, CompactionStage::SemanticCompress),
        (2.51, CompactionStage::TopicExtract),
        (4.0, CompactionStage::TopicExtract),
        (4.01, CompactionStage::Skeleton),
        (100.0, CompactionStage::Skeleton),
    ];

    for (ratio, expected) in cases {
        assert_eq!(CompactionStage::from_excess(ratio), expected);
    }
}
1026
/// The derived ordering ranks stages from least to most aggressive.
#[test]
fn compaction_stage_ordering() {
    let ascending = [
        CompactionStage::Verbatim,
        CompactionStage::SelectiveTrim,
        CompactionStage::SemanticCompress,
        CompactionStage::TopicExtract,
        CompactionStage::Skeleton,
    ];

    // Each stage must compare strictly below its successor.
    for pair in ascending.windows(2) {
        assert!(pair[0] < pair[1]);
    }
}
1034
/// `selective_trim` drops short social filler while keeping the system message
/// and the substantive user request.
#[test]
fn selective_trim_removes_filler() {
    let msg = |role: &str, content: &str| UnifiedMessage {
        role: role.into(),
        content: content.into(),
        parts: None,
    };
    let conversation = vec![
        msg("system", "sys prompt"),
        msg("user", "hello"),
        msg("assistant", "ok"),
        msg(
            "user",
            "Please analyze the data and find anomalies in the revenue stream",
        ),
        msg("assistant", "thanks"),
    ];

    let kept = selective_trim(&conversation);

    // System message always kept, substantive user message kept (>=40 chars),
    // filler "hello", "ok", "thanks" dropped.
    assert_eq!(kept.len(), 2);
    assert_eq!(kept[0].role, "system");
    assert!(kept[1].content.contains("analyze the data"));
}
1071
/// Long messages survive `selective_trim` regardless of role.
#[test]
fn selective_trim_keeps_all_long_messages() {
    let msg = |role: &str, content: &str| UnifiedMessage {
        role: role.into(),
        content: content.into(),
        parts: None,
    };
    let conversation = vec![
        msg(
            "user",
            "This is a long enough message that should never be trimmed away",
        ),
        msg(
            "assistant",
            "I agree, this response is also long enough to stay around",
        ),
    ];

    let kept = selective_trim(&conversation);

    assert_eq!(kept.len(), 2);
}
1089
/// `topic_extract` leaves the system message intact and reduces the user
/// message to its first sentence.
#[test]
fn topic_extract_takes_first_sentence() {
    let msg = |role: &str, content: &str| UnifiedMessage {
        role: role.into(),
        content: content.into(),
        parts: None,
    };
    let conversation = vec![
        msg("system", "You are helpful."),
        msg(
            "user",
            "Deploy the model to production. Then run the test suite. Finally update docs.",
        ),
    ];

    let extracted = topic_extract(&conversation);

    assert_eq!(extracted.len(), 2);
    // System content is preserved verbatim.
    assert_eq!(extracted[0].content, "You are helpful.");
    // Only the leading sentence of the user message remains.
    assert_eq!(extracted[1].content, "Deploy the model to production.");
}
1111
/// `skeleton_compress` keeps the system message and folds the remaining turns
/// into one assistant-authored outline that names each speaker role.
#[test]
fn skeleton_compress_creates_outline() {
    let msg = |role: &str, content: &str| UnifiedMessage {
        role: role.into(),
        content: content.into(),
        parts: None,
    };
    let conversation = vec![
        msg("system", "System prompt"),
        msg("user", "How does authentication work in this app?"),
        msg(
            "assistant",
            "Authentication uses JWT tokens with a 24-hour expiry. The flow starts at the login endpoint.",
        ),
    ];

    let compressed = skeleton_compress(&conversation);

    // System message preserved + one skeleton assistant message.
    assert_eq!(compressed.len(), 2);
    assert_eq!(compressed[0].content, "System prompt");

    let outline = &compressed[1];
    assert_eq!(outline.role, "assistant");
    for marker in ["[Conversation Skeleton]", "[user]", "[assistant]"] {
        assert!(outline.content.contains(marker));
    }
}
1140
/// With only a system message as input, `skeleton_compress` returns just that message.
#[test]
fn skeleton_compress_empty_non_system() {
    let system_only = vec![UnifiedMessage {
        role: "system".into(),
        content: "sys".into(),
        parts: None,
    }];

    let compressed = skeleton_compress(&system_only);

    assert_eq!(compressed.len(), 1);
    assert_eq!(compressed[0].role, "system");
}
1152
/// The `Verbatim` stage returns the messages unchanged in count and content.
#[test]
fn compact_to_stage_verbatim_is_identity() {
    let msg = |role: &str, content: &str| UnifiedMessage {
        role: role.into(),
        content: content.into(),
        parts: None,
    };
    let conversation = vec![msg("user", "test"), msg("assistant", "resp")];

    let untouched = compact_to_stage(&conversation, CompactionStage::Verbatim);

    assert_eq!(untouched.len(), conversation.len());
    assert_eq!(untouched[0].content, "test");
    assert_eq!(untouched[1].content, "resp");
}
1172
/// Requesting `SelectiveTrim` through `compact_to_stage` applies filler removal.
#[test]
fn compact_to_stage_dispatches_correctly() {
    let user_msg = |content: &str| UnifiedMessage {
        role: "user".into(),
        content: content.into(),
        parts: None,
    };
    let conversation = vec![
        user_msg("hi"),
        user_msg("Analyze the market data and identify trends in revenue growth over Q3"),
    ];

    // SelectiveTrim should remove the "hi" filler and keep the real request.
    let remaining = compact_to_stage(&conversation, CompactionStage::SelectiveTrim);

    assert_eq!(remaining.len(), 1);
    assert!(remaining[0].content.contains("Analyze"));
}
1193
/// A period ends the topic sentence; the period itself is retained.
#[test]
fn extract_topic_sentence_with_period() {
    let topic = extract_topic_sentence("First sentence. Second sentence. Third.");
    assert_eq!(topic, "First sentence.");
}
1201
/// A question mark also ends the topic sentence.
#[test]
fn extract_topic_sentence_with_question() {
    let topic = extract_topic_sentence("What is this? More details here.");
    assert_eq!(topic, "What is this?");
}
1209
/// Short text with no sentence terminator is returned as-is.
#[test]
fn extract_topic_sentence_no_punctuation() {
    let unterminated = "Just some text without ending";
    let topic = extract_topic_sentence(unterminated);
    assert_eq!(topic, unterminated);
}
1215
/// Input with no sentence break is capped at 120 bytes at most.
#[test]
fn extract_topic_sentence_very_long() {
    let unbroken = "x".repeat(200);
    assert!(extract_topic_sentence(&unbroken).len() <= 120);
}
1222
1223    // ── Anti-fade injection tests ───────────────────────────────────────
1224
1225    fn make_msg(role: &str, content: &str) -> UnifiedMessage {
1226        UnifiedMessage {
1227            role: role.into(),
1228            content: content.into(),
1229            parts: None,
1230        }
1231    }
1232
/// Short conversations get no reminder and are left unmodified.
#[test]
fn inject_reminder_skips_short_conversations() {
    let script = [
        ("system", "You are helpful."),
        ("user", "Hello"),
        ("assistant", "Hi!"),
        ("user", "How are you?"),
        ("assistant", "Good, thanks!"),
    ];
    let mut msgs: Vec<UnifiedMessage> = script
        .iter()
        .map(|&(role, content)| make_msg(role, content))
        .collect();

    // Only 4 non-system turns, below threshold of 8.
    let injected = inject_instruction_reminder(&mut msgs, "[Reminder] Be helpful.");

    assert!(!injected);
    assert_eq!(msgs.len(), 5);
}
1247
/// A long conversation receives exactly one extra message carrying the reminder.
#[test]
fn inject_reminder_fires_for_long_conversations() {
    let mut msgs = vec![make_msg("system", "You are helpful.")];
    // 10 user/assistant pairs, i.e. 20 non-system turns.
    msgs.extend((0..10).flat_map(|i| {
        [
            make_msg("user", &format!("question {i}")),
            make_msg("assistant", &format!("answer {i}")),
        ]
    }));
    let original_len = msgs.len();

    let injected = inject_instruction_reminder(&mut msgs, "[Reminder] Always be thorough.");

    assert!(injected);
    assert_eq!(msgs.len(), original_len + 1);

    // The reminder should be inserted just before the last user message,
    // using "user" role with [System Note] prefix for backend compatibility.
    // Search from the back so the latest note is the one inspected.
    let reminder = msgs
        .iter()
        .rev()
        .find(|m| m.content.contains("[System Note]"))
        .unwrap();
    assert_eq!(reminder.role, "user");
    assert!(reminder.content.contains("[Reminder] Always be thorough."));
}
1274
/// The reminder lands immediately before the final user message, which stays last.
#[test]
fn inject_reminder_places_before_last_user_message() {
    let mut msgs = vec![make_msg("system", "System prompt.")];
    msgs.extend((0..5).flat_map(|i| {
        [
            make_msg("user", &format!("q{i}")),
            make_msg("assistant", &format!("a{i}")),
        ]
    }));
    // Trailing user turn that the reminder must precede.
    msgs.push(make_msg("user", "final question"));

    let injected = inject_instruction_reminder(&mut msgs, "[Reminder] Key directive.");
    assert!(injected);

    // Last message should still be the user's final question.
    let tail = msgs.last().unwrap();
    assert_eq!(tail.content, "final question");
    assert_eq!(tail.role, "user");

    // Second-to-last should be the reminder (user role with [System Note] prefix).
    let reminder = &msgs[msgs.len() - 2];
    assert_eq!(reminder.role, "user");
    assert!(reminder.content.contains("[System Note]"));
    assert!(reminder.content.contains("[Reminder]"));
}
1298
/// With no user message to anchor on, the reminder is appended as the last message.
#[test]
fn inject_reminder_no_user_messages_appends_at_end() {
    let mut msgs = vec![make_msg("system", "System prompt.")];
    // Assistant-only history (unusual, but exercises the fallback placement).
    msgs.extend((0..10).map(|i| make_msg("assistant", &format!("response {i}"))));
    let original_len = msgs.len();

    let injected = inject_instruction_reminder(&mut msgs, "[Reminder] Test.");

    assert!(injected);
    // When no user message is found, the note goes at the end.
    assert_eq!(msgs.len(), original_len + 1);
    let tail = msgs.last().unwrap();
    assert_eq!(tail.content, "[System Note] [Reminder] Test.");
    assert_eq!(tail.role, "user");
}
1317}