Skip to main content

agent_sdk/context/
config.rs

1//! Configuration for context compaction.
2
3use serde::{Deserialize, Serialize};
4
/// Default cap on the estimated tokens retained in the post-compaction tail.
///
/// This bounds `retain_recent`: even when `retain_recent` asks to keep many
/// messages, the retained tail is trimmed so its estimated token count stays
/// at or near this value (it is a soft cap — an indivisible tool pair may push
/// the tail slightly over).
///
/// This is the value `CompactionConfig::default()` assigns to
/// `max_retained_tail_tokens`, and the value that field falls back to when it
/// is missing from deserialized input.
pub const DEFAULT_MAX_RETAINED_TAIL_TOKENS: usize = 20_000;
12
/// Default `max_tokens` budget for the LLM summarization call.
///
/// The summarization prompt asks for 500-1000 words, which can exceed the old
/// hardcoded 2000-token ceiling for dense technical content. This default
/// leaves headroom; truncation (a `MaxTokens` stop reason) is still detected
/// and retried.
///
/// This is the value `CompactionConfig::default()` assigns to
/// `summary_max_tokens`, and the value that field falls back to when it is
/// missing from deserialized input.
pub const DEFAULT_SUMMARY_MAX_TOKENS: usize = 4_096;
20
21const fn default_max_retained_tail_tokens() -> usize {
22    DEFAULT_MAX_RETAINED_TAIL_TOKENS
23}
24
25const fn default_summary_max_tokens() -> usize {
26    DEFAULT_SUMMARY_MAX_TOKENS
27}
28
/// Configuration for context compaction.
///
/// Controls when and how context compaction occurs.
///
/// # Serialization
///
/// `max_retained_tail_tokens` and `summary_max_tokens` are annotated with
/// `#[serde(default = "...")]`, so input that omits them still deserializes
/// and receives [`DEFAULT_MAX_RETAINED_TAIL_TOKENS`] and
/// [`DEFAULT_SUMMARY_MAX_TOKENS`] respectively. The remaining four fields
/// carry no serde default and must therefore be present in the input.
///
/// # Example
///
/// ```
/// use agent_sdk::context::CompactionConfig;
///
/// let config = CompactionConfig::default()
///     .with_threshold_tokens(100_000)
///     .with_retain_recent(20);
/// ```
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct CompactionConfig {
    /// Token threshold to trigger compaction.
    /// When estimated tokens exceed this, compaction is triggered.
    /// Default: 80,000 (conservative for 128K context models)
    pub threshold_tokens: usize,

    /// Number of recent messages to keep intact (not summarized).
    /// These messages remain in full to preserve immediate context.
    ///
    /// The retained tail is additionally bounded by
    /// [`max_retained_tail_tokens`](Self::max_retained_tail_tokens): if keeping
    /// `retain_recent` messages would exceed that token budget, the oldest of
    /// those messages are summarized instead so the tail stays within the cap.
    /// Default: 10
    pub retain_recent: usize,

    /// Minimum messages before compaction is considered.
    /// Prevents compaction when the conversation is still short.
    /// Default: 20
    pub min_messages_for_compaction: usize,

    /// Whether to automatically compact when the threshold is reached.
    /// If false, compaction only occurs on explicit request.
    /// Default: true
    pub auto_compact: bool,

    /// Soft cap on the estimated tokens kept in the retained tail.
    ///
    /// Bounds [`retain_recent`](Self::retain_recent): the most recent messages
    /// are kept only until this token budget is reached, after which older
    /// messages are folded into the summary instead. The cap is soft because an
    /// indivisible `tool_use`/`tool_result` pair may push the tail slightly
    /// over. Raise it on large-context models, or lower it for more aggressive
    /// compaction.
    ///
    /// Optional when deserializing; falls back to
    /// [`DEFAULT_MAX_RETAINED_TAIL_TOKENS`] if absent.
    /// Default: 20,000
    #[serde(default = "default_max_retained_tail_tokens")]
    pub max_retained_tail_tokens: usize,

    /// `max_tokens` budget for the LLM summarization call.
    ///
    /// If the summarizer hits this ceiling (a `MaxTokens` stop reason), the
    /// compactor logs a warning and retries once with a larger budget before
    /// marking the summary as truncated.
    ///
    /// Optional when deserializing; falls back to
    /// [`DEFAULT_SUMMARY_MAX_TOKENS`] if absent.
    /// Default: 4,096
    #[serde(default = "default_summary_max_tokens")]
    pub summary_max_tokens: usize,
}
90
91impl Default for CompactionConfig {
92    fn default() -> Self {
93        Self {
94            threshold_tokens: 80_000,
95            retain_recent: 10,
96            min_messages_for_compaction: 20,
97            auto_compact: true,
98            max_retained_tail_tokens: DEFAULT_MAX_RETAINED_TAIL_TOKENS,
99            summary_max_tokens: DEFAULT_SUMMARY_MAX_TOKENS,
100        }
101    }
102}
103
104impl CompactionConfig {
105    /// Create a new configuration with default values.
106    #[must_use]
107    pub fn new() -> Self {
108        Self::default()
109    }
110
111    /// Set the token threshold for compaction.
112    #[must_use]
113    pub const fn with_threshold_tokens(mut self, threshold: usize) -> Self {
114        self.threshold_tokens = threshold;
115        self
116    }
117
118    /// Set the number of recent messages to retain.
119    #[must_use]
120    pub const fn with_retain_recent(mut self, count: usize) -> Self {
121        self.retain_recent = count;
122        self
123    }
124
125    /// Set the minimum messages for compaction.
126    #[must_use]
127    pub const fn with_min_messages(mut self, count: usize) -> Self {
128        self.min_messages_for_compaction = count;
129        self
130    }
131
132    /// Set whether to auto-compact.
133    #[must_use]
134    pub const fn with_auto_compact(mut self, auto: bool) -> Self {
135        self.auto_compact = auto;
136        self
137    }
138
139    /// Set the soft cap on tokens kept in the retained tail.
140    ///
141    /// Bounds [`retain_recent`](Self::retain_recent). See the field docs for
142    /// the precise semantics.
143    #[must_use]
144    pub const fn with_max_retained_tail_tokens(mut self, tokens: usize) -> Self {
145        self.max_retained_tail_tokens = tokens;
146        self
147    }
148
149    /// Set the `max_tokens` budget for the summarization call.
150    #[must_use]
151    pub const fn with_summary_max_tokens(mut self, tokens: usize) -> Self {
152        self.summary_max_tokens = tokens;
153        self
154    }
155}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// `Default` must produce the documented value for every field.
    #[test]
    fn test_default_config() {
        let config = CompactionConfig::default();
        assert_eq!(config.threshold_tokens, 80_000);
        assert_eq!(config.retain_recent, 10);
        assert_eq!(config.min_messages_for_compaction, 20);
        assert!(config.auto_compact);
        assert_eq!(
            config.max_retained_tail_tokens,
            DEFAULT_MAX_RETAINED_TAIL_TOKENS
        );
        assert_eq!(config.summary_max_tokens, DEFAULT_SUMMARY_MAX_TOKENS);
    }

    /// Each builder method must overwrite exactly its own field.
    #[test]
    fn test_builder_pattern() {
        let config = CompactionConfig::new()
            .with_threshold_tokens(50_000)
            .with_retain_recent(5)
            .with_min_messages(10)
            .with_auto_compact(false)
            .with_max_retained_tail_tokens(40_000)
            .with_summary_max_tokens(8_000);

        assert_eq!(config.threshold_tokens, 50_000);
        assert_eq!(config.retain_recent, 5);
        assert_eq!(config.min_messages_for_compaction, 10);
        assert!(!config.auto_compact);
        assert_eq!(config.max_retained_tail_tokens, 40_000);
        assert_eq!(config.summary_max_tokens, 8_000);
    }

    /// Configs serialized before the new knobs existed must still
    /// deserialize, falling back to the documented defaults.
    #[test]
    fn test_deserialize_without_new_fields_uses_defaults() -> anyhow::Result<()> {
        // Every legacy field is given a value that differs from its default
        // (`auto_compact` defaults to true), so each assertion below proves
        // the value came from the input rather than from a fallback.
        let json = r#"{
            "threshold_tokens": 1234,
            "retain_recent": 7,
            "min_messages_for_compaction": 3,
            "auto_compact": false
        }"#;

        let config: CompactionConfig = serde_json::from_str(json)?;

        // Legacy fields are taken verbatim from the payload.
        assert_eq!(config.threshold_tokens, 1234);
        assert_eq!(config.retain_recent, 7);
        assert_eq!(config.min_messages_for_compaction, 3);
        assert!(!config.auto_compact);

        // Fields absent from the payload fall back to the serde defaults.
        assert_eq!(
            config.max_retained_tail_tokens,
            DEFAULT_MAX_RETAINED_TAIL_TOKENS
        );
        assert_eq!(config.summary_max_tokens, DEFAULT_SUMMARY_MAX_TOKENS);

        Ok(())
    }
}