agent_sdk/context/config.rs
1//! Configuration for context compaction.
2
3use serde::{Deserialize, Serialize};
4
/// Soft upper bound, in estimated tokens, on the tail kept after compaction.
///
/// Acts as a limit on `retain_recent`: when that setting would keep more
/// messages than fit in this budget, the retained tail is trimmed until its
/// estimated token count is at or near this value. The bound is soft, since a
/// tool pair that cannot be split may leave the tail slightly above it.
pub const DEFAULT_MAX_RETAINED_TAIL_TOKENS: usize = 20_000;
12
/// Default `max_tokens` ceiling used for the LLM summarization request.
///
/// A summary of 500-1000 words (what the summarization prompt asks for) can
/// overrun the previously hardcoded 2000-token limit when the content is dense
/// and technical, so this default is set higher to leave headroom. Hitting the
/// ceiling anyway (a `MaxTokens` stop reason) is still detected and retried.
pub const DEFAULT_SUMMARY_MAX_TOKENS: usize = 4_096;
20
21const fn default_max_retained_tail_tokens() -> usize {
22 DEFAULT_MAX_RETAINED_TAIL_TOKENS
23}
24
25const fn default_summary_max_tokens() -> usize {
26 DEFAULT_SUMMARY_MAX_TOKENS
27}
28
29/// Configuration for context compaction.
30///
31/// Controls when and how context compaction occurs.
32///
33/// # Example
34///
35/// ```
36/// use agent_sdk::context::CompactionConfig;
37///
38/// let config = CompactionConfig::default()
39/// .with_threshold_tokens(100_000)
40/// .with_retain_recent(20);
41/// ```
42#[derive(Clone, Debug, Serialize, Deserialize)]
43pub struct CompactionConfig {
44 /// Token threshold to trigger compaction.
45 /// When estimated tokens exceed this, compaction is triggered.
46 /// Default: 80,000 (conservative for 128K context models)
47 pub threshold_tokens: usize,
48
49 /// Number of recent messages to keep intact (not summarized).
50 /// These messages remain in full to preserve immediate context.
51 ///
52 /// The retained tail is additionally bounded by
53 /// [`max_retained_tail_tokens`](Self::max_retained_tail_tokens): if keeping
54 /// `retain_recent` messages would exceed that token budget, the oldest of
55 /// those messages are summarized instead so the tail stays within the cap.
56 /// Default: 10
57 pub retain_recent: usize,
58
59 /// Minimum messages before compaction is considered.
60 /// Prevents compaction when conversation is still short.
61 /// Default: 20
62 pub min_messages_for_compaction: usize,
63
64 /// Whether to automatically compact when threshold is reached.
65 /// If false, compaction only occurs on explicit request.
66 /// Default: true
67 pub auto_compact: bool,
68
69 /// Soft cap on the estimated tokens kept in the retained tail.
70 ///
71 /// Bounds [`retain_recent`](Self::retain_recent): the most recent messages
72 /// are kept only until this token budget is reached, after which older
73 /// messages are folded into the summary instead. The cap is soft because an
74 /// indivisible `tool_use`/`tool_result` pair may push the tail slightly
75 /// over. Raise it on large-context models, or lower it for more aggressive
76 /// compaction.
77 /// Default: 20,000
78 #[serde(default = "default_max_retained_tail_tokens")]
79 pub max_retained_tail_tokens: usize,
80
81 /// `max_tokens` budget for the LLM summarization call.
82 ///
83 /// If the summarizer hits this ceiling (a `MaxTokens` stop reason), the
84 /// compactor logs a warning and retries once with a larger budget before
85 /// marking the summary as truncated.
86 /// Default: 4,096
87 #[serde(default = "default_summary_max_tokens")]
88 pub summary_max_tokens: usize,
89}
90
91impl Default for CompactionConfig {
92 fn default() -> Self {
93 Self {
94 threshold_tokens: 80_000,
95 retain_recent: 10,
96 min_messages_for_compaction: 20,
97 auto_compact: true,
98 max_retained_tail_tokens: DEFAULT_MAX_RETAINED_TAIL_TOKENS,
99 summary_max_tokens: DEFAULT_SUMMARY_MAX_TOKENS,
100 }
101 }
102}
103
104impl CompactionConfig {
105 /// Create a new configuration with default values.
106 #[must_use]
107 pub fn new() -> Self {
108 Self::default()
109 }
110
111 /// Set the token threshold for compaction.
112 #[must_use]
113 pub const fn with_threshold_tokens(mut self, threshold: usize) -> Self {
114 self.threshold_tokens = threshold;
115 self
116 }
117
118 /// Set the number of recent messages to retain.
119 #[must_use]
120 pub const fn with_retain_recent(mut self, count: usize) -> Self {
121 self.retain_recent = count;
122 self
123 }
124
125 /// Set the minimum messages for compaction.
126 #[must_use]
127 pub const fn with_min_messages(mut self, count: usize) -> Self {
128 self.min_messages_for_compaction = count;
129 self
130 }
131
132 /// Set whether to auto-compact.
133 #[must_use]
134 pub const fn with_auto_compact(mut self, auto: bool) -> Self {
135 self.auto_compact = auto;
136 self
137 }
138
139 /// Set the soft cap on tokens kept in the retained tail.
140 ///
141 /// Bounds [`retain_recent`](Self::retain_recent). See the field docs for
142 /// the precise semantics.
143 #[must_use]
144 pub const fn with_max_retained_tail_tokens(mut self, tokens: usize) -> Self {
145 self.max_retained_tail_tokens = tokens;
146 self
147 }
148
149 /// Set the `max_tokens` budget for the summarization call.
150 #[must_use]
151 pub const fn with_summary_max_tokens(mut self, tokens: usize) -> Self {
152 self.summary_max_tokens = tokens;
153 self
154 }
155}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// `Default` must yield the documented default for every field.
    #[test]
    fn test_default_config() {
        let config = CompactionConfig::default();
        assert_eq!(config.threshold_tokens, 80_000);
        assert_eq!(config.retain_recent, 10);
        assert_eq!(config.min_messages_for_compaction, 20);
        assert!(config.auto_compact);
        assert_eq!(
            config.max_retained_tail_tokens,
            DEFAULT_MAX_RETAINED_TAIL_TOKENS
        );
        assert_eq!(config.summary_max_tokens, DEFAULT_SUMMARY_MAX_TOKENS);
    }

    /// Every `with_*` builder must overwrite exactly the field it names.
    #[test]
    fn test_builder_pattern() {
        let config = CompactionConfig::new()
            .with_threshold_tokens(50_000)
            .with_retain_recent(5)
            .with_min_messages(10)
            .with_auto_compact(false)
            .with_max_retained_tail_tokens(40_000)
            .with_summary_max_tokens(8_000);

        assert_eq!(config.threshold_tokens, 50_000);
        assert_eq!(config.retain_recent, 5);
        assert_eq!(config.min_messages_for_compaction, 10);
        assert!(!config.auto_compact);
        assert_eq!(config.max_retained_tail_tokens, 40_000);
        assert_eq!(config.summary_max_tokens, 8_000);
    }

    /// Configs serialized before the new knobs existed must still
    /// deserialize, falling back to the documented defaults.
    #[test]
    fn test_deserialize_without_new_fields_uses_defaults() -> anyhow::Result<()> {
        // Every legacy field is given a non-default value (note
        // `auto_compact: false`) so the assertions below prove each value was
        // read from the JSON rather than coincidentally matching a default.
        let json = r#"{
            "threshold_tokens": 1234,
            "retain_recent": 7,
            "min_messages_for_compaction": 3,
            "auto_compact": false
        }"#;

        let config: CompactionConfig = serde_json::from_str(json)?;

        // Fields present in the legacy payload round-trip unchanged.
        assert_eq!(config.threshold_tokens, 1234);
        assert_eq!(config.retain_recent, 7);
        assert_eq!(config.min_messages_for_compaction, 3);
        assert!(!config.auto_compact);

        // Fields absent from the legacy payload take their serde defaults.
        assert_eq!(
            config.max_retained_tail_tokens,
            DEFAULT_MAX_RETAINED_TAIL_TOKENS
        );
        assert_eq!(config.summary_max_tokens, DEFAULT_SUMMARY_MAX_TOKENS);

        Ok(())
    }
}