// ds_api/conversation/summarizer.rs
//! Conversation summarizer trait and built-in implementations.
//!
//! The [`AUTO_SUMMARY_TAG`][crate::raw::request::message::AUTO_SUMMARY_TAG] constant
//! in [`Message`][crate::raw::request::message::Message] defines the single source of
//! truth for identifying auto-generated summary messages.
//!
//! # Trait
//!
//! [`Summarizer`] is an async trait with two methods:
//! - [`should_summarize`][Summarizer::should_summarize] — synchronous check on the current history.
//! - [`summarize`][Summarizer::summarize] — async, may perform an API call; mutates history in-place.
//!
//! # Built-in implementations
//!
//! | Type | Strategy |
//! |---|---|
//! | [`LlmSummarizer`] | Calls DeepSeek to produce a semantic summary; **default** for `DeepseekAgent`. |
//! | [`SlidingWindowSummarizer`] | Keeps the last N messages and silently drops the rest; no API call. |

20use std::pin::Pin;
21
22use futures::Future;
23
24use crate::api::{ApiClient, ApiRequest};
25use crate::error::ApiError;
26use crate::raw::request::message::{Message, Role};
27
28// ── Trait ────────────────────────────────────────────────────────────────────
29
/// Decides when and how to compress conversation history.
///
/// Both methods receive an immutable or mutable slice of the current history.
/// Implementors are free to count tokens, count turns, check wall-clock time,
/// or use any other heuristic.
///
/// The trait is object-safe via `BoxFuture`; you can store it as
/// `Box<dyn Summarizer>` without `async_trait`.
///
/// # Implementing a custom summarizer
///
/// ```no_run
/// use std::pin::Pin;
/// use ds_api::conversation::Summarizer;
/// use ds_api::error::ApiError;
/// use ds_api::raw::request::message::Message;
///
/// /// Drops all history older than `max_turns` turns. No API call needed.
/// struct TurnLimitSummarizer { max_turns: usize }
///
/// impl Summarizer for TurnLimitSummarizer {
///     fn should_summarize(&self, history: &[Message]) -> bool {
///         history.len() > self.max_turns
///     }
///
///     fn summarize<'a>(
///         &'a self,
///         history: &'a mut Vec<Message>,
///     ) -> Pin<Box<dyn std::future::Future<Output = Result<(), ApiError>> + Send + 'a>> {
///         Box::pin(async move {
///             if history.len() > self.max_turns {
///                 let drop_count = history.len() - self.max_turns;
///                 history.drain(0..drop_count);
///             }
///             Ok(())
///         })
///     }
/// }
///
/// // Use it with an agent:
/// use ds_api::DeepseekAgent;
/// let agent = DeepseekAgent::new("sk-...")
///     .with_summarizer(TurnLimitSummarizer { max_turns: 20 });
/// ```
pub trait Summarizer: Send + Sync {
    /// Return `true` if the history should be summarized before the next API turn.
    ///
    /// This is called synchronously on every user-input push; keep it cheap.
    fn should_summarize(&self, history: &[Message]) -> bool;

    /// Compress `history` in-place, returning an error only for unrecoverable failures.
    ///
    /// On success the history must be shorter (or at most the same length) than before.
    /// Implementations must preserve permanent system prompts: any message with role
    /// [`Role::System`] whose `name` field is *not* `Some("[auto-summary]")` is a
    /// user-provided system prompt and must never be removed.
    fn summarize<'a>(
        &'a self,
        history: &'a mut Vec<Message>,
    ) -> Pin<Box<dyn Future<Output = Result<(), ApiError>> + Send + 'a>>;
}
91
92// ── Helpers ───────────────────────────────────────────────────────────────────
93
94/// Estimate the token count of a slice of messages using a fast character heuristic.
95///
96/// ASCII characters count as 1 char ≈ 0.25 tokens; CJK / multibyte characters are
97/// counted as 4 chars ≈ 1 token. System messages whose `name` is `[auto-summary]`
98/// are included in the estimate; other system messages (user-provided prompts) are
99/// excluded because they are permanent and we cannot remove them anyway.
100pub(crate) fn estimate_tokens(history: &[Message]) -> usize {
101 history
102 .iter()
103 .filter(|m| {
104 // Always exclude permanent system prompts from the token estimate;
105 // we can't remove them so counting them would trigger summarization
106 // that can never actually free those tokens.
107 if matches!(m.role, Role::System) {
108 // auto-summary placeholders are replaceable → count them
109 m.is_auto_summary()
110 } else {
111 true
112 }
113 })
114 .filter_map(|m| m.content.as_deref())
115 .map(|s| {
116 s.chars()
117 .map(|c| if c.is_ascii() { 1usize } else { 4 })
118 .sum::<usize>()
119 })
120 .sum::<usize>()
121 / 4
122}
123
124/// Partition `history` into (system_prompts, rest), where system prompts are
125/// permanent user-provided system messages (role=System, name≠"[auto-summary]").
126///
127/// Returns the indices of permanent system messages so callers can re-inject
128/// them after compressing the rest.
129fn extract_system_prompts(history: &mut Vec<Message>) -> Vec<Message> {
130 let mut prompts = Vec::new();
131 let mut i = 0;
132 while i < history.len() {
133 let m = &history[i];
134 let is_permanent_system = matches!(m.role, Role::System) && !m.is_auto_summary();
135 if is_permanent_system {
136 prompts.push(history.remove(i));
137 // don't increment i — the next element shifted into position i
138 } else {
139 i += 1;
140 }
141 }
142 prompts
143}
144
145// ── LlmSummarizer ─────────────────────────────────────────────────────────────
146
147/// Summarizes older conversation turns by asking DeepSeek to write a concise
148/// prose summary, then replaces the compressed turns with a single
149/// `Role::System` message containing that summary.
150///
151/// # Trigger
152///
153/// Fires when the estimated token count of the **compressible** portion of the
154/// history (everything except permanent system prompts) exceeds `token_threshold`.
155///
156/// # Behavior
157///
158/// 1. Permanent `Role::System` messages (user-provided via `with_system_prompt`)
159/// are extracted and re-prepended after summarization — they are never lost.
160/// 2. Any previous `[auto-summary]` system message is included in the text sent
161/// to the model so the new summary is cumulative.
162/// 3. The `retain_last` most recent non-system turns are kept verbatim; everything
163/// older is replaced by the LLM-generated summary.
164/// 4. If the API call fails the history is left **unchanged** and the error is
165/// returned so the caller can decide whether to abort or continue.
166///
167/// # Example
168///
169/// ```no_run
170/// use ds_api::{DeepseekAgent, ApiClient};
171/// use ds_api::conversation::LlmSummarizer;
172///
173/// let summarizer = LlmSummarizer::new(ApiClient::new("sk-..."));
174/// let agent = DeepseekAgent::new("sk-...")
175/// .with_summarizer(summarizer);
176/// ```
177#[derive(Clone)]
178pub struct LlmSummarizer {
179 /// Client used exclusively for summary API calls (can share the agent's token).
180 client: ApiClient,
181 /// Model used for the summarization API call. Defaults to `"deepseek-chat"`.
182 pub(crate) model: String,
183 /// Estimated token count above which summarization is triggered.
184 pub(crate) token_threshold: usize,
185 /// Number of most-recent non-system messages to retain verbatim.
186 pub(crate) retain_last: usize,
187}
188
189impl LlmSummarizer {
190 /// Create with default thresholds: trigger at ~60 000 tokens, retain last 10 turns.
191 ///
192 /// The summarization call uses `"deepseek-chat"` by default. Override with
193 /// [`with_model`][LlmSummarizer::with_model] — useful when the agent is
194 /// pointed at an OpenAI-compatible provider and you want the summarizer to
195 /// use the same model.
196 pub fn new(client: ApiClient) -> Self {
197 Self {
198 client,
199 model: "deepseek-chat".to_string(),
200 token_threshold: 60_000,
201 retain_last: 10,
202 }
203 }
204
205 /// Builder: set the model used for the summarization API call.
206 ///
207 /// ```no_run
208 /// use ds_api::{ApiClient, LlmSummarizer};
209 ///
210 /// let summarizer = LlmSummarizer::new(ApiClient::new("sk-..."))
211 /// .with_model("gpt-4o-mini");
212 /// ```
213 pub fn with_model(mut self, model: impl Into<String>) -> Self {
214 self.model = model.into();
215 self
216 }
217
218 /// Builder: set a custom token threshold.
219 pub fn token_threshold(mut self, n: usize) -> Self {
220 self.token_threshold = n;
221 self
222 }
223
224 /// Builder: set how many recent messages to keep verbatim.
225 pub fn retain_last(mut self, n: usize) -> Self {
226 self.retain_last = n;
227 self
228 }
229}
230
231impl Summarizer for LlmSummarizer {
232 fn should_summarize(&self, history: &[Message]) -> bool {
233 estimate_tokens(history) >= self.token_threshold
234 }
235
236 fn summarize<'a>(
237 &'a self,
238 history: &'a mut Vec<Message>,
239 ) -> Pin<Box<dyn Future<Output = Result<(), ApiError>> + Send + 'a>> {
240 Box::pin(async move {
241 // ── 1. Extract permanent system prompts ──────────────────────────
242 let system_prompts = extract_system_prompts(history);
243
244 // ── 2. Split off the tail we want to keep verbatim ───────────────
245 let retain = self.retain_last.min(history.len());
246 let split = history.len().saturating_sub(retain);
247 let tail: Vec<Message> = history.drain(split..).collect();
248
249 // history now contains only the "old" turns (including any previous
250 // [auto-summary] message).
251
252 if history.is_empty() {
253 // Nothing old enough to summarize — just restore everything.
254 history.extend(tail);
255 // re-prepend system prompts
256 for (i, p) in system_prompts.into_iter().enumerate() {
257 history.insert(i, p);
258 }
259 return Ok(());
260 }
261
262 // ── 3. Build a prompt asking the model for a summary ─────────────
263 //
264 // We format the old turns as a readable transcript and ask for a
265 // concise summary that preserves the most important facts and decisions.
266 let mut transcript = String::new();
267 for msg in &*history {
268 // skip the old auto-summary header line if present — the content
269 // itself is still useful context for the new summary
270 let role_label = match msg.role {
271 Role::User => "User",
272 Role::Assistant => "Assistant",
273 Role::System => "System",
274 Role::Tool => "Tool",
275 };
276 if let Some(content) = &msg.content {
277 transcript.push_str(&format!("{role_label}: {content}\n"));
278 }
279 }
280
281 let summarize_prompt = format!(
282 "Below is a conversation transcript. Write a concise summary (a few sentences \
283 to a short paragraph) that captures the key context, decisions, and facts \
284 established so far. The summary will replace the original transcript and be \
285 read by the same AI assistant as a memory aid — be precise and neutral.\n\n\
286 Transcript:\n{transcript}"
287 );
288
289 let req = ApiRequest::builder()
290 .with_model(self.model.clone())
291 .add_message(Message::new(Role::User, &summarize_prompt))
292 .max_tokens(512);
293
294 let response = self.client.send(req).await?;
295
296 let summary_text = response
297 .choices
298 .into_iter()
299 .next()
300 .and_then(|c| c.message.content)
301 .unwrap_or_else(|| transcript.clone());
302
303 // ── 4. Replace old turns with the summary message ────────────────
304 history.clear();
305
306 history.push(Message::auto_summary(format!(
307 "Summary of the conversation so far:\n{summary_text}"
308 )));
309
310 // ── 5. Re-attach the verbatim tail and system prompts ────────────
311 history.extend(tail);
312
313 for (i, p) in system_prompts.into_iter().enumerate() {
314 history.insert(i, p);
315 }
316
317 Ok(())
318 })
319 }
320}
321
322// ── SlidingWindowSummarizer ───────────────────────────────────────────────────
323
324/// Keeps only the most recent `window` messages and silently discards everything
325/// older. No API call is made.
326///
327/// Use this when you want predictable, zero-cost context management and are
328/// comfortable with the model losing access to earlier turns.
329///
330/// Permanent `Role::System` messages are always preserved regardless of `window`.
331///
332/// # Example
333///
334/// ```no_run
335/// use ds_api::DeepseekAgent;
336/// use ds_api::conversation::SlidingWindowSummarizer;
337///
338/// // Keep the last 20 non-system messages; trigger summarization above 30.
339/// let agent = DeepseekAgent::new("sk-...")
340/// .with_summarizer(
341/// SlidingWindowSummarizer::new(20)
342/// .trigger_at(30)
343/// );
344/// ```
345#[derive(Debug, Clone)]
346pub struct SlidingWindowSummarizer {
347 /// Maximum number of non-system messages to retain after summarization.
348 pub(crate) window: usize,
349 /// Number of non-system messages above which summarization is triggered.
350 /// Defaults to `window + 1` (trigger as soon as the window is exceeded by one).
351 pub(crate) trigger_at: Option<usize>,
352}
353
354impl SlidingWindowSummarizer {
355 /// Create a summarizer that retains at most `window` non-system messages.
356 ///
357 /// Summarization triggers as soon as the non-system message count exceeds
358 /// `window`. Use [`trigger_at`][Self::trigger_at] to set a larger trigger
359 /// threshold so the window only slides after a certain amount of growth.
360 pub fn new(window: usize) -> Self {
361 Self {
362 window,
363 trigger_at: None,
364 }
365 }
366
367 /// Builder: set the non-system message count that triggers summarization.
368 ///
369 /// Must be greater than `window`; if set to a value ≤ `window` it is
370 /// silently clamped to `window + 1`.
371 ///
372 /// # Example
373 ///
374 /// ```no_run
375 /// use ds_api::conversation::SlidingWindowSummarizer;
376 ///
377 /// // Retain 20 turns but only start trimming after reaching 40.
378 /// let s = SlidingWindowSummarizer::new(20).trigger_at(40);
379 /// ```
380 pub fn trigger_at(mut self, n: usize) -> Self {
381 self.trigger_at = Some(n.max(self.window + 1));
382 self
383 }
384}
385
386impl Summarizer for SlidingWindowSummarizer {
387 fn should_summarize(&self, history: &[Message]) -> bool {
388 let non_system = history
389 .iter()
390 .filter(|m| !matches!(m.role, Role::System))
391 .count();
392 let threshold = self.trigger_at.unwrap_or(self.window + 1);
393 non_system >= threshold
394 }
395
396 fn summarize<'a>(
397 &'a self,
398 history: &'a mut Vec<Message>,
399 ) -> Pin<Box<dyn Future<Output = Result<(), ApiError>> + Send + 'a>> {
400 Box::pin(async move {
401 // Extract and preserve permanent system prompts.
402 let system_prompts = extract_system_prompts(history);
403
404 // Remove any previous auto-summary messages — they're irrelevant
405 // for a pure sliding window.
406 history.retain(|m| !m.is_auto_summary());
407
408 // Keep only the last `window` non-system messages.
409 if history.len() > self.window {
410 let drop = history.len() - self.window;
411 history.drain(0..drop);
412 }
413
414 // Re-prepend the permanent system prompts at the front.
415 for (i, p) in system_prompts.into_iter().enumerate() {
416 history.insert(i, p);
417 }
418
419 Ok(())
420 })
421 }
422}
423
424// ── Tests ─────────────────────────────────────────────────────────────────────
425
#[cfg(test)]
mod tests {
    use super::*;

    // Shorthand: a plain message with the given role and text.
    fn msg(role: Role, text: &str) -> Message {
        Message::new(role, text)
    }

    // A permanent system prompt — carries no [auto-summary] name tag, so the
    // summarizers must always preserve it.
    fn system_prompt(text: &str) -> Message {
        Message::new(Role::System, text)
    }

    // ── estimate_tokens ───────────────────────────────────────────────────────

    #[test]
    fn estimate_tokens_excludes_permanent_system() {
        let history = vec![
            system_prompt("You are a helpful assistant."),
            msg(Role::User, "Hello"), // 5 ASCII chars
            msg(Role::Assistant, "Hi there"), // 8 ASCII chars
        ];
        // Only the User + Assistant messages should contribute; the permanent
        // system prompt is excluded from the estimate.
        let est = estimate_tokens(&history);
        assert!(est > 0);
        // "Hello" + "Hi there" = 13 weighted chars; 13 / 4 = 3 (integer division)
        assert_eq!(est, 3);
    }

    #[test]
    fn estimate_tokens_includes_auto_summary() {
        // Auto-summary messages are replaceable, so they DO count toward the estimate.
        let summary = Message::auto_summary("Some prior summary text.");

        let history = vec![summary];
        let est = estimate_tokens(&history);
        assert!(est > 0);
    }

    // ── SlidingWindowSummarizer ───────────────────────────────────────────────

    #[tokio::test]
    async fn sliding_window_trims_to_window() {
        let mut history = vec![
            system_prompt("system"),
            msg(Role::User, "a"),
            msg(Role::Assistant, "b"),
            msg(Role::User, "c"),
            msg(Role::Assistant, "d"),
            msg(Role::User, "e"),
        ];

        let s = SlidingWindowSummarizer::new(2);
        assert!(s.should_summarize(&history));
        s.summarize(&mut history).await.unwrap();

        // permanent system prompt preserved
        assert!(
            history
                .iter()
                .any(|m| matches!(m.role, Role::System) && m.content.as_deref() == Some("system"))
        );

        // at most `window` non-system messages remain
        let non_sys: Vec<_> = history
            .iter()
            .filter(|m| !matches!(m.role, Role::System))
            .collect();
        assert_eq!(non_sys.len(), 2);

        // the retained messages are the most recent ones, in original order
        assert_eq!(non_sys[0].content.as_deref(), Some("d"));
        assert_eq!(non_sys[1].content.as_deref(), Some("e"));
    }

    #[tokio::test]
    async fn sliding_window_preserves_multiple_system_prompts() {
        let mut p1 = system_prompt("prompt one");
        let mut p2 = system_prompt("prompt two");
        // Clearing `name` makes explicit that these are NOT auto-summaries —
        // the [auto-summary] tag lives in the `name` field.
        p1.name = None;
        p2.name = None;

        let mut history = vec![
            p1.clone(),
            p2.clone(),
            msg(Role::User, "1"),
            msg(Role::User, "2"),
            msg(Role::User, "3"),
        ];

        let s = SlidingWindowSummarizer::new(1);
        s.summarize(&mut history).await.unwrap();

        // Both prompts survive, in their original relative order.
        let sys_msgs: Vec<_> = history
            .iter()
            .filter(|m| matches!(m.role, Role::System))
            .collect();
        assert_eq!(sys_msgs.len(), 2);
        assert_eq!(sys_msgs[0].content.as_deref(), Some("prompt one"));
        assert_eq!(sys_msgs[1].content.as_deref(), Some("prompt two"));
    }

    #[tokio::test]
    async fn sliding_window_removes_old_auto_summary() {
        let auto = Message::auto_summary("old summary");

        let mut history = vec![
            system_prompt("permanent"),
            auto,
            msg(Role::User, "a"),
            msg(Role::User, "b"),
            msg(Role::User, "c"),
        ];

        let s = SlidingWindowSummarizer::new(2);
        s.summarize(&mut history).await.unwrap();

        // old auto-summary should be gone — the sliding window has no use for it
        assert!(!history.iter().any(|m| m.is_auto_summary()));

        // permanent system prompt preserved
        assert!(
            history
                .iter()
                .any(|m| m.content.as_deref() == Some("permanent"))
        );
    }

    #[tokio::test]
    async fn sliding_window_noop_when_within_window() {
        // Within the window: summarize must neither trigger nor mutate.
        let mut history = vec![msg(Role::User, "a"), msg(Role::Assistant, "b")];

        let s = SlidingWindowSummarizer::new(4);
        assert!(!s.should_summarize(&history));
        s.summarize(&mut history).await.unwrap();
        assert_eq!(history.len(), 2);
    }

    // ── should_summarize ─────────────────────────────────────────────────────

    #[test]
    fn should_summarize_triggers_at_window_exceeded() {
        // Default trigger is window + 1: 3 messages > window of 2 fires.
        let history = vec![
            msg(Role::User, "a"),
            msg(Role::User, "b"),
            msg(Role::User, "c"),
        ];
        let s = SlidingWindowSummarizer::new(2);
        assert!(s.should_summarize(&history));

        let short = vec![msg(Role::User, "only")];
        assert!(!s.should_summarize(&short));
    }
}
579}