// ds_api/conversation/summarizer.rs

//! Conversation summarizer trait and built-in implementations.
//!
//! The [`AUTO_SUMMARY_TAG`][crate::raw::request::message::AUTO_SUMMARY_TAG] constant
//! in [`Message`][crate::raw::request::message::Message] defines the single source of
//! truth for identifying auto-generated summary messages.
//!
//! # Trait
//!
//! [`Summarizer`] is an async trait with two methods:
//! - [`should_summarize`][Summarizer::should_summarize] — synchronous check on the current history.
//! - [`summarize`][Summarizer::summarize] — async, may perform an API call; mutates history in-place.
//!
//! # Built-in implementations
//!
//! | Type | Strategy |
//! |---|---|
//! | [`LlmSummarizer`] | Calls DeepSeek to produce a semantic summary; **default** for `DeepseekAgent`. |
//! | [`SlidingWindowSummarizer`] | Keeps the last N messages and silently drops the rest; no API call. |
19
20use std::pin::Pin;
21
22use futures::Future;
23
24use crate::api::{ApiClient, ApiRequest};
25use crate::error::ApiError;
26use crate::raw::request::message::{Message, Role};
27
// ── Trait ────────────────────────────────────────────────────────────────────
29
30/// Decides when and how to compress conversation history.
31///
32/// Both methods receive an immutable or mutable slice of the current history.
33/// Implementors are free to count tokens, count turns, check wall-clock time,
34/// or use any other heuristic.
35///
36/// The trait is object-safe via `BoxFuture`; you can store it as
37/// `Box<dyn Summarizer>` without `async_trait`.
38pub trait Summarizer: Send + Sync {
39    /// Return `true` if the history should be summarized before the next API turn.
40    ///
41    /// This is called synchronously on every user-input push; keep it cheap.
42    fn should_summarize(&self, history: &[Message]) -> bool;
43
44    /// Compress `history` in-place, returning an error only for unrecoverable failures.
45    ///
46    /// On success the history must be shorter (or at most the same length) than before.
47    /// Implementations must **not** remove messages whose role is [`Role::System`] and
48    /// whose `name` field is not `Some("[auto-summary]")` — those are user-provided
49    /// system prompts and must be preserved.
50    fn summarize<'a>(
51        &'a self,
52        history: &'a mut Vec<Message>,
53    ) -> Pin<Box<dyn Future<Output = Result<(), ApiError>> + Send + 'a>>;
54}
55
// ── Helpers ───────────────────────────────────────────────────────────────────
57
58/// Estimate the token count of a slice of messages using a fast character heuristic.
59///
60/// ASCII characters count as 1 char ≈ 0.25 tokens; CJK / multibyte characters are
61/// counted as 4 chars ≈ 1 token.  System messages whose `name` is `[auto-summary]`
62/// are included in the estimate; other system messages (user-provided prompts) are
63/// excluded because they are permanent and we cannot remove them anyway.
64pub(crate) fn estimate_tokens(history: &[Message]) -> usize {
65    history
66        .iter()
67        .filter(|m| {
68            // Always exclude permanent system prompts from the token estimate;
69            // we can't remove them so counting them would trigger summarization
70            // that can never actually free those tokens.
71            if matches!(m.role, Role::System) {
72                // auto-summary placeholders are replaceable → count them
73                m.is_auto_summary()
74            } else {
75                true
76            }
77        })
78        .filter_map(|m| m.content.as_deref())
79        .map(|s| {
80            s.chars()
81                .map(|c| if c.is_ascii() { 1usize } else { 4 })
82                .sum::<usize>()
83        })
84        .sum::<usize>()
85        / 4
86}
87
88/// Partition `history` into (system_prompts, rest), where system prompts are
89/// permanent user-provided system messages (role=System, name≠"[auto-summary]").
90///
91/// Returns the indices of permanent system messages so callers can re-inject
92/// them after compressing the rest.
93fn extract_system_prompts(history: &mut Vec<Message>) -> Vec<Message> {
94    let mut prompts = Vec::new();
95    let mut i = 0;
96    while i < history.len() {
97        let m = &history[i];
98        let is_permanent_system = matches!(m.role, Role::System) && !m.is_auto_summary();
99        if is_permanent_system {
100            prompts.push(history.remove(i));
101            // don't increment i — the next element shifted into position i
102        } else {
103            i += 1;
104        }
105    }
106    prompts
107}
108
// ── LlmSummarizer ─────────────────────────────────────────────────────────────
110
111/// Summarizes older conversation turns by asking DeepSeek to write a concise
112/// prose summary, then replaces the compressed turns with a single
113/// `Role::System` message containing that summary.
114///
115/// # Trigger
116///
117/// Fires when the estimated token count of the **compressible** portion of the
118/// history (everything except permanent system prompts) exceeds `token_threshold`.
119///
120/// # Behavior
121///
122/// 1. Permanent `Role::System` messages (user-provided via `with_system_prompt`)
123///    are extracted and re-prepended after summarization — they are never lost.
124/// 2. Any previous `[auto-summary]` system message is included in the text sent
125///    to the model so the new summary is cumulative.
126/// 3. The `retain_last` most recent non-system turns are kept verbatim; everything
127///    older is replaced by the LLM-generated summary.
128/// 4. If the API call fails the history is left **unchanged** and the error is
129///    returned so the caller can decide whether to abort or continue.
130///
131/// # Example
132///
133/// ```no_run
134/// use ds_api::{DeepseekAgent, ApiClient};
135/// use ds_api::conversation::LlmSummarizer;
136///
137/// let summarizer = LlmSummarizer::new(ApiClient::new("sk-..."));
138/// let agent = DeepseekAgent::new("sk-...")
139///     .with_summarizer(summarizer);
140/// ```
141#[derive(Clone)]
142pub struct LlmSummarizer {
143    /// Client used exclusively for summary API calls (can share the agent's token).
144    client: ApiClient,
145    /// Estimated token count above which summarization is triggered.
146    pub(crate) token_threshold: usize,
147    /// Number of most-recent non-system messages to retain verbatim.
148    pub(crate) retain_last: usize,
149}
150
151impl LlmSummarizer {
152    /// Create with default thresholds: trigger at ~60 000 tokens, retain last 10 turns.
153    pub fn new(client: ApiClient) -> Self {
154        Self {
155            client,
156            token_threshold: 60_000,
157            retain_last: 10,
158        }
159    }
160
161    /// Builder: set a custom token threshold.
162    pub fn token_threshold(mut self, n: usize) -> Self {
163        self.token_threshold = n;
164        self
165    }
166
167    /// Builder: set how many recent messages to keep verbatim.
168    pub fn retain_last(mut self, n: usize) -> Self {
169        self.retain_last = n;
170        self
171    }
172}
173
174impl Summarizer for LlmSummarizer {
175    fn should_summarize(&self, history: &[Message]) -> bool {
176        estimate_tokens(history) >= self.token_threshold
177    }
178
179    fn summarize<'a>(
180        &'a self,
181        history: &'a mut Vec<Message>,
182    ) -> Pin<Box<dyn Future<Output = Result<(), ApiError>> + Send + 'a>> {
183        Box::pin(async move {
184            // ── 1. Extract permanent system prompts ──────────────────────────
185            let system_prompts = extract_system_prompts(history);
186
187            // ── 2. Split off the tail we want to keep verbatim ───────────────
188            let retain = self.retain_last.min(history.len());
189            let split = history.len().saturating_sub(retain);
190            let tail: Vec<Message> = history.drain(split..).collect();
191
192            // history now contains only the "old" turns (including any previous
193            // [auto-summary] message).
194
195            if history.is_empty() {
196                // Nothing old enough to summarize — just restore everything.
197                history.extend(tail);
198                // re-prepend system prompts
199                for (i, p) in system_prompts.into_iter().enumerate() {
200                    history.insert(i, p);
201                }
202                return Ok(());
203            }
204
205            // ── 3. Build a prompt asking the model for a summary ─────────────
206            //
207            // We format the old turns as a readable transcript and ask for a
208            // concise summary that preserves the most important facts and decisions.
209            let mut transcript = String::new();
210            for msg in &*history {
211                // skip the old auto-summary header line if present — the content
212                // itself is still useful context for the new summary
213                let role_label = match msg.role {
214                    Role::User => "User",
215                    Role::Assistant => "Assistant",
216                    Role::System => "System",
217                    Role::Tool => "Tool",
218                };
219                if let Some(content) = &msg.content {
220                    transcript.push_str(&format!("{role_label}: {content}\n"));
221                }
222            }
223
224            let summarize_prompt = format!(
225                "Below is a conversation transcript. Write a concise summary (a few sentences \
226                 to a short paragraph) that captures the key context, decisions, and facts \
227                 established so far. The summary will replace the original transcript and be \
228                 read by the same AI assistant as a memory aid — be precise and neutral.\n\n\
229                 Transcript:\n{transcript}"
230            );
231
232            let req = ApiRequest::builder()
233                .add_message(Message::new(Role::User, &summarize_prompt))
234                .max_tokens(512);
235
236            let response = self.client.send(req).await?;
237
238            let summary_text = response
239                .choices
240                .into_iter()
241                .next()
242                .and_then(|c| c.message.content)
243                .unwrap_or_else(|| transcript.clone());
244
245            // ── 4. Replace old turns with the summary message ────────────────
246            history.clear();
247
248            history.push(Message::auto_summary(format!(
249                "Summary of the conversation so far:\n{summary_text}"
250            )));
251
252            // ── 5. Re-attach the verbatim tail and system prompts ────────────
253            history.extend(tail);
254
255            for (i, p) in system_prompts.into_iter().enumerate() {
256                history.insert(i, p);
257            }
258
259            Ok(())
260        })
261    }
262}
263
// ── SlidingWindowSummarizer ───────────────────────────────────────────────────
265
/// Context manager that retains just the newest `window` messages and drops
/// everything older without a trace.  It never calls the API.
///
/// Choose this when predictable, zero-cost trimming matters more than the
/// model being able to recall earlier turns.
///
/// Permanent `Role::System` messages are always preserved regardless of `window`.
///
/// # Example
///
/// ```no_run
/// use ds_api::{DeepseekAgent};
/// use ds_api::conversation::SlidingWindowSummarizer;
///
/// let agent = DeepseekAgent::new("sk-...")
///     .with_summarizer(SlidingWindowSummarizer::new(20));
/// ```
#[derive(Debug, Clone)]
pub struct SlidingWindowSummarizer {
    /// Upper bound on the number of non-system messages left after trimming.
    pub(crate) window: usize,
}

impl SlidingWindowSummarizer {
    /// Builds a summarizer that lets at most `window` non-system messages
    /// survive each trim.
    pub fn new(window: usize) -> Self {
        SlidingWindowSummarizer { window }
    }
}
295
296impl Summarizer for SlidingWindowSummarizer {
297    fn should_summarize(&self, history: &[Message]) -> bool {
298        let non_system = history
299            .iter()
300            .filter(|m| !matches!(m.role, Role::System))
301            .count();
302        non_system > self.window
303    }
304
305    fn summarize<'a>(
306        &'a self,
307        history: &'a mut Vec<Message>,
308    ) -> Pin<Box<dyn Future<Output = Result<(), ApiError>> + Send + 'a>> {
309        Box::pin(async move {
310            // Extract and preserve permanent system prompts.
311            let system_prompts = extract_system_prompts(history);
312
313            // Remove any previous auto-summary messages — they're irrelevant
314            // for a pure sliding window.
315            history.retain(|m| !m.is_auto_summary());
316
317            // Keep only the last `window` non-system messages.
318            if history.len() > self.window {
319                let drop = history.len() - self.window;
320                history.drain(0..drop);
321            }
322
323            // Re-prepend the permanent system prompts at the front.
324            for (i, p) in system_prompts.into_iter().enumerate() {
325                history.insert(i, p);
326            }
327
328            Ok(())
329        })
330    }
331}
332
// ── Tests ─────────────────────────────────────────────────────────────────────
334
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a message with the given role and text content.
    fn msg(role: Role, text: &str) -> Message {
        Message::new(role, text)
    }

    /// Builds a permanent system prompt: a system message created through the
    /// plain constructor rather than `Message::auto_summary`.
    fn system_prompt(text: &str) -> Message {
        msg(Role::System, text)
    }

    /// `true` for messages whose role is `Role::System`.
    fn is_system(m: &Message) -> bool {
        matches!(m.role, Role::System)
    }

    /// Collects, in order, the content of every message accepted by `keep`.
    fn texts_where<'h>(
        history: &'h [Message],
        keep: impl Fn(&Message) -> bool,
    ) -> Vec<Option<&'h str>> {
        history
            .iter()
            .filter(|m| keep(m))
            .map(|m| m.content.as_deref())
            .collect()
    }

    // ── estimate_tokens ───────────────────────────────────────────────────────

    #[test]
    fn estimate_tokens_excludes_permanent_system() {
        // Only the user and assistant messages may contribute:
        // "Hello" (5 chars) + "Hi there" (8 chars) = 13 chars / 4 = 3 tokens.
        let history = [
            system_prompt("You are a helpful assistant."),
            msg(Role::User, "Hello"),
            msg(Role::Assistant, "Hi there"),
        ];

        let estimate = estimate_tokens(&history);
        assert!(estimate > 0);
        assert_eq!(estimate, 3);
    }

    #[test]
    fn estimate_tokens_includes_auto_summary() {
        let history = [Message::auto_summary("Some prior summary text.")];
        assert!(estimate_tokens(&history) > 0);
    }

    // ── SlidingWindowSummarizer ───────────────────────────────────────────────

    #[tokio::test]
    async fn sliding_window_trims_to_window() {
        let mut history = vec![
            system_prompt("system"),
            msg(Role::User, "a"),
            msg(Role::Assistant, "b"),
            msg(Role::User, "c"),
            msg(Role::Assistant, "d"),
            msg(Role::User, "e"),
        ];

        let summarizer = SlidingWindowSummarizer::new(2);
        assert!(summarizer.should_summarize(&history));
        summarizer.summarize(&mut history).await.unwrap();

        // The permanent system prompt survives.
        assert!(texts_where(&history, is_system).contains(&Some("system")));

        // Exactly the two most recent non-system messages remain, in order.
        let kept = texts_where(&history, |m| !is_system(m));
        assert_eq!(kept, vec![Some("d"), Some("e")]);
    }

    #[tokio::test]
    async fn sliding_window_preserves_multiple_system_prompts() {
        let mut first = system_prompt("prompt one");
        let mut second = system_prompt("prompt two");
        // Make sure neither carries a name that could mark it as an auto-summary.
        first.name = None;
        second.name = None;

        let mut history = vec![
            first,
            second,
            msg(Role::User, "1"),
            msg(Role::User, "2"),
            msg(Role::User, "3"),
        ];

        let summarizer = SlidingWindowSummarizer::new(1);
        summarizer.summarize(&mut history).await.unwrap();

        // Both prompts are still present and in their original order.
        assert_eq!(
            texts_where(&history, is_system),
            vec![Some("prompt one"), Some("prompt two")]
        );
    }

    #[tokio::test]
    async fn sliding_window_removes_old_auto_summary() {
        let mut history = vec![
            system_prompt("permanent"),
            Message::auto_summary("old summary"),
            msg(Role::User, "a"),
            msg(Role::User, "b"),
            msg(Role::User, "c"),
        ];

        let summarizer = SlidingWindowSummarizer::new(2);
        summarizer.summarize(&mut history).await.unwrap();

        // The stale auto-summary is gone …
        assert!(history.iter().all(|m| !m.is_auto_summary()));

        // … while the permanent system prompt is kept.
        assert!(history
            .iter()
            .any(|m| m.content.as_deref() == Some("permanent")));
    }

    #[tokio::test]
    async fn sliding_window_noop_when_within_window() {
        let mut history = vec![msg(Role::User, "a"), msg(Role::Assistant, "b")];

        let summarizer = SlidingWindowSummarizer::new(4);
        assert!(!summarizer.should_summarize(&history));
        summarizer.summarize(&mut history).await.unwrap();
        assert_eq!(history.len(), 2);
    }

    // ── should_summarize ─────────────────────────────────────────────────────

    #[test]
    fn should_summarize_triggers_at_window_exceeded() {
        let summarizer = SlidingWindowSummarizer::new(2);

        let over_window = [
            msg(Role::User, "a"),
            msg(Role::User, "b"),
            msg(Role::User, "c"),
        ];
        assert!(summarizer.should_summarize(&over_window));

        let within_window = [msg(Role::User, "only")];
        assert!(!summarizer.should_summarize(&within_window));
    }
}