use std::pin::Pin;
use futures::Future;
use crate::api::{ApiClient, ApiRequest};
use crate::error::ApiError;
use crate::raw::request::message::{Message, Role};
/// Strategy for compacting a conversation history once it grows too large.
///
/// Implementations decide both *when* to compact (`should_summarize`) and
/// *how* (`summarize`). `Send + Sync` so a summarizer can be shared across
/// async tasks.
pub trait Summarizer: Send + Sync {
    /// Returns `true` when `history` has grown enough that `summarize`
    /// should be invoked.
    fn should_summarize(&self, history: &[Message]) -> bool;

    /// Compacts `history` in place. Returns a boxed future borrowing both
    /// `self` and `history` for `'a` (boxed so the trait stays object-safe).
    fn summarize<'a>(
        &'a self,
        history: &'a mut Vec<Message>,
    ) -> Pin<Box<dyn Future<Output = Result<(), ApiError>> + Send + 'a>>;
}
/// Rough token estimate for `history`.
///
/// Permanent system prompts are excluded; auto-generated summary messages
/// (which occupy the System role) still count. Each ASCII char is weighted 1
/// and every other char 4; the weighted total is divided by 4 to approximate
/// "~4 chars per token" for ASCII text.
pub(crate) fn estimate_tokens(history: &[Message]) -> usize {
    let mut weighted = 0usize;
    for message in history {
        // Skip permanent system prompts; keep everything else, including
        // auto-summaries that happen to use the System role.
        let counted = !matches!(message.role, Role::System) || message.is_auto_summary();
        if !counted {
            continue;
        }
        if let Some(text) = message.content.as_deref() {
            for c in text.chars() {
                weighted += if c.is_ascii() { 1 } else { 4 };
            }
        }
    }
    weighted / 4
}
/// Removes all permanent system prompts (System-role messages that are not
/// auto-generated summaries) from `history` and returns them, preserving the
/// original relative order of both the extracted prompts and the messages
/// left behind.
///
/// Implemented as a single O(n) partition pass; the previous scan-and-
/// `remove(i)` approach shifted the vector's tail on every extraction and
/// was O(n²) in the number of system prompts.
fn extract_system_prompts(history: &mut Vec<Message>) -> Vec<Message> {
    let mut prompts = Vec::new();
    let mut remaining = Vec::with_capacity(history.len());
    for m in history.drain(..) {
        if matches!(m.role, Role::System) && !m.is_auto_summary() {
            prompts.push(m);
        } else {
            remaining.push(m);
        }
    }
    *history = remaining;
    prompts
}
/// Summarizer that asks an LLM (through `ApiClient`) to condense the older
/// part of a conversation into a single auto-summary message, keeping the
/// most recent messages verbatim.
#[derive(Clone)]
pub struct LlmSummarizer {
    client: ApiClient,
    // Model name sent with the summarization request.
    pub(crate) model: String,
    // Estimated-token count at which `should_summarize` fires.
    pub(crate) token_threshold: usize,
    // How many trailing messages are kept verbatim (the boundary may shift
    // further to avoid splitting tool-call groups).
    pub(crate) retain_last: usize,
}
impl LlmSummarizer {
    /// Creates a summarizer with defaults: model `deepseek-chat`, a 60k
    /// estimated-token trigger, and the last 10 messages retained verbatim.
    pub fn new(client: ApiClient) -> Self {
        Self {
            client,
            model: String::from("deepseek-chat"),
            token_threshold: 60_000,
            retain_last: 10,
        }
    }

    /// Overrides the model used for the summarization request.
    pub fn with_model(self, model: impl Into<String>) -> Self {
        Self {
            model: model.into(),
            ..self
        }
    }

    /// Sets the estimated-token count at which summarization triggers.
    pub fn token_threshold(self, n: usize) -> Self {
        Self {
            token_threshold: n,
            ..self
        }
    }

    /// Sets how many trailing messages are kept out of the summary.
    pub fn retain_last(self, n: usize) -> Self {
        Self {
            retain_last: n,
            ..self
        }
    }
}
impl Summarizer for LlmSummarizer {
    /// Fires once the estimated token count reaches the configured threshold.
    fn should_summarize(&self, history: &[Message]) -> bool {
        estimate_tokens(history) >= self.token_threshold
    }

    /// Replaces the older portion of `history` with a single LLM-generated
    /// summary, keeping permanent system prompts and (at least) the last
    /// `retain_last` messages verbatim.
    fn summarize<'a>(
        &'a self,
        history: &'a mut Vec<Message>,
    ) -> Pin<Box<dyn Future<Output = Result<(), ApiError>> + Send + 'a>> {
        Box::pin(async move {
            // Pull permanent system prompts out so they are neither
            // summarized nor dropped; they are re-inserted at the front on
            // every exit path below.
            let system_prompts = extract_system_prompts(history);
            let retain = self.retain_last.min(history.len());
            // `split` is the boundary: [0, split) is summarized (the head),
            // [split, len) is kept verbatim (the tail).
            let mut split = history.len().saturating_sub(retain);
            // Never cut between a tool call and its responses: while the
            // message at the boundary is a tool response, or the message
            // just before the boundary carries tool calls, fold the boundary
            // message into the head. The tail may therefore end up shorter
            // than `retain_last`.
            while split < history.len() {
                let current_is_tool = matches!(history[split].role, Role::Tool);
                let prev_is_call = if split > 0 {
                    history[split - 1]
                        .tool_calls
                        .as_ref()
                        .is_some_and(|tc| !tc.is_empty())
                } else {
                    false
                };
                if current_is_tool || prev_is_call {
                    split += 1;
                } else {
                    break;
                }
            }
            let tail: Vec<Message> = history.drain(split..).collect();
            // Nothing left to summarize (everything fell into the tail):
            // restore the original layout and bail without an API call.
            if history.is_empty() {
                history.extend(tail);
                for (i, p) in system_prompts.into_iter().enumerate() {
                    history.insert(i, p);
                }
                return Ok(());
            }
            // Render the head as a plain-text transcript for the model.
            let mut transcript = String::new();
            for msg in &*history {
                let role_label = match msg.role {
                    Role::User => "User",
                    Role::Assistant => "Assistant",
                    Role::System => "System",
                    Role::Tool => "Tool",
                };
                // Messages without text content (e.g. pure tool-call turns)
                // are represented by a debug rendering of their calls.
                let content_text = msg.content.clone().unwrap_or_else(|| {
                    msg.tool_calls
                        .as_ref()
                        .map(|calls| format!("[Calls Tools: {:?}]", calls))
                        .unwrap_or_default()
                });
                if !content_text.is_empty() {
                    transcript.push_str(&format!("{role_label}: {content_text}\n"));
                }
            }
            let summarize_prompt = format!(
                "Below is a conversation transcript. Write a concise summary (a few sentences \
                 to a short paragraph) that captures the key context, decisions, and facts \
                 established so far. The summary will replace the original transcript and be \
                 read by the same AI assistant as a memory aid — be precise and neutral.\n\n\
                 Transcript:\n{transcript}"
            );
            let req = ApiRequest::builder()
                .with_model(self.model.clone())
                .add_message(Message::new(Role::User, &summarize_prompt))
                .max_tokens(512);
            let response = self.client.send(req).await?;
            // Best-effort: if the model returned no content, fall back to
            // embedding the raw transcript so no context is silently lost.
            let summary_text = response
                .choices
                .into_iter()
                .next()
                .and_then(|c| c.message.content)
                .unwrap_or_else(|| transcript.clone());
            // Rebuild: [system prompts…, summary, tail…].
            history.clear();
            history.push(Message::auto_summary(format!(
                "Summary of the conversation so far:\n{summary_text}"
            )));
            history.extend(tail);
            for (i, p) in system_prompts.into_iter().enumerate() {
                history.insert(i, p);
            }
            Ok(())
        })
    }
}
/// Summarizer that performs no LLM call: it simply drops the oldest
/// non-system messages, keeping at most `window` of them.
#[derive(Debug, Clone)]
pub struct SlidingWindowSummarizer {
    // Maximum number of non-system messages retained after `summarize`.
    pub(crate) window: usize,
    // Optional explicit trigger threshold; when `None`, triggering happens
    // at `window + 1` non-system messages.
    pub(crate) trigger_at: Option<usize>,
}
impl SlidingWindowSummarizer {
    /// Creates a sliding-window summarizer keeping at most `window`
    /// non-system messages.
    pub fn new(window: usize) -> Self {
        Self {
            window,
            trigger_at: None,
        }
    }

    /// Sets an explicit trigger threshold, clamped to at least
    /// `window + 1` so triggering always implies something to trim.
    pub fn trigger_at(self, n: usize) -> Self {
        let clamped = n.max(self.window + 1);
        Self {
            trigger_at: Some(clamped),
            ..self
        }
    }
}
impl Summarizer for SlidingWindowSummarizer {
    /// Triggers once the count of non-system messages reaches the explicit
    /// threshold, or `window + 1` when none was configured.
    fn should_summarize(&self, history: &[Message]) -> bool {
        let threshold = self.trigger_at.unwrap_or(self.window + 1);
        let conversational = history
            .iter()
            .filter(|m| !matches!(m.role, Role::System))
            .count();
        conversational >= threshold
    }

    /// Trims `history` down to the window: permanent system prompts are
    /// kept, stale auto-summaries are discarded, and only the newest
    /// `window` remaining messages survive.
    fn summarize<'a>(
        &'a self,
        history: &'a mut Vec<Message>,
    ) -> Pin<Box<dyn Future<Output = Result<(), ApiError>> + Send + 'a>> {
        Box::pin(async move {
            let prompts = extract_system_prompts(history);
            // Drop any summary left behind by a previous summarizer.
            history.retain(|m| !m.is_auto_summary());
            let excess = history.len().saturating_sub(self.window);
            if excess > 0 {
                history.drain(..excess);
            }
            // Restore the permanent prompts at the front, in order.
            history.splice(0..0, prompts);
            Ok(())
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a message with the given role and text content.
    fn msg(role: Role, text: &str) -> Message {
        Message::new(role, text)
    }

    /// Builds a permanent (non-auto-summary) system prompt.
    fn system_prompt(text: &str) -> Message {
        Message::new(Role::System, text)
    }

    #[test]
    fn estimate_tokens_excludes_permanent_system() {
        let history = vec![
            system_prompt("You are a helpful assistant."),
            msg(Role::User, "Hello"),
            msg(Role::Assistant, "Hi there"),
        ];
        let est = estimate_tokens(&history);
        assert!(est > 0);
        // "Hello" (5) + "Hi there" (8) = 13 ASCII chars -> 13 / 4 = 3;
        // the system prompt contributes nothing.
        assert_eq!(est, 3);
    }

    #[test]
    fn estimate_tokens_includes_auto_summary() {
        let history = vec![Message::auto_summary("Some prior summary text.")];
        assert!(estimate_tokens(&history) > 0);
    }

    #[tokio::test]
    async fn sliding_window_trims_to_window() {
        let mut history = vec![system_prompt("system")];
        for (role, text) in [
            (Role::User, "a"),
            (Role::Assistant, "b"),
            (Role::User, "c"),
            (Role::Assistant, "d"),
            (Role::User, "e"),
        ] {
            history.push(msg(role, text));
        }
        let s = SlidingWindowSummarizer::new(2);
        assert!(s.should_summarize(&history));
        s.summarize(&mut history).await.unwrap();
        // The permanent system prompt survives trimming.
        assert!(history
            .iter()
            .any(|m| matches!(m.role, Role::System) && m.content.as_deref() == Some("system")));
        // Exactly the last two conversational messages remain, in order.
        let non_sys: Vec<_> = history
            .iter()
            .filter(|m| !matches!(m.role, Role::System))
            .map(|m| m.content.as_deref())
            .collect();
        assert_eq!(non_sys, vec![Some("d"), Some("e")]);
    }

    #[tokio::test]
    async fn sliding_window_preserves_multiple_system_prompts() {
        let mut first = system_prompt("prompt one");
        let mut second = system_prompt("prompt two");
        first.name = None;
        second.name = None;
        let mut history = vec![
            first,
            second,
            msg(Role::User, "1"),
            msg(Role::User, "2"),
            msg(Role::User, "3"),
        ];
        let s = SlidingWindowSummarizer::new(1);
        s.summarize(&mut history).await.unwrap();
        let sys_contents: Vec<_> = history
            .iter()
            .filter(|m| matches!(m.role, Role::System))
            .map(|m| m.content.as_deref())
            .collect();
        assert_eq!(sys_contents, vec![Some("prompt one"), Some("prompt two")]);
    }

    #[tokio::test]
    async fn sliding_window_removes_old_auto_summary() {
        let mut history = vec![
            system_prompt("permanent"),
            Message::auto_summary("old summary"),
            msg(Role::User, "a"),
            msg(Role::User, "b"),
            msg(Role::User, "c"),
        ];
        let s = SlidingWindowSummarizer::new(2);
        s.summarize(&mut history).await.unwrap();
        assert!(history.iter().all(|m| !m.is_auto_summary()));
        assert!(history
            .iter()
            .any(|m| m.content.as_deref() == Some("permanent")));
    }

    #[tokio::test]
    async fn sliding_window_noop_when_within_window() {
        let mut history = vec![msg(Role::User, "a"), msg(Role::Assistant, "b")];
        let s = SlidingWindowSummarizer::new(4);
        assert!(!s.should_summarize(&history));
        s.summarize(&mut history).await.unwrap();
        assert_eq!(history.len(), 2);
    }

    #[test]
    fn should_summarize_triggers_at_window_exceeded() {
        let s = SlidingWindowSummarizer::new(2);
        let long: Vec<Message> = ["a", "b", "c"]
            .into_iter()
            .map(|t| msg(Role::User, t))
            .collect();
        assert!(s.should_summarize(&long));
        let short = vec![msg(Role::User, "only")];
        assert!(!s.should_summarize(&short));
    }
}