use std::fmt::Write;
use std::path::Path;
use std::sync::Arc;
use anyhow::Result;
use chrono::{DateTime, Utc};
use tokio::sync::Mutex;
use crate::client::DeepSeekClient;
use crate::compaction::KEEP_RECENT_MESSAGES;
use crate::compaction::plan_compaction;
use crate::llm_client::LlmClient;
use crate::models::{ContentBlock, Message, MessageRequest, SystemBlock, SystemPrompt};
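/// Default model used to produce seam summaries.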
pub const DEFAULT_SEAM_MODEL: &str = "deepseek-v4-flash";
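// Cumulative-token thresholds at which each seam level fires, and past which
// the session cycles entirely.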
pub const DEFAULT_L1_THRESHOLD: usize = 192_000;
pub const DEFAULT_L2_THRESHOLD: usize = 384_000;
pub const DEFAULT_L3_THRESHOLD: usize = 576_000;
pub const DEFAULT_CYCLE_THRESHOLD: usize = 768_000;
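/// Number of most-recent conversation turns that are never summarized.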
pub const VERBATIM_WINDOW_TURNS: usize = 16;
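// Output caps for seam summaries; deeper levels are denser, so they get less room.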
const L1_MAX_TOKENS: u32 = 3_200;
const L2_MAX_TOKENS: u32 = 2_400;
const L3_MAX_TOKENS: u32 = 1_600;
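/// Tunable thresholds and model selection for seam-based context compaction.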
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SeamConfig {
pub enabled: bool,
pub verbatim_window_turns: usize,
pub l1_threshold: usize,
pub l2_threshold: usize,
pub l3_threshold: usize,
pub cycle_threshold: usize,
pub seam_model: String,
}
impl Default for SeamConfig {
fn default() -> Self {
Self {
enabled: true,
verbatim_window_turns: VERBATIM_WINDOW_TURNS,
l1_threshold: DEFAULT_L1_THRESHOLD,
l2_threshold: DEFAULT_L2_THRESHOLD,
l3_threshold: DEFAULT_L3_THRESHOLD,
cycle_threshold: DEFAULT_CYCLE_THRESHOLD,
seam_model: DEFAULT_SEAM_MODEL.to_string(),
}
}
}
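/// Record of one produced seam: the message range it covers, its estimated
/// size, and the model that produced it.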
#[derive(Debug, Clone)]
pub struct SeamMetadata {
pub level: u8,
#[allow(dead_code)]
pub start_idx: usize,
#[allow(dead_code)]
pub end_idx: usize,
#[allow(dead_code)]
pub token_estimate: usize,
#[allow(dead_code)]
pub timestamp: DateTime<Utc>,
#[allow(dead_code)]
pub model: String,
}
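/// Produces and tracks "seams": progressively denser summaries that stand in
/// for older conversation history as the cumulative token count grows.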
pub struct SeamManager {
flash_client: DeepSeekClient,
config: SeamConfig,
active_seams: Arc<Mutex<Vec<SeamMetadata>>>,
}
impl SeamManager {
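    /// Build a manager that drives the given flash-tier client with `config`.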
pub fn new(flash_client: DeepSeekClient, config: SeamConfig) -> Self {
Self {
flash_client,
config,
active_seams: Arc::new(Mutex::new(Vec::new())),
}
}
pub fn config(&self) -> &SeamConfig {
&self.config
}
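    /// Number of seams recorded so far.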
pub async fn seam_count(&self) -> usize {
self.active_seams.lock().await.len()
}
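    /// Next seam level to produce, if any. Levels escalate one at a time:
    /// even when `cumulative_tokens` is already past the L3 threshold, an L1
    /// seam is produced first if none exists yet.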
#[must_use]
pub fn seam_level_for(
&self,
cumulative_tokens: usize,
highest_existing_level: Option<u8>,
) -> Option<u8> {
if !self.config.enabled {
return None;
}
let highest = highest_existing_level.unwrap_or(0);
if highest < 1 && cumulative_tokens >= self.config.l1_threshold {
return Some(1);
}
if highest < 2 && cumulative_tokens >= self.config.l2_threshold {
return Some(2);
}
if highest < 3 && cumulative_tokens >= self.config.l3_threshold {
return Some(3);
}
None
}
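    /// Whether cumulative tokens have crossed the full-cycle threshold.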
#[must_use]
#[allow(dead_code)]
pub fn should_cycle(&self, cumulative_tokens: usize) -> bool {
self.config.enabled && cumulative_tokens >= self.config.cycle_threshold
}
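    /// Index of the first message inside the verbatim (never-summarized) window.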
    pub fn verbatim_window_start(&self, message_count: usize) -> usize {
        verbatim_window_start(message_count, self.config.verbatim_window_turns)
    }
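    /// Summarize `messages[start_idx..end_idx]` into an `<archived_context>`
    /// block at the given density level, skipping messages that the
    /// compaction plan pins as must-keep. Returns an empty string when there
    /// is nothing to summarize.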
pub async fn produce_soft_seam(
&self,
messages: &[Message],
level: u8,
start_idx: usize,
end_idx: usize,
workspace: Option<&Path>,
pinned_indices: &[usize],
) -> Result<String> {
        let end = end_idx.min(messages.len());
        // Clamp both bounds up front so an out-of-range start index cannot panic.
        if start_idx >= end {
            return Ok(String::new());
        }
        let range = &messages[start_idx..end];
let plan = plan_compaction(
range,
workspace,
KEEP_RECENT_MESSAGES.min(range.len().saturating_sub(1)),
Some(pinned_indices),
None,
);
let to_summarize: Vec<&Message> = range
.iter()
.enumerate()
.filter(|(idx, _msg)| !plan.pinned_indices.contains(idx))
.map(|(_idx, msg)| msg)
.collect();
if to_summarize.is_empty() {
return Ok(String::new());
}
let summary = self
.summarize_messages(&to_summarize, level, start_idx, end_idx)
.await?;
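        // Human-readable density hint embedded in the <archived_context> tag.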
let density_label = match level {
1 => "~2,500 tokens",
2 => "~1,800 tokens",
3 => "~1,200 tokens",
_ => "unknown",
};
let timestamp = Utc::now();
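        // Rough heuristic: ~4 bytes of UTF-8 per token.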
let token_estimate = summary.len() / 4;
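        // Record the seam; the inner scope releases the lock before formatting.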
{
let mut seams = self.active_seams.lock().await;
seams.push(SeamMetadata {
level,
start_idx,
end_idx,
token_estimate,
timestamp,
model: self.config.seam_model.clone(),
});
}
Ok(format!(
"<archived_context level=\"{level}\" range=\"msg {start_idx}-{end_idx}\" \
tokens=\"~{token_estimate}\" density=\"{density_label}\" \
model=\"{seam_model}\" timestamp=\"{ts}\">\n\
{summary}\n\
</archived_context>",
seam_model = self.config.seam_model,
ts = timestamp.to_rfc3339()
))
}
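    /// Merge previously produced seams (plus any uncompacted recent messages)
    /// into a single denser seam. Only levels 2 and 3 are expected here;
    /// unrecognized levels fall back to the densest (L3) limits.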
pub async fn recompact(
&self,
existing_seams: &[String],
new_messages: &[&Message],
level: u8,
start_idx: usize,
end_idx: usize,
) -> Result<String> {
let mut input = String::from(
"## Prior Context Summaries\n\n\
The following <archived_context> blocks were produced earlier. \
Merge their key information into a single denser summary.\n\n",
);
for (i, seam) in existing_seams.iter().enumerate() {
let _ = write!(input, "### Seam {}\n{seam}\n\n", i + 1);
}
if !new_messages.is_empty() {
input.push_str("## Recent Messages\n\n");
for msg in new_messages {
let role = &msg.role;
for block in &msg.content {
if let ContentBlock::Text { text, .. } = block {
let _ = write!(input, "**{role}:** {text}\n\n");
}
}
}
}
let (max_tokens, word_limit) = match level {
2 => (L2_MAX_TOKENS, 700),
3 => (L3_MAX_TOKENS, 400),
_ => (L3_MAX_TOKENS, 400),
};
let request = MessageRequest {
model: self.config.seam_model.clone(),
messages: vec![Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: format!(
"Synthesize the following context into a single dense summary. \
Preserve: decisions made, file paths, error messages, \
constraints, hypotheses, open questions, and task state. \
                        Drop: greetings, filler, repeated information. \
Keep it under {word_limit} words.\n\n{input}"
),
cache_control: None,
}],
}],
max_tokens,
system: Some(SystemPrompt::Text(
"You are a context compaction specialist. Produce dense, factual summaries that \
preserve every decision, path, error, constraint, and open question. Drop \
conversational filler and repetition."
.to_string(),
)),
tools: None,
tool_choice: None,
metadata: None,
thinking: None,
reasoning_effort: None,
stream: Some(false),
temperature: Some(0.1),
top_p: None,
};
let response = self.flash_client.create_message(request).await?;
let summary = response
.content
.iter()
.filter_map(|block| match block {
ContentBlock::Text { text, .. } => Some(text.clone()),
_ => None,
})
.collect::<Vec<_>>()
.join("\n");
let token_estimate = summary.len() / 4;
let timestamp = Utc::now();
{
let mut seams = self.active_seams.lock().await;
seams.push(SeamMetadata {
level,
start_idx,
end_idx,
token_estimate,
timestamp,
model: self.config.seam_model.clone(),
});
}
Ok(format!(
"<archived_context level=\"{level}\" range=\"msg {start_idx}-{end_idx}\" \
tokens=\"~{token_estimate}\" model=\"{model}\" timestamp=\"{ts}\">\n\
{summary}\n\
</archived_context>",
model = self.config.seam_model,
ts = timestamp.to_rfc3339()
))
}
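    /// Produce a `<carry_forward>` briefing from existing seams and optional
    /// structured state, for handing off into a fresh session cycle.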
pub async fn produce_flash_briefing(
&self,
existing_seams: &[String],
structured_state: Option<&str>,
) -> Result<String> {
let mut input = String::from(
"## Briefing Request\n\n\
Produce a <carry_forward> block summarizing the session state. \
Include: decisions made + why, constraints discovered, \
hypotheses being tested, approaches that failed, open questions. \
Do NOT include tool output bytes, file contents, or step-by-step recaps.\n\n",
);
if let Some(state) = structured_state {
let _ = write!(input, "## Structured State\n\n{state}\n\n");
}
if !existing_seams.is_empty() {
input.push_str("## Prior Context Summaries\n\n");
for (i, seam) in existing_seams.iter().enumerate() {
let _ = write!(input, "### Seam {}\n{seam}\n\n", i + 1);
}
} else {
input.push_str(
"No prior context summaries available. Produce a brief carry-forward \
from the structured state alone.\n",
);
}
let request = MessageRequest {
model: self.config.seam_model.clone(),
messages: vec![Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: input,
cache_control: None,
}],
}],
max_tokens: 4_096,
system: Some(SystemPrompt::Blocks(vec![SystemBlock {
block_type: "text".to_string(),
text: crate::cycle_manager::CYCLE_HANDOFF_TEMPLATE.to_string(),
cache_control: None,
}])),
tools: None,
tool_choice: None,
metadata: None,
thinking: None,
reasoning_effort: None,
stream: Some(false),
temperature: Some(0.2),
top_p: None,
};
let response = self.flash_client.create_message(request).await?;
let raw = response
.content
.iter()
.filter_map(|block| match block {
ContentBlock::Text { text, .. } => Some(text.as_str()),
_ => None,
})
.collect::<Vec<_>>()
.join("\n");
Ok(crate::cycle_manager::extract_carry_forward(&raw))
}
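    /// Flatten the given messages to plain text (truncating long blocks) and
    /// ask the seam model for a summary sized to the requested level.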
async fn summarize_messages(
&self,
messages: &[&Message],
level: u8,
start_idx: usize,
end_idx: usize,
) -> Result<String> {
let mut conversation = String::new();
for msg in messages {
let role = if msg.role == "user" {
"User"
} else {
"Assistant"
};
for block in &msg.content {
match block {
ContentBlock::Text { text, .. } => {
let snippet = truncate_chars(text, 800);
let _ = write!(conversation, "{role}: {snippet}\n\n");
}
ContentBlock::ToolUse { name, .. } => {
let _ = write!(conversation, "{role}: [Used tool: {name}]\n\n");
}
ContentBlock::ToolResult { content, .. } => {
let snippet = truncate_chars(content, 200);
let _ = write!(conversation, "Tool result: {snippet}\n\n");
}
                    // Reasoning and server-side tool blocks are intentionally
                    // dropped from summaries.
                    ContentBlock::Thinking { .. }
                    | ContentBlock::ServerToolUse { .. }
                    | ContentBlock::ToolSearchToolResult { .. }
                    | ContentBlock::CodeExecutionToolResult { .. } => {}
}
}
let (max_tokens, word_limit) = match level {
1 => (L1_MAX_TOKENS, 800),
2 => (L2_MAX_TOKENS, 600),
3 => (L3_MAX_TOKENS, 400),
_ => (L3_MAX_TOKENS, 400),
};
let request = MessageRequest {
model: self.config.seam_model.clone(),
messages: vec![Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: format!(
"Summarize the following conversation segment (messages {start_idx}-{end_idx}). \
Preserve: key decisions and their rationale, exact file paths, \
command invocations, error messages, tool-result facts, constraints \
discovered, hypotheses being tested, and open questions. \
Drop: greetings, filler, repeated information, and thinking blocks. \
Keep it under {word_limit} words.\n\n---\n\n{conversation}"
),
cache_control: None,
}],
}],
max_tokens,
system: Some(SystemPrompt::Text(
"You are a context summarization specialist. Produce dense, factual summaries \
that preserve every decision, path, error, constraint, and open question. \
Never omit a file path, error message, or decision rationale."
.to_string(),
)),
tools: None,
tool_choice: None,
metadata: None,
thinking: None,
reasoning_effort: None,
stream: Some(false),
temperature: Some(0.1),
top_p: None,
};
let response = self.flash_client.create_message(request).await?;
let summary = response
.content
.iter()
.filter_map(|block| match block {
ContentBlock::Text { text, .. } => Some(text.clone()),
_ => None,
})
.collect::<Vec<_>>()
.join("\n");
Ok(summary)
}
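    /// Collect every `<archived_context>` block already embedded in the
    /// assistant messages of the given history.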
    pub async fn collect_seam_texts(&self, messages: &[Message]) -> Vec<String> {
        // Seam text lives in the message history itself, so no lock is taken here.
        let mut texts = Vec::new();
for msg in messages {
if msg.role == "assistant" {
for block in &msg.content {
if let ContentBlock::Text { text, .. } = block
&& text.contains("<archived_context")
{
texts.push(text.clone());
}
}
}
}
texts
}
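    /// Highest seam level produced so far, if any.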
    pub async fn highest_level(&self) -> Option<u8> {
        let seams = self.active_seams.lock().await;
        // `last()` would return the most recent seam, which is not necessarily
        // the highest level once recompaction interleaves; take the max instead.
        seams.iter().map(|s| s.level).max()
    }
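    /// Forget all recorded seams, e.g. at the start of a new cycle.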
pub async fn reset(&self) {
self.active_seams.lock().await.clear();
}
}
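/// Truncate to at most `max_chars` characters without splitting a code point.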
fn truncate_chars(text: &str, max_chars: usize) -> String {
if max_chars == 0 {
return String::new();
}
if text.chars().count() <= max_chars {
return text.to_string();
}
text.chars().take(max_chars).collect()
}
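
/// Core arithmetic behind [`SeamManager::verbatim_window_start`], kept as a
/// free function so the tests can exercise it without a live client.
fn verbatim_window_start(message_count: usize, verbatim_window_turns: usize) -> usize {
    // A turn is a user/assistant pair, so the window spans up to
    // `2 * verbatim_window_turns` trailing messages.
    let turn_count = message_count / 2;
    let verbatim_turns = verbatim_window_turns.min(turn_count);
    let verbatim_messages = (verbatim_turns * 2).min(message_count);
    message_count.saturating_sub(verbatim_messages)
}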
#[cfg(test)]
mod tests {
use super::*;
    #[test]
    fn seam_levels_fire_in_order() {
        let config = SeamConfig::default();
        assert!(config.enabled);
        // Strictly ordered thresholds guarantee levels escalate one at a time.
        assert!(config.l1_threshold < config.l2_threshold);
        assert!(config.l2_threshold < config.l3_threshold);
        assert!(config.l3_threshold < config.cycle_threshold);
    }
    #[test]
    fn cycle_threshold_check() {
        let config = SeamConfig::default();
        assert_eq!(config.cycle_threshold, DEFAULT_CYCLE_THRESHOLD);
        // Cycling only fires after every seam level has had a chance to run.
        assert!(config.cycle_threshold > config.l3_threshold);
    }
    #[test]
    fn verbatim_window_calculation() {
        // With a 4-turn window, the last 8 messages stay verbatim.
        assert_eq!(verbatim_window_start(20, 4), 12);
        // A history of exactly 8 messages sits entirely inside the window.
        assert_eq!(verbatim_window_start(8, 4), 0);
        // Shorter histories clamp to the available turn count.
        assert_eq!(verbatim_window_start(4, 4), 0);
        assert_eq!(verbatim_window_start(5, 4), 1);
    }
#[test]
fn truncate_chars_handles_unicode() {
assert_eq!(truncate_chars("abc😀é", 3), "abc".to_string());
assert_eq!(truncate_chars("abc😀é", 4), "abc😀".to_string());
assert_eq!(truncate_chars("abc😀é", 10), "abc😀é".to_string());
assert_eq!(truncate_chars("", 5), "".to_string());
}
#[test]
fn disabled_config() {
let config = SeamConfig {
enabled: false,
..Default::default()
};
assert!(!config.enabled);
}
}