use serde_json::Value;

use crate::error::Result;
use crate::model::SystemLanguageModel;
use crate::options::GenerationOptions;
use crate::session::Session;
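
/// Default context-window budget, in tokens, used by
/// [`ContextLimit::default_on_device`].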
pub const DEFAULT_CONTEXT_TOKENS: usize = 4096;
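
/// Token budget for a model context window. `reserved_response_tokens` is held
/// back for the model's reply, and `chars_per_token` is the heuristic used to
/// convert character counts into token estimates.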
#[derive(Debug, Clone, Copy)]
pub struct ContextLimit {
pub max_tokens: usize,
pub reserved_response_tokens: usize,
pub chars_per_token: usize,
}

impl ContextLimit {
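    /// Creates a limit that may use the whole window: nothing is reserved for
    /// the response, and the heuristic defaults to 4 characters per token.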
pub fn new(max_tokens: usize) -> Self {
Self {
max_tokens,
reserved_response_tokens: 0,
chars_per_token: 4,
}
}
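
    /// A conservative default for on-device models: a 4096-token window with
    /// 512 tokens reserved for the response.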
pub fn default_on_device() -> Self {
Self {
max_tokens: DEFAULT_CONTEXT_TOKENS,
reserved_response_tokens: 512,
chars_per_token: 4,
}
}
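
    /// Holds back `tokens` of the window for the model's response.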
pub fn with_reserved_response_tokens(mut self, tokens: usize) -> Self {
self.reserved_response_tokens = tokens;
self
}
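
    /// Overrides the characters-per-token heuristic. Zero is rejected and
    /// leaves the current value unchanged.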
pub fn with_chars_per_token(mut self, chars: usize) -> Self {
if chars > 0 {
self.chars_per_token = chars;
}
self
}
}
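
/// A snapshot of estimated context consumption. `utilization` is
/// `estimated_tokens / max_tokens`, while `over_limit` compares the estimate
/// against `available_tokens` (`max_tokens` minus the reserved response
/// budget).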
#[derive(Debug, Clone, Copy)]
pub struct ContextUsage {
pub estimated_tokens: usize,
pub max_tokens: usize,
pub reserved_response_tokens: usize,
pub available_tokens: usize,
pub utilization: f32,
pub over_limit: bool,
}
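
/// The result of a compaction pass: a fresh session seeded with the summary,
/// plus the summary text itself.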
pub struct CompactedSession {
pub session: Session,
pub summary: String,
}
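
/// Tuning for transcript compaction: how large each summarized chunk is, how
/// much of the rolling summary is carried between chunks, and the instructions
/// and generation options given to the summarizer.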
#[derive(Debug, Clone)]
pub struct CompactionConfig {
pub chunk_tokens: usize,
pub max_summary_tokens: usize,
pub instructions: String,
pub summary_options: GenerationOptions,
pub chars_per_token: usize,
}

impl Default for CompactionConfig {
fn default() -> Self {
Self {
chunk_tokens: 800,
max_summary_tokens: 400,
instructions: "Summarize the conversation for future context. Preserve user intent, key facts, decisions, and open questions. Keep the summary concise."
.to_string(),
summary_options: GenerationOptions::builder()
.temperature(0.2)
.max_response_tokens(256)
.build(),
chars_per_token: 4,
}
}
}
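
/// Estimates how much of `limit` a serialized transcript consumes.
///
/// The estimate is heuristic (characters divided by `limit.chars_per_token`),
/// so treat it as an approximation. A minimal sketch, assuming a transcript
/// JSON string is already in hand:
///
/// ```ignore
/// let limit = ContextLimit::default_on_device();
/// let usage = context_usage_from_transcript(&transcript_json, &limit)?;
/// if usage.over_limit {
///     // time to compact
/// }
/// ```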
pub fn context_usage_from_transcript(
transcript_json: &str,
limit: &ContextLimit,
) -> Result<ContextUsage> {
let transcript_text = transcript_to_text(transcript_json)?;
let estimated_tokens = estimate_tokens(&transcript_text, limit.chars_per_token);
let available_tokens = limit
.max_tokens
.saturating_sub(limit.reserved_response_tokens);
let utilization = if limit.max_tokens == 0 {
0.0
} else {
estimated_tokens as f32 / limit.max_tokens as f32
};
let over_limit = estimated_tokens > available_tokens;
Ok(ContextUsage {
estimated_tokens,
max_tokens: limit.max_tokens,
reserved_response_tokens: limit.reserved_response_tokens,
available_tokens,
utilization,
over_limit,
})
}
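
/// Summarizes a transcript with a rolling "update the summary" loop: the text
/// is split into chunks of roughly `config.chunk_tokens`, and each chunk is
/// folded into the summary by a fresh [`Session`] so no single request
/// overflows the summarizer's own context. Returns an empty string for an
/// empty transcript.
///
/// A minimal sketch, assuming a model handle obtained elsewhere:
///
/// ```ignore
/// let config = CompactionConfig::default();
/// let summary = compact_transcript(&model, &session.transcript_json()?, &config)?;
/// ```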
pub fn compact_transcript(
model: &SystemLanguageModel,
transcript_json: &str,
config: &CompactionConfig,
) -> Result<String> {
let transcript_text = transcript_to_text(transcript_json)?;
if transcript_text.trim().is_empty() {
return Ok(String::new());
}
let chunks = chunk_text(
&transcript_text,
config.chunk_tokens,
config.chars_per_token,
);
let mut summary = String::new();
for chunk in chunks {
let session = Session::with_instructions(model, &config.instructions)?;
let prompt = build_summary_prompt(
&summary,
&chunk,
config.max_summary_tokens,
config.chars_per_token,
);
let response = session.respond(&prompt, &config.summary_options)?;
summary = response.into_content();
}
Ok(summary)
}
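
/// Checks a live session against `limit` and, only when usage exceeds the
/// available budget, replaces it with a fresh session whose instructions embed
/// a summary of the old transcript. Returns `Ok(None)` when no compaction was
/// needed.
///
/// A minimal sketch, assuming `model` and `session` already exist:
///
/// ```ignore
/// let limit = ContextLimit::default_on_device();
/// let config = CompactionConfig::default();
/// if let Some(compacted) =
///     compact_session_if_needed(&model, &session, &limit, &config, Some("Be concise."))?
/// {
///     session = compacted.session;
/// }
/// ```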
pub fn compact_session_if_needed(
model: &SystemLanguageModel,
session: &Session,
limit: &ContextLimit,
config: &CompactionConfig,
base_instructions: Option<&str>,
) -> Result<Option<CompactedSession>> {
let usage = session.context_usage(limit)?;
if !usage.over_limit {
return Ok(None);
}
let transcript_json = session.transcript_json()?;
let summary = compact_transcript(model, &transcript_json, config)?;
let compacted = session_from_summary(model, base_instructions, &summary)?;
Ok(Some(CompactedSession {
session: compacted,
summary,
}))
}
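
/// Builds a new [`Session`] whose instructions carry the conversation summary,
/// or a bare session when both the base instructions and the summary are
/// blank.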
pub fn session_from_summary(
model: &SystemLanguageModel,
base_instructions: Option<&str>,
summary: &str,
) -> Result<Session> {
match compacted_instructions(base_instructions, summary) {
Some(instructions) => Session::with_instructions(model, &instructions),
None => Session::new(model),
}
}
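
/// Merges base instructions with a conversation summary. Returns `None` when
/// both are blank; otherwise whichever parts are present are joined, with the
/// summary under a "Conversation summary:" header.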
pub fn compacted_instructions(base_instructions: Option<&str>, summary: &str) -> Option<String> {
let base = base_instructions.map_or("", str::trim);
let summary = summary.trim();
match (base.is_empty(), summary.is_empty()) {
(true, true) => None,
(false, true) => Some(base.to_string()),
(true, false) => Some(format!("Conversation summary:\n{summary}")),
(false, false) => Some(format!("{base}\n\nConversation summary:\n{summary}")),
}
}
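
/// Flattens transcript JSON into plain text for estimation and summarization,
/// one `role: content` line per recognizable entry. If nothing recognizable is
/// found, the raw JSON is returned as a fallback.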
pub fn transcript_to_text(transcript_json: &str) -> Result<String> {
let value: Value = serde_json::from_str(transcript_json)?;
let mut lines = Vec::new();
collect_transcript_lines(&value, &mut lines);
if lines.is_empty() {
Ok(transcript_json.to_string())
} else {
Ok(lines.join("\n"))
}
}
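
/// Rough token estimate: character count divided by `chars_per_token`, rounded
/// up (a zero divisor is treated as one). For example,
/// `estimate_tokens("abcd", 3)` is `2`.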
pub fn estimate_tokens(text: &str, chars_per_token: usize) -> usize {
let denom = chars_per_token.max(1);
let chars = text.chars().count();
chars.div_ceil(denom)
}
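
/// Builds the per-chunk prompt: a plain "summarize this" request for the first
/// chunk, or an "update the summary" request carrying the (possibly truncated)
/// running summary for later chunks.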
fn build_summary_prompt(
current_summary: &str,
chunk: &str,
max_summary_tokens: usize,
chars_per_token: usize,
) -> String {
if current_summary.trim().is_empty() {
format!(
"Summarize the following conversation transcript:\n\n{chunk}\n\nReturn a concise summary."
)
} else {
        // Cap the rolling summary so the prompt itself stays within budget;
        // keep the most recent characters, which carry the latest state.
        // (`ceil(chars / cpt) > max_tokens` is equivalent to
        // `chars > max_tokens * cpt`, so comparing characters directly suffices.)
        let max_chars = max_summary_tokens.saturating_mul(chars_per_token.max(1));
        let char_count = current_summary.chars().count();
        let truncated_summary = if char_count > max_chars {
            let skip = char_count - max_chars;
            format!(
                "...{}",
                current_summary.chars().skip(skip).collect::<String>()
            )
        } else {
            current_summary.to_string()
        };
format!(
"Update the summary with new conversation content.\n\nCurrent summary:\n{truncated_summary}\n\nNew transcript chunk:\n{chunk}\n\nReturn the updated concise summary."
)
}
}
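
/// Splits text into line-aligned chunks of roughly `chunk_tokens` each. Lines
/// are never broken, so a single line longer than the budget yields a chunk
/// that exceeds it.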
fn chunk_text(text: &str, chunk_tokens: usize, chars_per_token: usize) -> Vec<String> {
let max_chars = chunk_tokens.max(1).saturating_mul(chars_per_token.max(1));
let mut chunks = Vec::new();
let mut current = String::new();
for line in text.lines() {
let line_len = line.chars().count() + 1;
if !current.is_empty() && current.chars().count() + line_len > max_chars {
chunks.push(current.trim_end().to_string());
current.clear();
}
current.push_str(line);
current.push('\n');
}
if !current.trim().is_empty() {
chunks.push(current.trim_end().to_string());
}
if chunks.is_empty() {
chunks.push(text.to_string());
}
chunks
}
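
/// Walks arbitrary transcript JSON depth-first. Objects carrying a string
/// `role` plus `content` or `text` become `role: content` lines; other
/// well-known text fields are collected verbatim; everything else is recursed
/// into.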
fn collect_transcript_lines(value: &Value, out: &mut Vec<String>) {
match value {
Value::Array(items) => {
for item in items {
collect_transcript_lines(item, out);
}
}
Value::Object(map) => {
let mut processed_content = false;
if let Some(role) = map.get("role").and_then(Value::as_str) {
let content = map
.get("content")
.and_then(Value::as_str)
.or_else(|| map.get("text").and_then(Value::as_str));
if let Some(content) = content {
out.push(format!("{role}: {content}"));
processed_content = true;
}
}
for key in ["content", "text", "prompt", "response", "instructions"] {
if processed_content && matches!(key, "content" | "text") {
continue;
}
if let Some(text) = map.get(key).and_then(Value::as_str) {
out.push(text.to_string());
}
}
for (key, value) in map {
if matches!(
key.as_str(),
"role" | "content" | "text" | "prompt" | "response" | "instructions"
) {
continue;
}
collect_transcript_lines(value, out);
}
}
_ => {}
}
}

#[cfg(test)]
mod tests {
use super::*;

    #[test]
fn test_estimate_tokens() {
let text = "abcd";
assert_eq!(estimate_tokens(text, 4), 1);
assert_eq!(estimate_tokens(text, 3), 2);
}

    #[test]
    fn test_chunk_text() {
        // chunk_tokens = 2 with 4 chars per token gives an 8-char budget, so
        // each line lands in its own chunk.
        let text = "Line one\nLine two\nLine three";
        let chunks = chunk_text(text, 2, 4);
        assert_eq!(chunks, vec!["Line one", "Line two", "Line three"]);
    }

    #[test]
fn test_compacted_instructions() {
assert_eq!(compacted_instructions(None, ""), None);
assert_eq!(
compacted_instructions(Some("You are helpful."), ""),
Some("You are helpful.".to_string())
);
assert_eq!(
compacted_instructions(None, "Summary body"),
Some("Conversation summary:\nSummary body".to_string())
);
assert_eq!(
compacted_instructions(Some("You are helpful."), "Summary body"),
Some("You are helpful.\n\nConversation summary:\nSummary body".to_string())
);
}
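
    // The remaining tests exercise only the pure helpers in this module, so
    // they run without a model or session.
    #[test]
    fn test_estimate_tokens_zero_divisor() {
        // A zero chars_per_token is clamped to one, so the estimate equals the
        // character count.
        assert_eq!(estimate_tokens("abc", 0), 3);
    }

    #[test]
    fn test_transcript_to_text_extracts_role_content() {
        let json = r#"[{"role":"user","content":"Hi"},{"role":"assistant","content":"Hello"}]"#;
        assert_eq!(transcript_to_text(json).unwrap(), "user: Hi\nassistant: Hello");
    }

    #[test]
    fn test_transcript_to_text_falls_back_to_raw_json() {
        // No recognizable text fields, so the raw JSON comes back unchanged.
        assert_eq!(transcript_to_text("[1, 2, 3]").unwrap(), "[1, 2, 3]");
    }

    #[test]
    fn test_context_usage_over_limit() {
        // "user: aaaa aaaa aaaa aaaa" is 25 chars -> ceil(25 / 4) = 7 tokens,
        // against 8 - 4 = 4 available.
        let json = r#"[{"role":"user","content":"aaaa aaaa aaaa aaaa"}]"#;
        let limit = ContextLimit::new(8).with_reserved_response_tokens(4);
        let usage = context_usage_from_transcript(json, &limit).unwrap();
        assert_eq!(usage.estimated_tokens, 7);
        assert_eq!(usage.available_tokens, 4);
        assert!(usage.over_limit);
    }

    #[test]
    fn test_build_summary_prompt_truncates_running_summary() {
        // max 2 tokens * 4 chars = 8 chars survive, prefixed with an ellipsis.
        let summary = "x".repeat(12);
        let prompt = build_summary_prompt(&summary, "chunk", 2, 4);
        assert!(prompt.contains(&format!("...{}", "x".repeat(8))));
    }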
}