1use serde_json::Value;
4
5use crate::error::Result;
6use crate::model::SystemLanguageModel;
7use crate::options::GenerationOptions;
8use crate::session::Session;
9
/// Fallback context-window size, in tokens, used when no model-specific
/// limit is known.
pub const DEFAULT_CONTEXT_TOKENS: usize = 4096;
19
/// Token-budget description of a model's context window, used to decide
/// when a session's transcript must be compacted.
#[derive(Debug, Clone, Copy)]
pub struct ContextLimit {
    /// Total tokens the model's context window can hold.
    pub max_tokens: usize,
    /// Tokens held back for the model's response; subtracted from
    /// `max_tokens` when computing space available for history.
    pub reserved_response_tokens: usize,
    /// Heuristic characters-per-token ratio used to estimate token counts
    /// from text length.
    pub chars_per_token: usize,
}
30
31impl ContextLimit {
32 pub fn new(max_tokens: usize) -> Self {
34 Self {
35 max_tokens,
36 reserved_response_tokens: 0,
37 chars_per_token: 4,
38 }
39 }
40
41 pub fn default_on_device() -> Self {
43 Self {
44 max_tokens: DEFAULT_CONTEXT_TOKENS,
45 reserved_response_tokens: 512,
46 chars_per_token: 4,
47 }
48 }
49
50 pub fn with_reserved_response_tokens(mut self, tokens: usize) -> Self {
52 self.reserved_response_tokens = tokens;
53 self
54 }
55
56 pub fn with_chars_per_token(mut self, chars: usize) -> Self {
58 if chars > 0 {
59 self.chars_per_token = chars;
60 }
61 self
62 }
63}
64
/// Snapshot of how much of a context window a transcript occupies.
#[derive(Debug, Clone, Copy)]
pub struct ContextUsage {
    /// Estimated token count of the transcript (character-count heuristic).
    pub estimated_tokens: usize,
    /// Total window size the estimate was measured against.
    pub max_tokens: usize,
    /// Tokens reserved for the response, copied from the `ContextLimit`.
    pub reserved_response_tokens: usize,
    /// `max_tokens` minus the reservation (saturating at zero).
    pub available_tokens: usize,
    /// `estimated_tokens / max_tokens`; 0.0 when `max_tokens` is zero.
    pub utilization: f32,
    /// True when the estimate exceeds `available_tokens`, i.e. compaction
    /// is needed.
    pub over_limit: bool,
}
81
/// Result of compacting a session: a fresh session seeded with a summary of
/// the old transcript.
pub struct CompactedSession {
    /// New session whose instructions embed the conversation summary.
    pub session: Session,
    /// Summary text produced from the old transcript.
    pub summary: String,
}
89
/// Tuning knobs for transcript summarization/compaction.
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Approximate token size of each transcript chunk fed to the summarizer.
    pub chunk_tokens: usize,
    /// Cap on the rolling summary's size when it is re-fed into the next
    /// summarization prompt.
    pub max_summary_tokens: usize,
    /// System instructions given to each summarization session.
    pub instructions: String,
    /// Generation options used for every summarization request.
    pub summary_options: GenerationOptions,
    /// Heuristic chars-per-token ratio used for chunking and truncation.
    pub chars_per_token: usize,
}
108
impl Default for CompactionConfig {
    /// Defaults sized for a small on-device context window.
    fn default() -> Self {
        Self {
            // ~800-token chunks keep each summarization prompt comfortably
            // inside the window alongside the rolling summary.
            chunk_tokens: 800,
            max_summary_tokens: 400,
            instructions: "Summarize the conversation for future context. Preserve user intent, key facts, decisions, and open questions. Keep the summary concise."
                .to_string(),
            // Low temperature for stable, factual summaries; responses capped
            // at 256 tokens.
            summary_options: GenerationOptions::builder()
                .temperature(0.2)
                .max_response_tokens(256)
                .build(),
            // Common rough heuristic: ~4 characters per token.
            chars_per_token: 4,
        }
    }
}
124
125pub fn context_usage_from_transcript(
127 transcript_json: &str,
128 limit: &ContextLimit,
129) -> Result<ContextUsage> {
130 let transcript_text = transcript_to_text(transcript_json)?;
131 let estimated_tokens = estimate_tokens(&transcript_text, limit.chars_per_token);
132 let available_tokens = limit
133 .max_tokens
134 .saturating_sub(limit.reserved_response_tokens);
135 let utilization = if limit.max_tokens == 0 {
136 0.0
137 } else {
138 estimated_tokens as f32 / limit.max_tokens as f32
139 };
140 let over_limit = estimated_tokens > available_tokens;
141
142 Ok(ContextUsage {
143 estimated_tokens,
144 max_tokens: limit.max_tokens,
145 reserved_response_tokens: limit.reserved_response_tokens,
146 available_tokens,
147 utilization,
148 over_limit,
149 })
150}
151
152pub fn compact_transcript(
154 model: &SystemLanguageModel,
155 transcript_json: &str,
156 config: &CompactionConfig,
157) -> Result<String> {
158 let transcript_text = transcript_to_text(transcript_json)?;
159 if transcript_text.trim().is_empty() {
160 return Ok(String::new());
161 }
162
163 let chunks = chunk_text(
164 &transcript_text,
165 config.chunk_tokens,
166 config.chars_per_token,
167 );
168
169 let mut summary = String::new();
170
171 for chunk in chunks {
172 let session = Session::with_instructions(model, &config.instructions)?;
173 let prompt = build_summary_prompt(
174 &summary,
175 &chunk,
176 config.max_summary_tokens,
177 config.chars_per_token,
178 );
179 let response = session.respond(&prompt, &config.summary_options)?;
180 summary = response.into_content();
181 }
182
183 Ok(summary)
184}
185
186pub fn compact_session_if_needed(
195 model: &SystemLanguageModel,
196 session: &Session,
197 limit: &ContextLimit,
198 config: &CompactionConfig,
199 base_instructions: Option<&str>,
200) -> Result<Option<CompactedSession>> {
201 let usage = session.context_usage(limit)?;
202 if !usage.over_limit {
203 return Ok(None);
204 }
205
206 let transcript_json = session.transcript_json()?;
207 let summary = compact_transcript(model, &transcript_json, config)?;
208 let compacted = session_from_summary(model, base_instructions, &summary)?;
209
210 Ok(Some(CompactedSession {
211 session: compacted,
212 summary,
213 }))
214}
215
216pub fn session_from_summary(
218 model: &SystemLanguageModel,
219 base_instructions: Option<&str>,
220 summary: &str,
221) -> Result<Session> {
222 match compacted_instructions(base_instructions, summary) {
223 Some(instructions) => Session::with_instructions(model, &instructions),
224 None => Session::new(model),
225 }
226}
227
/// Combines base instructions with a conversation summary into the
/// instruction text for a compacted session.
///
/// Returns `None` when both parts are empty after trimming; otherwise the
/// non-empty parts, with the summary introduced by a "Conversation summary:"
/// header and separated from the base by a blank line.
pub fn compacted_instructions(base_instructions: Option<&str>, summary: &str) -> Option<String> {
    let base = base_instructions.unwrap_or("").trim();
    let summary = summary.trim();

    if summary.is_empty() {
        // Without a summary, only non-empty base instructions survive.
        return (!base.is_empty()).then(|| base.to_string());
    }

    let mut out = String::new();
    if !base.is_empty() {
        out.push_str(base);
        out.push_str("\n\n");
    }
    out.push_str("Conversation summary:\n");
    out.push_str(summary);
    Some(out)
}
240
241pub fn transcript_to_text(transcript_json: &str) -> Result<String> {
243 let value: Value = serde_json::from_str(transcript_json)?;
244 let mut lines = Vec::new();
245 collect_transcript_lines(&value, &mut lines);
246
247 if lines.is_empty() {
248 Ok(transcript_json.to_string())
249 } else {
250 Ok(lines.join("\n"))
251 }
252}
253
/// Estimates the token count of `text` as ceil(chars / chars_per_token).
/// A zero ratio is treated as one character per token so the division is
/// always valid.
pub fn estimate_tokens(text: &str, chars_per_token: usize) -> usize {
    let per_token = if chars_per_token == 0 { 1 } else { chars_per_token };
    text.chars().count().div_ceil(per_token)
}
260
/// Builds the prompt for one summarization pass.
///
/// First pass (no summary yet, or a whitespace-only one): ask for a fresh
/// summary of `chunk`. Later passes: ask the model to fold `chunk` into the
/// rolling summary, truncating that summary to `max_summary_tokens` so the
/// prompt cannot grow without bound across passes.
fn build_summary_prompt(
    current_summary: &str,
    chunk: &str,
    max_summary_tokens: usize,
    chars_per_token: usize,
) -> String {
    if current_summary.trim().is_empty() {
        return format!(
            "Summarize the following conversation transcript:\n\n{chunk}\n\nReturn a concise summary."
        );
    }

    let truncated_summary =
        truncate_summary_tail(current_summary, max_summary_tokens, chars_per_token);
    format!(
        "Update the summary with new conversation content.\n\nCurrent summary:\n{truncated_summary}\n\nNew transcript chunk:\n{chunk}\n\nReturn the updated concise summary."
    )
}

/// Keeps at most the trailing `max_tokens * chars_per_token` characters of
/// `summary`, prefixing ".." when older text was dropped. The tail is kept
/// (rather than the head) because the newest summary content is the most
/// relevant to the next pass.
fn truncate_summary_tail(summary: &str, max_tokens: usize, chars_per_token: usize) -> String {
    let per_token = chars_per_token.max(1);
    let char_count = summary.chars().count();
    // Same estimate as `estimate_tokens`: ceil(chars / chars_per_token).
    let estimated_tokens = char_count.div_ceil(per_token);
    if estimated_tokens <= max_tokens {
        return summary.to_string();
    }

    // Being over budget means ceil(char_count / per_token) > max_tokens,
    // which implies char_count > max_tokens * per_token — so truncation
    // always applies here. (The original's second `char_count > max_chars`
    // re-check was dead code and has been removed.)
    let max_chars = max_tokens.saturating_mul(per_token);
    let skip = char_count.saturating_sub(max_chars);
    format!("..{}", summary.chars().skip(skip).collect::<String>())
}
296
/// Splits `text` into chunks of at most `chunk_tokens * chars_per_token`
/// characters, breaking only at line boundaries.
///
/// Lines are never split, so a single line longer than the budget becomes
/// its own (over-long) chunk. Trailing whitespace is trimmed from each
/// chunk. Always returns at least one chunk: empty or whitespace-only input
/// yields the input itself as the sole chunk.
fn chunk_text(text: &str, chunk_tokens: usize, chars_per_token: usize) -> Vec<String> {
    // Clamp both factors so a zero config can never produce a zero budget.
    let max_chars = chunk_tokens.max(1).saturating_mul(chars_per_token.max(1));
    let mut chunks = Vec::new();
    let mut current = String::new();
    // Running char count of `current`; avoids the original's O(n^2)
    // `current.chars().count()` rescan on every line.
    let mut current_chars = 0usize;

    for line in text.lines() {
        let line_chars = line.chars().count() + 1; // +1 for the trailing '\n'
        if !current.is_empty() && current_chars + line_chars > max_chars {
            chunks.push(current.trim_end().to_string());
            current.clear();
            current_chars = 0;
        }
        current.push_str(line);
        current.push('\n');
        current_chars += line_chars;
    }

    if !current.trim().is_empty() {
        chunks.push(current.trim_end().to_string());
    }

    // Guarantee at least one chunk so callers always get something back.
    if chunks.is_empty() {
        chunks.push(text.to_string());
    }

    chunks
}
322
/// Recursively walks a transcript JSON value, collecting human-readable
/// conversation lines into `out` in document order.
///
/// Handles three shapes per object: chat-style `{"role", "content"|"text"}`
/// entries (emitted as "role: content"), bare string fields under a few
/// well-known keys, and arbitrary nesting (arrays / unknown object keys are
/// recursed into).
fn collect_transcript_lines(value: &Value, out: &mut Vec<String>) {
    match value {
        Value::Array(items) => {
            // Visit elements in order so output ordering matches the transcript.
            for item in items {
                collect_transcript_lines(item, out);
            }
        }
        Value::Object(map) => {
            // True once a "role: content" line was emitted, so the same text
            // is not pushed a second time by the key loop below.
            let mut processed_content = false;

            // Chat-style entry: pair the role with its content/text field.
            if let Some(role) = map.get("role").and_then(Value::as_str) {
                let content = map
                    .get("content")
                    .and_then(Value::as_str)
                    .or_else(|| map.get("text").and_then(Value::as_str));
                if let Some(content) = content {
                    out.push(format!("{role}: {content}"));
                    processed_content = true;
                }
            }

            // Bare string fields under well-known keys, skipping the ones
            // already consumed above.
            for key in ["content", "text", "prompt", "response", "instructions"] {
                if processed_content && matches!(key, "content" | "text") {
                    continue;
                }
                if let Some(text) = map.get(key).and_then(Value::as_str) {
                    out.push(text.to_string());
                }
            }

            // Recurse into remaining values. NOTE(review): non-string values
            // under the well-known keys (e.g. a structured "content" array)
            // are skipped entirely here — confirm that is intended.
            for (key, value) in map {
                if matches!(
                    key.as_str(),
                    "role" | "content" | "text" | "prompt" | "response" | "instructions"
                ) {
                    continue;
                }
                collect_transcript_lines(value, out);
            }
        }
        // Scalars (numbers, bools, null, bare top-level strings) carry no
        // transcript structure and are ignored.
        _ => {}
    }
}
370
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_estimate_tokens() {
        assert_eq!(estimate_tokens("abcd", 4), 1);
        assert_eq!(estimate_tokens("abcd", 3), 2);
        // Empty input costs nothing.
        assert_eq!(estimate_tokens("", 4), 0);
        // A zero ratio is clamped to one char per token instead of panicking.
        assert_eq!(estimate_tokens("abcd", 0), 4);
    }

    #[test]
    fn test_chunk_text() {
        // Each 8-char line plus its newline overflows the 8-char budget
        // (2 tokens * 4 chars), so every line lands in its own chunk.
        // The original test only asserted non-emptiness; pin exact contents.
        assert_eq!(
            chunk_text("Line one\nLine two\nLine three", 2, 4),
            vec!["Line one", "Line two", "Line three"]
        );

        // A generous budget keeps everything in a single chunk.
        assert_eq!(chunk_text("a\nb", 100, 4), vec!["a\nb"]);

        // Degenerate input still yields exactly one chunk.
        assert_eq!(chunk_text("", 2, 4), vec![""]);
    }

    #[test]
    fn test_compacted_instructions() {
        assert_eq!(compacted_instructions(None, ""), None);
        // Whitespace-only parts are treated as empty.
        assert_eq!(compacted_instructions(Some("  "), "  "), None);
        assert_eq!(
            compacted_instructions(Some("You are helpful."), ""),
            Some("You are helpful.".to_string())
        );
        assert_eq!(
            compacted_instructions(None, "Summary body"),
            Some("Conversation summary:\nSummary body".to_string())
        );
        assert_eq!(
            compacted_instructions(Some("You are helpful."), "Summary body"),
            Some("You are helpful.\n\nConversation summary:\nSummary body".to_string())
        );
    }
}