use serde_json::Value;

use crate::error::Result;
use crate::model::SystemLanguageModel;
use crate::options::GenerationOptions;
use crate::session::Session;

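/// Default size of the on-device model's context window, in tokens.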
pub const DEFAULT_CONTEXT_TOKENS: usize = 4096;

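/// Token budget used when estimating how full a session's context window is.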
#[derive(Debug, Clone, Copy)]
pub struct ContextLimit {
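    /// Total size of the model's context window, in tokens.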
    pub max_tokens: usize,
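    /// Tokens held back for the model's response.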
    pub reserved_response_tokens: usize,
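    /// Average number of characters per token assumed when estimating counts.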
    pub chars_per_token: usize,
}

impl ContextLimit {
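    /// Creates a limit with the given window size, reserving no response
    /// tokens and assuming four characters per token.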
    pub fn new(max_tokens: usize) -> Self {
        Self {
            max_tokens,
            reserved_response_tokens: 0,
            chars_per_token: 4,
        }
    }

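    /// Returns the default limit for the on-device model: a 4096-token
    /// window with 512 tokens reserved for the response.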
    pub fn default_on_device() -> Self {
        Self {
            max_tokens: DEFAULT_CONTEXT_TOKENS,
            reserved_response_tokens: 512,
            chars_per_token: 4,
        }
    }

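    /// Sets how many tokens to hold back for the model's response.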
    pub fn with_reserved_response_tokens(mut self, tokens: usize) -> Self {
        self.reserved_response_tokens = tokens;
        self
    }

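    /// Sets the characters-per-token estimate; a value of zero is ignored.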
    pub fn with_chars_per_token(mut self, chars: usize) -> Self {
        if chars > 0 {
            self.chars_per_token = chars;
        }
        self
    }
}

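/// Snapshot of how much of a context window a transcript occupies.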
#[derive(Debug, Clone, Copy)]
pub struct ContextUsage {
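    /// Estimated token count of the transcript.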
    pub estimated_tokens: usize,
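    /// Total size of the context window, in tokens.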
    pub max_tokens: usize,
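    /// Tokens reserved for the model's response.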
    pub reserved_response_tokens: usize,
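    /// Tokens usable by the prompt: `max_tokens - reserved_response_tokens`.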
    pub available_tokens: usize,
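    /// Fraction of the full window in use: `estimated_tokens / max_tokens`.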
    pub utilization: f32,
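    /// Whether the transcript exceeds the available (non-reserved) tokens.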
    pub over_limit: bool,
}

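/// Settings that control how a transcript is compacted into a rolling summary.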
#[derive(Debug, Clone)]
pub struct CompactionConfig {
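    /// Approximate size of each transcript chunk fed to the summarizer, in tokens.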
    pub chunk_tokens: usize,
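    /// Token budget for the running summary carried between chunks.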
    pub max_summary_tokens: usize,
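    /// System instructions given to each summarizer session.
    ///
    /// The default asks the model to preserve user intent, key facts,
    /// decisions, and open questions while staying concise.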
    pub instructions: String,
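    /// Generation options used for each summarization request.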
    pub summary_options: GenerationOptions,
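    /// Average number of characters per token used when sizing chunks.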
    pub chars_per_token: usize,
}

impl Default for CompactionConfig {
    fn default() -> Self {
        Self {
            chunk_tokens: 800,
            max_summary_tokens: 400,
            instructions: "Summarize the conversation for future context. Preserve user intent, key facts, decisions, and open questions. Keep the summary concise."
                .to_string(),
            summary_options: GenerationOptions::builder()
                .temperature(0.2)
                .max_response_tokens(256)
                .build(),
            chars_per_token: 4,
        }
    }
}

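/// Estimates how much of `limit` the given transcript JSON consumes.
///
/// Token counts are character-based estimates, not exact tokenizer output.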
pub fn context_usage_from_transcript(
    transcript_json: &str,
    limit: &ContextLimit,
) -> Result<ContextUsage> {
    let transcript_text = transcript_to_text(transcript_json)?;
    let estimated_tokens = estimate_tokens(&transcript_text, limit.chars_per_token);
    let available_tokens = limit
        .max_tokens
        .saturating_sub(limit.reserved_response_tokens);
    let utilization = if limit.max_tokens == 0 {
        0.0
    } else {
        estimated_tokens as f32 / limit.max_tokens as f32
    };
    let over_limit = estimated_tokens > available_tokens;

    Ok(ContextUsage {
        estimated_tokens,
        max_tokens: limit.max_tokens,
        reserved_response_tokens: limit.reserved_response_tokens,
        available_tokens,
        utilization,
        over_limit,
    })
}

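/// Compacts a transcript into a short summary by splitting it into chunks
/// and folding each chunk into a rolling summary with the given model.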
pub fn compact_transcript(
    model: &SystemLanguageModel,
    transcript_json: &str,
    config: &CompactionConfig,
) -> Result<String> {
    let transcript_text = transcript_to_text(transcript_json)?;
    if transcript_text.trim().is_empty() {
        return Ok(String::new());
    }

    let chunks = chunk_text(
        &transcript_text,
        config.chunk_tokens,
        config.chars_per_token,
    );

    let mut summary = String::new();

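    // Fold each chunk into the running summary. A fresh session per chunk
    // keeps the summarizer's own context from growing with the transcript.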
    for chunk in chunks {
        let session = Session::with_instructions(model, &config.instructions)?;
        let prompt = build_summary_prompt(
            &summary,
            &chunk,
            config.max_summary_tokens,
            config.chars_per_token,
        );
        let response = session.respond(&prompt, &config.summary_options)?;
        summary = response.into_content();
    }

    Ok(summary)
}

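/// Flattens a transcript's JSON into plain `role: content` lines, falling
/// back to the raw JSON string when no recognizable entries are found.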
pub fn transcript_to_text(transcript_json: &str) -> Result<String> {
    let value: Value = serde_json::from_str(transcript_json)?;
    let mut lines = Vec::new();
    collect_transcript_lines(&value, &mut lines);

    if lines.is_empty() {
        Ok(transcript_json.to_string())
    } else {
        Ok(lines.join("\n"))
    }
}

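/// Estimates a token count as `ceil(chars / chars_per_token)`.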
pub fn estimate_tokens(text: &str, chars_per_token: usize) -> usize {
    let denom = chars_per_token.max(1);
    let chars = text.chars().count();
    chars.div_ceil(denom)
}

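/// Builds the prompt for one summarization step, pairing the running summary
/// (trimmed to its token budget when it has grown too long) with the new chunk.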
fn build_summary_prompt(
    current_summary: &str,
    chunk: &str,
    max_summary_tokens: usize,
    chars_per_token: usize,
) -> String {
    if current_summary.trim().is_empty() {
        format!(
            "Summarize the following conversation transcript:\n\n{chunk}\n\nReturn a concise summary."
        )
    } else {
        let summary_tokens = estimate_tokens(current_summary, chars_per_token);
        let truncated_summary = if summary_tokens > max_summary_tokens {
            let max_chars = max_summary_tokens.saturating_mul(chars_per_token.max(1));
            let char_count = current_summary.chars().count();
            if char_count > max_chars {
                let skip = char_count - max_chars;
                format!(
                    "...{}",
                    current_summary.chars().skip(skip).collect::<String>()
                )
            } else {
                current_summary.to_string()
            }
        } else {
            current_summary.to_string()
        };

        format!(
            "Update the summary with new conversation content.\n\nCurrent summary:\n{truncated_summary}\n\nNew transcript chunk:\n{chunk}\n\nReturn the updated concise summary."
        )
    }
}

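/// Splits text into line-aligned chunks of roughly `chunk_tokens` tokens each;
/// a single oversized line becomes its own chunk.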
fn chunk_text(text: &str, chunk_tokens: usize, chars_per_token: usize) -> Vec<String> {
    let max_chars = chunk_tokens.max(1).saturating_mul(chars_per_token.max(1));
    let mut chunks = Vec::new();
    let mut current = String::new();

    for line in text.lines() {
        let line_len = line.chars().count() + 1;
        if !current.is_empty() && current.chars().count() + line_len > max_chars {
            chunks.push(current.trim_end().to_string());
            current.clear();
        }
        current.push_str(line);
        current.push('\n');
    }

    if !current.trim().is_empty() {
        chunks.push(current.trim_end().to_string());
    }

    if chunks.is_empty() {
        chunks.push(text.to_string());
    }

    chunks
}

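/// Recursively walks transcript JSON, collecting human-readable lines.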
fn collect_transcript_lines(value: &Value, out: &mut Vec<String>) {
    match value {
        Value::Array(items) => {
            for item in items {
                collect_transcript_lines(item, out);
            }
        }
        Value::Object(map) => {
            let mut processed_content = false;

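            // Prefer structured entries: a "role" paired with "content" or "text".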
            if let Some(role) = map.get("role").and_then(Value::as_str) {
                let content = map
                    .get("content")
                    .and_then(Value::as_str)
                    .or_else(|| map.get("text").and_then(Value::as_str));
                if let Some(content) = content {
                    out.push(format!("{role}: {content}"));
                    processed_content = true;
                }
            }

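            // Fall back to common text-bearing keys not already emitted above.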
            for key in ["content", "text", "prompt", "response", "instructions"] {
                if processed_content && matches!(key, "content" | "text") {
                    continue;
                }
                if let Some(text) = map.get(key).and_then(Value::as_str) {
                    out.push(text.to_string());
                }
            }

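            // Recurse into any remaining nested values.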
            for (key, value) in map {
                if matches!(
                    key.as_str(),
                    "role" | "content" | "text" | "prompt" | "response" | "instructions"
                ) {
                    continue;
                }
                collect_transcript_lines(value, out);
            }
        }
        _ => {}
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_estimate_tokens() {
        let text = "abcd";
        assert_eq!(estimate_tokens(text, 4), 1);
        assert_eq!(estimate_tokens(text, 3), 2);
    }

    #[test]
    fn test_chunk_text() {
        let text = "Line one\nLine two\nLine three";
        let chunks = chunk_text(text, 2, 4);
        assert!(!chunks.is_empty());
        assert_eq!(chunks.len(), 3);
    }
}