use serde_json::Value;

use crate::error::Result;
use crate::model::SystemLanguageModel;
use crate::options::GenerationOptions;
use crate::session::Session;

/// Default context-window size, in tokens, used by
/// [`ContextLimit::default_on_device`].
pub const DEFAULT_CONTEXT_TOKENS: usize = 4096;
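
/// Token budget for a model's context window.
///
/// A minimal usage sketch (marked `ignore` since it assumes this module's
/// items are already in scope):
///
/// ```ignore
/// let limit = ContextLimit::new(8192)
///     .with_reserved_response_tokens(1024)
///     .with_chars_per_token(4);
/// ```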
#[derive(Debug, Clone, Copy)]
pub struct ContextLimit {
    /// Total size of the context window, in tokens.
    pub max_tokens: usize,
    /// Tokens held back for the model's response.
    pub reserved_response_tokens: usize,
    /// Average characters per token used when estimating usage.
    pub chars_per_token: usize,
}

impl ContextLimit {
    /// Creates a limit with the given window size, no reserved response
    /// tokens, and the default heuristic of four characters per token.
    pub fn new(max_tokens: usize) -> Self {
        Self {
            max_tokens,
            reserved_response_tokens: 0,
            chars_per_token: 4,
        }
    }

    /// Returns the default limit for the on-device model.
    pub fn default_on_device() -> Self {
        Self {
            max_tokens: DEFAULT_CONTEXT_TOKENS,
            reserved_response_tokens: 512,
            chars_per_token: 4,
        }
    }

    /// Sets the number of tokens reserved for the model's response.
    pub fn with_reserved_response_tokens(mut self, tokens: usize) -> Self {
        self.reserved_response_tokens = tokens;
        self
    }

    /// Sets the characters-per-token heuristic; a value of zero is ignored.
    pub fn with_chars_per_token(mut self, chars: usize) -> Self {
        if chars > 0 {
            self.chars_per_token = chars;
        }
        self
    }
}

/// Snapshot of estimated context-window usage for a transcript.
#[derive(Debug, Clone, Copy)]
pub struct ContextUsage {
    /// Estimated number of tokens in the transcript.
    pub estimated_tokens: usize,
    /// Total size of the context window, in tokens.
    pub max_tokens: usize,
    /// Tokens held back for the model's response.
    pub reserved_response_tokens: usize,
    /// Tokens left for the prompt after the response reservation.
    pub available_tokens: usize,
    /// Estimated tokens as a fraction of the window size.
    pub utilization: f32,
    /// Whether the estimate exceeds the available budget.
    pub over_limit: bool,
}

/// Settings that control transcript compaction.
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Target size of each transcript chunk, in tokens.
    pub chunk_tokens: usize,
    /// Instructions given to the summarization session.
    pub instructions: String,
    /// Generation options used for each summarization request.
    pub summary_options: GenerationOptions,
    /// Average characters per token used when chunking.
    pub chars_per_token: usize,
}

impl Default for CompactionConfig {
    fn default() -> Self {
        Self {
            chunk_tokens: 800,
            instructions: "Summarize the conversation for future context. Preserve user intent, key facts, decisions, and open questions. Keep the summary concise."
                .to_string(),
            summary_options: GenerationOptions::builder()
                .temperature(0.2)
                .max_response_tokens(256)
                .build(),
            chars_per_token: 4,
        }
    }
}
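
/// Estimates how much of the context window a serialized transcript uses.
///
/// A sketch of typical usage (marked `ignore` since it assumes this
/// module's items are in scope and `transcript_json` holds a transcript
/// serialized as JSON):
///
/// ```ignore
/// let limit = ContextLimit::default_on_device();
/// let usage = context_usage_from_transcript(transcript_json, &limit)?;
/// if usage.over_limit {
///     // Compact the transcript before the next request.
/// }
/// ```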
pub fn context_usage_from_transcript(
    transcript_json: &str,
    limit: &ContextLimit,
) -> Result<ContextUsage> {
    let transcript_text = transcript_to_text(transcript_json)?;
    let estimated_tokens = estimate_tokens(&transcript_text, limit.chars_per_token);
    let available_tokens = limit
        .max_tokens
        .saturating_sub(limit.reserved_response_tokens);
    let utilization = if limit.max_tokens == 0 {
        0.0
    } else {
        estimated_tokens as f32 / limit.max_tokens as f32
    };
    let over_limit = estimated_tokens > available_tokens;

    Ok(ContextUsage {
        estimated_tokens,
        max_tokens: limit.max_tokens,
        reserved_response_tokens: limit.reserved_response_tokens,
        available_tokens,
        utilization,
        over_limit,
    })
}
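
/// Compacts a transcript into a short rolling summary: the transcript text
/// is split into chunks, and each chunk is folded into the running summary
/// using a fresh summarization session per chunk.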
pub fn compact_transcript(
    model: &SystemLanguageModel,
    transcript_json: &str,
    config: &CompactionConfig,
) -> Result<String> {
    let transcript_text = transcript_to_text(transcript_json)?;
    if transcript_text.trim().is_empty() {
        return Ok(String::new());
    }

    let chunks = chunk_text(&transcript_text, config.chunk_tokens, config.chars_per_token);

    let mut summary = String::new();

    for chunk in chunks {
        let session = Session::with_instructions(model, &config.instructions)?;
        let prompt = build_summary_prompt(&summary, &chunk);
        let response = session.respond(&prompt, &config.summary_options)?;
        summary = response.into_content();
    }

    Ok(summary)
}
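
/// Flattens transcript JSON into plain text, one `role: content` line per
/// message where possible, and falls back to the raw JSON when no readable
/// fields are found.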
pub fn transcript_to_text(transcript_json: &str) -> Result<String> {
    let value: Value = serde_json::from_str(transcript_json)?;
    let mut lines = Vec::new();
    collect_transcript_lines(&value, &mut lines);

    if lines.is_empty() {
        Ok(transcript_json.to_string())
    } else {
        Ok(lines.join("\n"))
    }
}
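
/// Estimates the token count of `text` with a simple characters-per-token
/// heuristic, rounding up; a `chars_per_token` of zero is clamped to one.
///
/// ```ignore
/// assert_eq!(estimate_tokens("abcdefgh", 4), 2); // ceil(8 / 4)
/// ```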
pub fn estimate_tokens(text: &str, chars_per_token: usize) -> usize {
    let denom = chars_per_token.max(1);
    let chars = text.chars().count();
    chars.div_ceil(denom)
}

/// Builds the per-chunk prompt: either a fresh summarization request or an
/// incremental update that folds the new chunk into the current summary.
fn build_summary_prompt(current_summary: &str, chunk: &str) -> String {
    if current_summary.trim().is_empty() {
        format!(
            "Summarize the following conversation transcript:\n\n{chunk}\n\nReturn a concise summary."
        )
    } else {
        format!(
            "Update the summary with new conversation content.\n\nCurrent summary:\n{current_summary}\n\nNew transcript chunk:\n{chunk}\n\nReturn the updated concise summary."
        )
    }
}

/// Splits `text` into line-aligned chunks of roughly `chunk_tokens` tokens,
/// using the characters-per-token heuristic to bound chunk size.
fn chunk_text(text: &str, chunk_tokens: usize, chars_per_token: usize) -> Vec<String> {
    let max_chars = chunk_tokens.max(1).saturating_mul(chars_per_token.max(1));
    let mut chunks = Vec::new();
    let mut current = String::new();

    for line in text.lines() {
        let line_len = line.chars().count() + 1;
        if !current.is_empty() && current.chars().count() + line_len > max_chars {
            chunks.push(current.trim_end().to_string());
            current.clear();
        }
        current.push_str(line);
        current.push('\n');
    }

    if !current.trim().is_empty() {
        chunks.push(current.trim_end().to_string());
    }

    if chunks.is_empty() {
        chunks.push(text.to_string());
    }

    chunks
}

/// Recursively walks transcript JSON, collecting readable lines: `role:
/// content` pairs where present, plus bare text under well-known keys.
fn collect_transcript_lines(value: &Value, out: &mut Vec<String>) {
    match value {
        Value::Array(items) => {
            for item in items {
                collect_transcript_lines(item, out);
            }
        }
        Value::Object(map) => {
            let mut processed_content = false;

            if let Some(role) = map.get("role").and_then(Value::as_str) {
                let content = map
                    .get("content")
                    .and_then(Value::as_str)
                    .or_else(|| map.get("text").and_then(Value::as_str));
                if let Some(content) = content {
                    out.push(format!("{role}: {content}"));
                    processed_content = true;
                }
            }

            for key in ["content", "text", "prompt", "response", "instructions"] {
                if processed_content && matches!(key, "content" | "text") {
                    continue;
                }
                if let Some(text) = map.get(key).and_then(Value::as_str) {
                    out.push(text.to_string());
                }
            }

            for (key, value) in map {
                if matches!(
                    key.as_str(),
                    "role" | "content" | "text" | "prompt" | "response" | "instructions"
                ) {
                    continue;
                }
                collect_transcript_lines(value, out);
            }
        }
        _ => {}
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_estimate_tokens() {
        let text = "abcd";
        assert_eq!(estimate_tokens(text, 4), 1);
        assert_eq!(estimate_tokens(text, 3), 2);
    }

    #[test]
    fn test_chunk_text() {
        let text = "Line one\nLine two\nLine three";
        let chunks = chunk_text(text, 2, 4);
        assert!(!chunks.is_empty());
    }
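
    #[test]
    fn test_estimate_tokens_degenerate_inputs() {
        // Empty text estimates to zero tokens; a zero divisor is clamped to one.
        assert_eq!(estimate_tokens("", 4), 0);
        assert_eq!(estimate_tokens("abcd", 0), 4);
    }

    #[test]
    fn test_transcript_to_text_roles() {
        // Role/content entries flatten to "role: content" lines.
        let json = r#"[{"role":"user","content":"Hi"},{"role":"assistant","content":"Hello"}]"#;
        let text = transcript_to_text(json).unwrap();
        assert_eq!(text, "user: Hi\nassistant: Hello");
    }

    #[test]
    fn test_context_usage_from_transcript() {
        // "user: Hi" is 8 chars, so 2 tokens at the default 4 chars per token.
        let json = r#"[{"role":"user","content":"Hi"}]"#;
        let limit = ContextLimit::new(100);
        let usage = context_usage_from_transcript(json, &limit).unwrap();
        assert_eq!(usage.estimated_tokens, 2);
        assert_eq!(usage.available_tokens, 100);
        assert!(!usage.over_limit);
    }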
}