// fm_rs/context.rs
1//! Context window tracking and compaction helpers.
2
3use serde_json::Value;
4
5use crate::error::Result;
6use crate::model::SystemLanguageModel;
7use crate::options::GenerationOptions;
8use crate::session::Session;
9
/// Default context window size for Apple's on-device Foundation Models.
///
/// Based on behavior observed during WWDC 2025 sessions and early developer
/// testing; Apple has not officially documented the context window size, and
/// the real limit may vary by device, model version, or available memory.
///
/// For production use, monitor [`ContextUsage::utilization`] and apply a
/// compaction strategy when approaching the limit.
pub const DEFAULT_CONTEXT_TOKENS: usize = 4096;

/// Configuration for estimating context usage.
#[derive(Debug, Clone, Copy)]
pub struct ContextLimit {
    /// Total token budget of the session context window.
    pub max_tokens: usize,
    /// Tokens held back for the model's next response.
    pub reserved_response_tokens: usize,
    /// Heuristic characters-per-token ratio (English ~3-4, CJK ~1).
    pub chars_per_token: usize,
}

impl ContextLimit {
    /// Builds a limit for the given token budget with no reserved response
    /// tokens and the default 4-characters-per-token estimate.
    pub fn new(max_tokens: usize) -> Self {
        Self {
            max_tokens,
            reserved_response_tokens: 0,
            chars_per_token: 4,
        }
    }

    /// Returns the recommended configuration for on-device models:
    /// [`DEFAULT_CONTEXT_TOKENS`] total with 512 tokens reserved for the reply.
    pub fn default_on_device() -> Self {
        Self::new(DEFAULT_CONTEXT_TOKENS).with_reserved_response_tokens(512)
    }

    /// Overrides the number of tokens reserved for the model's response.
    pub fn with_reserved_response_tokens(mut self, tokens: usize) -> Self {
        self.reserved_response_tokens = tokens;
        self
    }

    /// Overrides the characters-per-token estimate.
    ///
    /// A value of zero is ignored (the previous estimate is kept) so the
    /// ratio can never become an invalid divisor.
    pub fn with_chars_per_token(mut self, chars: usize) -> Self {
        if chars == 0 {
            return self;
        }
        self.chars_per_token = chars;
        self
    }
}
64
/// Estimated context usage for a session.
///
/// All token counts are heuristic estimates derived from a
/// characters-per-token ratio, not real tokenizer output.
#[derive(Debug, Clone, Copy)]
pub struct ContextUsage {
    /// Estimated number of tokens consumed by the transcript.
    pub estimated_tokens: usize,
    /// Maximum tokens configured for the session.
    pub max_tokens: usize,
    /// Tokens reserved for the next response.
    pub reserved_response_tokens: usize,
    /// Estimated tokens available for prompts before hitting the limit
    /// (`max_tokens` minus `reserved_response_tokens`, saturating at zero).
    pub available_tokens: usize,
    /// Estimated utilization ratio: `estimated_tokens / max_tokens`.
    /// May exceed 1.0 when the transcript overruns the window; 0.0 when
    /// `max_tokens` is zero.
    pub utilization: f32,
    /// Whether `estimated_tokens` exceeds `available_tokens`.
    pub over_limit: bool,
}
81
/// Result of compacting a session into a new summarized session.
///
/// Returned by [`compact_session_if_needed`] when a session's estimated
/// usage exceeds its budget.
pub struct CompactedSession {
    /// Newly created session seeded with compacted summary instructions.
    pub session: Session,
    /// Compacted summary generated from the prior transcript by
    /// [`compact_transcript`].
    pub summary: String,
}
89
/// Configuration for transcript compaction.
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Estimated tokens per chunk sent to the summarizer.
    pub chunk_tokens: usize,
    /// Maximum tokens allowed for the rolling summary.
    ///
    /// As chunks are processed, the running summary can grow unbounded.
    /// This limit ensures the summary is truncated (keeping its tail) to
    /// avoid exceeding the model's context window during multi-chunk
    /// compaction.
    pub max_summary_tokens: usize,
    /// Instructions for the summarizer session.
    pub instructions: String,
    /// Options used for summary generation.
    pub summary_options: GenerationOptions,
    /// Estimated characters per token, used for both chunk sizing and
    /// summary truncation.
    pub chars_per_token: usize,
}
108
109impl Default for CompactionConfig {
110    fn default() -> Self {
111        Self {
112            chunk_tokens: 800,
113            max_summary_tokens: 400,
114            instructions: "Summarize the conversation for future context. Preserve user intent, key facts, decisions, and open questions. Keep the summary concise."
115                .to_string(),
116            summary_options: GenerationOptions::builder()
117                .temperature(0.2)
118                .max_response_tokens(256)
119                .build(),
120            chars_per_token: 4,
121        }
122    }
123}
124
125/// Estimates token usage for the session transcript JSON.
126pub fn context_usage_from_transcript(
127    transcript_json: &str,
128    limit: &ContextLimit,
129) -> Result<ContextUsage> {
130    let transcript_text = transcript_to_text(transcript_json)?;
131    let estimated_tokens = estimate_tokens(&transcript_text, limit.chars_per_token);
132    let available_tokens = limit
133        .max_tokens
134        .saturating_sub(limit.reserved_response_tokens);
135    let utilization = if limit.max_tokens == 0 {
136        0.0
137    } else {
138        estimated_tokens as f32 / limit.max_tokens as f32
139    };
140    let over_limit = estimated_tokens > available_tokens;
141
142    Ok(ContextUsage {
143        estimated_tokens,
144        max_tokens: limit.max_tokens,
145        reserved_response_tokens: limit.reserved_response_tokens,
146        available_tokens,
147        utilization,
148        over_limit,
149    })
150}
151
152/// Compacts a transcript into a summary using the on-device model.
153pub fn compact_transcript(
154    model: &SystemLanguageModel,
155    transcript_json: &str,
156    config: &CompactionConfig,
157) -> Result<String> {
158    let transcript_text = transcript_to_text(transcript_json)?;
159    if transcript_text.trim().is_empty() {
160        return Ok(String::new());
161    }
162
163    let chunks = chunk_text(
164        &transcript_text,
165        config.chunk_tokens,
166        config.chars_per_token,
167    );
168
169    let mut summary = String::new();
170
171    for chunk in chunks {
172        let session = Session::with_instructions(model, &config.instructions)?;
173        let prompt = build_summary_prompt(
174            &summary,
175            &chunk,
176            config.max_summary_tokens,
177            config.chars_per_token,
178        );
179        let response = session.respond(&prompt, &config.summary_options)?;
180        summary = response.into_content();
181    }
182
183    Ok(summary)
184}
185
186/// Compacts a session transcript and creates a new summarized session when needed.
187///
188/// This helper implements a common context-window rollover pattern:
189/// 1. Estimate usage from the current session transcript.
190/// 2. If still within budget, return `Ok(None)`.
191/// 3. If over budget, summarize the transcript and return a fresh `Session`.
192///
193/// `base_instructions` are prepended to the generated summary in the compacted session.
194pub fn compact_session_if_needed(
195    model: &SystemLanguageModel,
196    session: &Session,
197    limit: &ContextLimit,
198    config: &CompactionConfig,
199    base_instructions: Option<&str>,
200) -> Result<Option<CompactedSession>> {
201    let usage = session.context_usage(limit)?;
202    if !usage.over_limit {
203        return Ok(None);
204    }
205
206    let transcript_json = session.transcript_json()?;
207    let summary = compact_transcript(model, &transcript_json, config)?;
208    let compacted = session_from_summary(model, base_instructions, &summary)?;
209
210    Ok(Some(CompactedSession {
211        session: compacted,
212        summary,
213    }))
214}
215
216/// Creates a new session from optional base instructions and a conversation summary.
217pub fn session_from_summary(
218    model: &SystemLanguageModel,
219    base_instructions: Option<&str>,
220    summary: &str,
221) -> Result<Session> {
222    match compacted_instructions(base_instructions, summary) {
223        Some(instructions) => Session::with_instructions(model, &instructions),
224        None => Session::new(model),
225    }
226}
227
/// Builds instructions text for a compacted session.
///
/// Returns `None` when both inputs are blank; otherwise combines the trimmed
/// base instructions and/or a "Conversation summary:" section.
pub fn compacted_instructions(base_instructions: Option<&str>, summary: &str) -> Option<String> {
    let base = base_instructions.unwrap_or("").trim();
    let summary = summary.trim();

    if base.is_empty() && summary.is_empty() {
        None
    } else if summary.is_empty() {
        Some(base.to_string())
    } else if base.is_empty() {
        Some(format!("Conversation summary:\n{summary}"))
    } else {
        Some(format!("{base}\n\nConversation summary:\n{summary}"))
    }
}
240
241/// Extracts readable text from transcript JSON.
242pub fn transcript_to_text(transcript_json: &str) -> Result<String> {
243    let value: Value = serde_json::from_str(transcript_json)?;
244    let mut lines = Vec::new();
245    collect_transcript_lines(&value, &mut lines);
246
247    if lines.is_empty() {
248        Ok(transcript_json.to_string())
249    } else {
250        Ok(lines.join("\n"))
251    }
252}
253
/// Estimates tokens based on a characters-per-token heuristic.
///
/// Counts Unicode scalar values (not bytes) and rounds up; a
/// `chars_per_token` of zero is clamped to one to avoid division by zero.
pub fn estimate_tokens(text: &str, chars_per_token: usize) -> usize {
    text.chars().count().div_ceil(chars_per_token.max(1))
}
260
/// Builds the summarizer prompt for one transcript chunk.
///
/// The first chunk gets a plain "summarize this" prompt; subsequent chunks
/// get an "update the summary" prompt. The rolling summary is capped at
/// roughly `max_summary_tokens` (keeping its tail, with a ".." marker) so it
/// cannot grow without bound across chunks.
fn build_summary_prompt(
    current_summary: &str,
    chunk: &str,
    max_summary_tokens: usize,
    chars_per_token: usize,
) -> String {
    if current_summary.trim().is_empty() {
        return format!(
            "Summarize the following conversation transcript:\n\n{chunk}\n\nReturn a concise summary."
        );
    }

    // Ceil-divide char count by the heuristic ratio to estimate tokens.
    let divisor = chars_per_token.max(1);
    let total_chars = current_summary.chars().count();
    let summary_tokens = total_chars.div_ceil(divisor);

    let truncated_summary = if summary_tokens > max_summary_tokens {
        let max_chars = max_summary_tokens.saturating_mul(divisor);
        if total_chars > max_chars {
            // Keep the tail of the summary: recent context matters most.
            let tail: String = current_summary.chars().skip(total_chars - max_chars).collect();
            format!("..{tail}")
        } else {
            current_summary.to_string()
        }
    } else {
        current_summary.to_string()
    };

    format!(
        "Update the summary with new conversation content.\n\nCurrent summary:\n{truncated_summary}\n\nNew transcript chunk:\n{chunk}\n\nReturn the updated concise summary."
    )
}
296
/// Splits transcript text into chunks of at most `chunk_tokens` estimated tokens.
///
/// Whole lines are kept together where possible. A single line longer than
/// the chunk budget is split at character boundaries so no chunk can exceed
/// the budget — the previous implementation kept oversized lines whole,
/// producing chunks larger than the summarizer's context allows.
///
/// Always returns at least one chunk; for empty or whitespace-only input the
/// original text is returned as the sole chunk (historical fallback).
fn chunk_text(text: &str, chunk_tokens: usize, chars_per_token: usize) -> Vec<String> {
    // Budget in characters; both factors are clamped to at least 1 so the
    // budget is never zero.
    let max_chars = chunk_tokens.max(1).saturating_mul(chars_per_token.max(1));
    let mut chunks = Vec::new();
    let mut current = String::new();

    for line in text.lines() {
        for piece in split_oversized_line(line, max_chars) {
            let piece_len = piece.chars().count() + 1; // +1 for the newline
            if !current.is_empty() && current.chars().count() + piece_len > max_chars {
                chunks.push(current.trim_end().to_string());
                current.clear();
            }
            current.push_str(&piece);
            current.push('\n');
        }
    }

    if !current.trim().is_empty() {
        chunks.push(current.trim_end().to_string());
    }

    // Never return an empty vector.
    if chunks.is_empty() {
        chunks.push(text.to_string());
    }

    chunks
}

/// Splits `line` into pieces of at most `max_chars` characters.
/// Returns the line unchanged (as a single piece) when it already fits.
fn split_oversized_line(line: &str, max_chars: usize) -> Vec<String> {
    if line.chars().count() <= max_chars {
        return vec![line.to_string()];
    }
    line.chars()
        .collect::<Vec<char>>()
        .chunks(max_chars)
        .map(|piece| piece.iter().collect())
        .collect()
}
322
323fn collect_transcript_lines(value: &Value, out: &mut Vec<String>) {
324    match value {
325        Value::Array(items) => {
326            for item in items {
327                collect_transcript_lines(item, out);
328            }
329        }
330        Value::Object(map) => {
331            // Track which keys we've already processed to avoid double-counting
332            let mut processed_content = false;
333
334            // If this is a message with role+content, add as "{role}: {content}"
335            if let Some(role) = map.get("role").and_then(Value::as_str) {
336                let content = map
337                    .get("content")
338                    .and_then(Value::as_str)
339                    .or_else(|| map.get("text").and_then(Value::as_str));
340                if let Some(content) = content {
341                    out.push(format!("{role}: {content}"));
342                    processed_content = true;
343                }
344            }
345
346            // Add standalone text fields, skipping content/text if already included above
347            for key in ["content", "text", "prompt", "response", "instructions"] {
348                if processed_content && matches!(key, "content" | "text") {
349                    continue;
350                }
351                if let Some(text) = map.get(key).and_then(Value::as_str) {
352                    out.push(text.to_string());
353                }
354            }
355
356            // Recurse into other fields
357            for (key, value) in map {
358                if matches!(
359                    key.as_str(),
360                    "role" | "content" | "text" | "prompt" | "response" | "instructions"
361                ) {
362                    continue;
363                }
364                collect_transcript_lines(value, out);
365            }
366        }
367        _ => {}
368    }
369}
370
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): the extracted source dropped the closing brace of the
    // last test function (inner numbering jumps 403 -> 405); this restores a
    // well-formed module with identical assertions.

    /// The heuristic divides char count by chars-per-token, rounding up.
    #[test]
    fn test_estimate_tokens() {
        let text = "abcd";
        assert_eq!(estimate_tokens(text, 4), 1);
        assert_eq!(estimate_tokens(text, 3), 2);
    }

    /// Chunking always yields at least one chunk.
    #[test]
    fn test_chunk_text() {
        let text = "Line one\nLine two\nLine three";
        let chunks = chunk_text(text, 2, 4);
        assert!(!chunks.is_empty());
    }

    /// Covers all four base/summary emptiness combinations.
    #[test]
    fn test_compacted_instructions() {
        assert_eq!(compacted_instructions(None, ""), None);
        assert_eq!(
            compacted_instructions(Some("You are helpful."), ""),
            Some("You are helpful.".to_string())
        );
        assert_eq!(
            compacted_instructions(None, "Summary body"),
            Some("Conversation summary:\nSummary body".to_string())
        );
        assert_eq!(
            compacted_instructions(Some("You are helpful."), "Summary body"),
            Some("You are helpful.\n\nConversation summary:\nSummary body".to_string())
        );
    }
}
405}