fm_rs/context.rs

//! Context window tracking and compaction helpers.

use serde_json::Value;

use crate::error::Result;
use crate::model::SystemLanguageModel;
use crate::options::GenerationOptions;
use crate::session::Session;

/// Default context window size for Apple's on-device Foundation Models.
///
/// This value is based on observed behavior during WWDC 2025 sessions and early
/// developer testing. Apple has not officially documented the context window size.
/// The actual limit may vary by device, model version, or available memory.
///
/// For production use, monitor [`ContextUsage::utilization`] and implement
/// compaction strategies when approaching the limit.
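///
/// # Examples
///
/// A small sketch showing that the on-device default uses this value; the
/// `fm_rs::context` module path is assumed from this file's location.
///
/// ```
/// use fm_rs::context::{ContextLimit, DEFAULT_CONTEXT_TOKENS};
///
/// let limit = ContextLimit::default_on_device();
/// assert_eq!(limit.max_tokens, DEFAULT_CONTEXT_TOKENS);
/// ```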
pub const DEFAULT_CONTEXT_TOKENS: usize = 4096;

/// Configuration for estimating context usage.
#[derive(Debug, Clone, Copy)]
pub struct ContextLimit {
    /// Maximum tokens available in the session context window.
    pub max_tokens: usize,
    /// Tokens reserved for the model's next response.
    pub reserved_response_tokens: usize,
    /// Estimated characters per token (English ~3-4, CJK ~1).
    pub chars_per_token: usize,
}

impl ContextLimit {
    /// Creates a new context limit with a max token budget.
    pub fn new(max_tokens: usize) -> Self {
        Self {
            max_tokens,
            reserved_response_tokens: 0,
            chars_per_token: 4,
        }
    }

    /// Creates a default configuration for on-device models.
    pub fn default_on_device() -> Self {
        Self {
            max_tokens: DEFAULT_CONTEXT_TOKENS,
            reserved_response_tokens: 512,
            chars_per_token: 4,
        }
    }

    /// Sets the reserved response tokens.
    pub fn with_reserved_response_tokens(mut self, tokens: usize) -> Self {
        self.reserved_response_tokens = tokens;
        self
    }

    /// Sets the character-per-token estimate.
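    ///
    /// # Examples
    ///
    /// A small sketch of chaining the builders; the `fm_rs::context` module
    /// path is assumed from this file's location.
    ///
    /// ```
    /// use fm_rs::context::ContextLimit;
    ///
    /// let limit = ContextLimit::new(8192)
    ///     .with_reserved_response_tokens(1024)
    ///     .with_chars_per_token(3);
    /// assert_eq!(limit.reserved_response_tokens, 1024);
    /// ```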
    pub fn with_chars_per_token(mut self, chars: usize) -> Self {
        if chars > 0 {
            self.chars_per_token = chars;
        }
        self
    }
}

/// Estimated context usage for a session.
#[derive(Debug, Clone, Copy)]
pub struct ContextUsage {
    /// Estimated number of tokens consumed by the transcript.
    pub estimated_tokens: usize,
    /// Maximum tokens configured for the session.
    pub max_tokens: usize,
    /// Tokens reserved for the next response.
    pub reserved_response_tokens: usize,
    /// Estimated tokens available for prompts before hitting the limit.
    pub available_tokens: usize,
    /// Estimated utilization ratio (estimated tokens / max tokens); may exceed 1.0.
    pub utilization: f32,
    /// Whether the estimate exceeds the available budget.
    pub over_limit: bool,
}

/// Configuration for transcript compaction.
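///
/// # Examples
///
/// A small sketch using struct-update syntax to override a couple of fields;
/// the `fm_rs::context` module path is assumed from this file's location.
///
/// ```
/// use fm_rs::context::CompactionConfig;
///
/// let config = CompactionConfig {
///     chunk_tokens: 600,
///     max_summary_tokens: 300,
///     ..CompactionConfig::default()
/// };
/// assert_eq!(config.chunk_tokens, 600);
/// ```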
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Estimated tokens per chunk sent to the summarizer.
    pub chunk_tokens: usize,
    /// Maximum tokens allowed for the rolling summary.
    ///
    /// As chunks are processed, the running summary can grow unbounded.
    /// This limit ensures the summary is truncated to avoid exceeding
    /// the model's context window during multi-chunk compaction.
    pub max_summary_tokens: usize,
    /// Instructions for the summarizer session.
    pub instructions: String,
    /// Options used for summary generation.
    pub summary_options: GenerationOptions,
    /// Estimated characters per token.
    pub chars_per_token: usize,
}

impl Default for CompactionConfig {
    fn default() -> Self {
        Self {
            chunk_tokens: 800,
            max_summary_tokens: 400,
            instructions: "Summarize the conversation for future context. Preserve user intent, key facts, decisions, and open questions. Keep the summary concise."
                .to_string(),
            summary_options: GenerationOptions::builder()
                .temperature(0.2)
                .max_response_tokens(256)
                .build(),
            chars_per_token: 4,
        }
    }
}

/// Estimates token usage for the session transcript JSON.
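///
/// # Examples
///
/// A minimal sketch with a tiny inline transcript; the `fm_rs::context`
/// module path is assumed from this file's location.
///
/// ```
/// use fm_rs::context::{context_usage_from_transcript, ContextLimit};
///
/// let transcript = r#"[{"role": "user", "content": "Hello"}]"#;
/// let limit = ContextLimit::default_on_device();
/// let usage = context_usage_from_transcript(transcript, &limit).unwrap();
/// assert!(!usage.over_limit);
/// ```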
pub fn context_usage_from_transcript(
    transcript_json: &str,
    limit: &ContextLimit,
) -> Result<ContextUsage> {
    let transcript_text = transcript_to_text(transcript_json)?;
    let estimated_tokens = estimate_tokens(&transcript_text, limit.chars_per_token);
    let available_tokens = limit
        .max_tokens
        .saturating_sub(limit.reserved_response_tokens);
    let utilization = if limit.max_tokens == 0 {
        0.0
    } else {
        estimated_tokens as f32 / limit.max_tokens as f32
    };
    let over_limit = estimated_tokens > available_tokens;

    Ok(ContextUsage {
        estimated_tokens,
        max_tokens: limit.max_tokens,
        reserved_response_tokens: limit.reserved_response_tokens,
        available_tokens,
        utilization,
        over_limit,
    })
}

/// Compacts a transcript into a summary using the on-device model.
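///
/// # Examples
///
/// A sketch only: the import paths are assumed from the crate layout, and the
/// `SystemLanguageModel` value is expected to come from the caller.
///
/// ```no_run
/// use fm_rs::context::{compact_transcript, CompactionConfig};
/// use fm_rs::error::Result;
/// use fm_rs::model::SystemLanguageModel;
///
/// fn summarize(model: &SystemLanguageModel, transcript_json: &str) -> Result<String> {
///     compact_transcript(model, transcript_json, &CompactionConfig::default())
/// }
/// ```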
pub fn compact_transcript(
    model: &SystemLanguageModel,
    transcript_json: &str,
    config: &CompactionConfig,
) -> Result<String> {
    let transcript_text = transcript_to_text(transcript_json)?;
    if transcript_text.trim().is_empty() {
        return Ok(String::new());
    }

    let chunks = chunk_text(
        &transcript_text,
        config.chunk_tokens,
        config.chars_per_token,
    );

    let mut summary = String::new();

    for chunk in chunks {
        let session = Session::with_instructions(model, &config.instructions)?;
        let prompt = build_summary_prompt(
            &summary,
            &chunk,
            config.max_summary_tokens,
            config.chars_per_token,
        );
        let response = session.respond(&prompt, &config.summary_options)?;
        summary = response.into_content();
    }

    Ok(summary)
}

/// Extracts readable text from transcript JSON.
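///
/// # Examples
///
/// A minimal sketch of the `{role}: {content}` rendering; the `fm_rs::context`
/// module path is assumed from this file's location.
///
/// ```
/// use fm_rs::context::transcript_to_text;
///
/// let json = r#"[{"role": "assistant", "content": "Hi there"}]"#;
/// assert_eq!(transcript_to_text(json).unwrap(), "assistant: Hi there");
/// ```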
pub fn transcript_to_text(transcript_json: &str) -> Result<String> {
    let value: Value = serde_json::from_str(transcript_json)?;
    let mut lines = Vec::new();
    collect_transcript_lines(&value, &mut lines);

    if lines.is_empty() {
        Ok(transcript_json.to_string())
    } else {
        Ok(lines.join("\n"))
    }
}

/// Estimates tokens based on a characters-per-token heuristic.
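///
/// # Examples
///
/// A minimal sketch (values mirror the unit tests below); the `fm_rs::context`
/// module path is assumed from this file's location.
///
/// ```
/// use fm_rs::context::estimate_tokens;
///
/// // Four characters at ~4 chars/token round up to a single token.
/// assert_eq!(estimate_tokens("abcd", 4), 1);
/// assert_eq!(estimate_tokens("abcd", 3), 2);
/// ```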
pub fn estimate_tokens(text: &str, chars_per_token: usize) -> usize {
    let denom = chars_per_token.max(1);
    let chars = text.chars().count();
    chars.div_ceil(denom)
}

fn build_summary_prompt(
    current_summary: &str,
    chunk: &str,
    max_summary_tokens: usize,
    chars_per_token: usize,
) -> String {
    if current_summary.trim().is_empty() {
        format!(
            "Summarize the following conversation transcript:\n\n{chunk}\n\nReturn a concise summary."
        )
    } else {
        // Truncate summary if it exceeds the token limit to prevent unbounded growth
        let summary_tokens = estimate_tokens(current_summary, chars_per_token);
        let truncated_summary = if summary_tokens > max_summary_tokens {
            // Keep the end of the summary to preserve recent context
            let max_chars = max_summary_tokens.saturating_mul(chars_per_token.max(1));
            let char_count = current_summary.chars().count();
            if char_count > max_chars {
                let skip = char_count - max_chars;
                format!(
                    "..{}",
                    current_summary.chars().skip(skip).collect::<String>()
                )
            } else {
                current_summary.to_string()
            }
        } else {
            current_summary.to_string()
        };

        format!(
            "Update the summary with new conversation content.\n\nCurrent summary:\n{truncated_summary}\n\nNew transcript chunk:\n{chunk}\n\nReturn the updated concise summary."
        )
    }
}

fn chunk_text(text: &str, chunk_tokens: usize, chars_per_token: usize) -> Vec<String> {
    let max_chars = chunk_tokens.max(1).saturating_mul(chars_per_token.max(1));
    let mut chunks = Vec::new();
    let mut current = String::new();

    for line in text.lines() {
        let line_len = line.chars().count() + 1;
        if !current.is_empty() && current.chars().count() + line_len > max_chars {
            chunks.push(current.trim_end().to_string());
            current.clear();
        }
        current.push_str(line);
        current.push('\n');
    }

    if !current.trim().is_empty() {
        chunks.push(current.trim_end().to_string());
    }

    if chunks.is_empty() {
        chunks.push(text.to_string());
    }

    chunks
}

fn collect_transcript_lines(value: &Value, out: &mut Vec<String>) {
    match value {
        Value::Array(items) => {
            for item in items {
                collect_transcript_lines(item, out);
            }
        }
        Value::Object(map) => {
            // Track which keys we've already processed to avoid double-counting
            let mut processed_content = false;

            // If this is a message with role+content, add as "{role}: {content}"
            if let Some(role) = map.get("role").and_then(Value::as_str) {
                let content = map
                    .get("content")
                    .and_then(Value::as_str)
                    .or_else(|| map.get("text").and_then(Value::as_str));
                if let Some(content) = content {
                    out.push(format!("{role}: {content}"));
                    processed_content = true;
                }
            }

            // Add standalone text fields, skipping content/text if already included above
            for key in ["content", "text", "prompt", "response", "instructions"] {
                if processed_content && matches!(key, "content" | "text") {
                    continue;
                }
                if let Some(text) = map.get(key).and_then(Value::as_str) {
                    out.push(text.to_string());
                }
            }

            // Recurse into other fields
            for (key, value) in map {
                if matches!(
                    key.as_str(),
                    "role" | "content" | "text" | "prompt" | "response" | "instructions"
                ) {
                    continue;
                }
                collect_transcript_lines(value, out);
            }
        }
        _ => {}
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_estimate_tokens() {
        let text = "abcd";
        assert_eq!(estimate_tokens(text, 4), 1);
        assert_eq!(estimate_tokens(text, 3), 2);
    }
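
    // Added sketch: a role/content message should be rendered exactly once as
    // "{role}: {content}" and not duplicated by the standalone-field pass in
    // `collect_transcript_lines`.
    #[test]
    fn test_transcript_to_text_role_content() {
        let json = r#"[{"role": "user", "content": "Hello"}]"#;
        let text = transcript_to_text(json).unwrap();
        assert_eq!(text, "user: Hello");
    }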

    #[test]
    fn test_chunk_text() {
        let text = "Line one\nLine two\nLine three";
        let chunks = chunk_text(text, 2, 4);
        assert!(!chunks.is_empty());
    }
}