fm_rs/
context.rs

//! Context window tracking and compaction helpers.

use serde_json::Value;

use crate::error::Result;
use crate::model::SystemLanguageModel;
use crate::options::GenerationOptions;
use crate::session::Session;

/// Default context window size for Apple's on-device Foundation Models.
///
/// This value is based on observed behavior during WWDC 2025 sessions and early
/// developer testing. Apple has not officially documented the context window size.
/// The actual limit may vary by device, model version, or available memory.
///
/// For production use, monitor [`ContextUsage::utilization`] and implement
/// compaction strategies when approaching the limit.
pub const DEFAULT_CONTEXT_TOKENS: usize = 4096;

/// Configuration for estimating context usage.
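///
/// # Examples
///
/// A minimal sketch of configuring a custom budget; the import path assumes this
/// module is exposed as `fm_rs::context`, adjust as needed:
///
/// ```ignore
/// use fm_rs::context::ContextLimit;
///
/// let limit = ContextLimit::new(8192)
///     .with_reserved_response_tokens(1024)
///     .with_chars_per_token(3);
/// assert_eq!(limit.max_tokens, 8192);
/// assert_eq!(limit.reserved_response_tokens, 1024);
/// ```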
#[derive(Debug, Clone, Copy)]
pub struct ContextLimit {
    /// Maximum tokens available in the session context window.
    pub max_tokens: usize,
    /// Tokens reserved for the model's next response.
    pub reserved_response_tokens: usize,
    /// Estimated characters per token (English ~3-4, CJK ~1).
    pub chars_per_token: usize,
}

impl ContextLimit {
    /// Creates a new context limit with a max token budget.
    pub fn new(max_tokens: usize) -> Self {
        Self {
            max_tokens,
            reserved_response_tokens: 0,
            chars_per_token: 4,
        }
    }

    /// Creates a default configuration for on-device models.
    pub fn default_on_device() -> Self {
        Self {
            max_tokens: DEFAULT_CONTEXT_TOKENS,
            reserved_response_tokens: 512,
            chars_per_token: 4,
        }
    }

    /// Sets the reserved response tokens.
    pub fn with_reserved_response_tokens(mut self, tokens: usize) -> Self {
        self.reserved_response_tokens = tokens;
        self
    }

    /// Sets the character-per-token estimate.
    pub fn with_chars_per_token(mut self, chars: usize) -> Self {
        if chars > 0 {
            self.chars_per_token = chars;
        }
        self
    }
}

/// Estimated context usage for a session.
#[derive(Debug, Clone, Copy)]
pub struct ContextUsage {
    /// Estimated number of tokens consumed by the transcript.
    pub estimated_tokens: usize,
    /// Maximum tokens configured for the session.
    pub max_tokens: usize,
    /// Tokens reserved for the next response.
    pub reserved_response_tokens: usize,
    /// Estimated tokens available for prompts before hitting the limit.
    pub available_tokens: usize,
    /// Estimated utilization ratio (0.0 - 1.0+).
    pub utilization: f32,
    /// Whether the estimate exceeds the available budget.
    pub over_limit: bool,
}

/// Configuration for transcript compaction.
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Estimated tokens per chunk sent to the summarizer.
    pub chunk_tokens: usize,
    /// Instructions for the summarizer session.
    pub instructions: String,
    /// Options used for summary generation.
    pub summary_options: GenerationOptions,
    /// Estimated characters per token.
    pub chars_per_token: usize,
}

impl Default for CompactionConfig {
    fn default() -> Self {
        Self {
            chunk_tokens: 800,
            instructions: "Summarize the conversation for future context. Preserve user intent, key facts, decisions, and open questions. Keep the summary concise."
                .to_string(),
            summary_options: GenerationOptions::builder()
                .temperature(0.2)
                .max_response_tokens(256)
                .build(),
            chars_per_token: 4,
        }
    }
}

/// Estimates token usage for the session transcript JSON.
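///
/// # Examples
///
/// A minimal sketch of monitoring utilization against the default on-device budget;
/// the import path assumes this module is exposed as `fm_rs::context`:
///
/// ```ignore
/// use fm_rs::context::{context_usage_from_transcript, ContextLimit};
///
/// let transcript_json = r#"[{"role": "user", "content": "Hello"}]"#;
/// let limit = ContextLimit::default_on_device();
/// let usage = context_usage_from_transcript(transcript_json, &limit).unwrap();
/// if usage.utilization > 0.8 || usage.over_limit {
///     // Approaching the window: summarize older turns before the next prompt.
/// }
/// ```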
pub fn context_usage_from_transcript(
    transcript_json: &str,
    limit: &ContextLimit,
) -> Result<ContextUsage> {
    let transcript_text = transcript_to_text(transcript_json)?;
    let estimated_tokens = estimate_tokens(&transcript_text, limit.chars_per_token);
    let available_tokens = limit
        .max_tokens
        .saturating_sub(limit.reserved_response_tokens);
    let utilization = if limit.max_tokens == 0 {
        0.0
    } else {
        estimated_tokens as f32 / limit.max_tokens as f32
    };
    let over_limit = estimated_tokens > available_tokens;

    Ok(ContextUsage {
        estimated_tokens,
        max_tokens: limit.max_tokens,
        reserved_response_tokens: limit.reserved_response_tokens,
        available_tokens,
        utilization,
        over_limit,
    })
}

/// Compacts a transcript into a summary using the on-device model.
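///
/// Each chunk is summarized in a fresh [`Session`], carrying the running summary
/// forward so the final result covers the whole transcript.
///
/// # Examples
///
/// A minimal sketch; obtaining a [`SystemLanguageModel`] is not shown here, and the
/// import path assumes this module is exposed as `fm_rs::context`:
///
/// ```ignore
/// use fm_rs::context::{compact_transcript, CompactionConfig};
///
/// // `model` is a SystemLanguageModel constructed elsewhere; `transcript_json` is
/// // the serialized session transcript.
/// let summary = compact_transcript(&model, transcript_json, &CompactionConfig::default())?;
/// println!("compacted summary: {summary}");
/// ```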
pub fn compact_transcript(
    model: &SystemLanguageModel,
    transcript_json: &str,
    config: &CompactionConfig,
) -> Result<String> {
    let transcript_text = transcript_to_text(transcript_json)?;
    if transcript_text.trim().is_empty() {
        return Ok(String::new());
    }

    let chunks = chunk_text(
        &transcript_text,
        config.chunk_tokens,
        config.chars_per_token,
    );

    let mut summary = String::new();

    for chunk in chunks {
        let session = Session::with_instructions(model, &config.instructions)?;
        let prompt = build_summary_prompt(&summary, &chunk);
        let response = session.respond(&prompt, &config.summary_options)?;
        summary = response.into_content();
    }

    Ok(summary)
}

/// Extracts readable text from transcript JSON.
pub fn transcript_to_text(transcript_json: &str) -> Result<String> {
    let value: Value = serde_json::from_str(transcript_json)?;
    let mut lines = Vec::new();
    collect_transcript_lines(&value, &mut lines);

    if lines.is_empty() {
        Ok(transcript_json.to_string())
    } else {
        Ok(lines.join("\n"))
    }
}

/// Estimates tokens based on a characters-per-token heuristic.
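///
/// # Examples
///
/// The estimate rounds up, so short non-empty text always counts as at least one
/// token; the import path assumes this module is exposed as `fm_rs::context`:
///
/// ```ignore
/// use fm_rs::context::estimate_tokens;
///
/// assert_eq!(estimate_tokens("abcd", 4), 1);
/// assert_eq!(estimate_tokens("abcde", 4), 2);
/// assert_eq!(estimate_tokens("", 4), 0);
/// ```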
pub fn estimate_tokens(text: &str, chars_per_token: usize) -> usize {
    let denom = chars_per_token.max(1);
    let chars = text.chars().count();
    chars.div_ceil(denom)
}

fn build_summary_prompt(current_summary: &str, chunk: &str) -> String {
    if current_summary.trim().is_empty() {
        format!(
            "Summarize the following conversation transcript:\n\n{chunk}\n\nReturn a concise summary."
        )
    } else {
        format!(
            "Update the summary with new conversation content.\n\nCurrent summary:\n{current_summary}\n\nNew transcript chunk:\n{chunk}\n\nReturn the updated concise summary."
        )
    }
}

fn chunk_text(text: &str, chunk_tokens: usize, chars_per_token: usize) -> Vec<String> {
    let max_chars = chunk_tokens.max(1).saturating_mul(chars_per_token.max(1));
    let mut chunks = Vec::new();
    let mut current = String::new();

    for line in text.lines() {
        let line_len = line.chars().count() + 1;
        if !current.is_empty() && current.chars().count() + line_len > max_chars {
            chunks.push(current.trim_end().to_string());
            current.clear();
        }
        current.push_str(line);
        current.push('\n');
    }

    if !current.trim().is_empty() {
        chunks.push(current.trim_end().to_string());
    }

    if chunks.is_empty() {
        chunks.push(text.to_string());
    }

    chunks
}

fn collect_transcript_lines(value: &Value, out: &mut Vec<String>) {
    match value {
        Value::Array(items) => {
            for item in items {
                collect_transcript_lines(item, out);
            }
        }
        Value::Object(map) => {
            // Track which keys we've already processed to avoid double-counting
            let mut processed_content = false;

            // If this is a message with role+content, add as "{role}: {content}"
            if let Some(role) = map.get("role").and_then(Value::as_str) {
                let content = map
                    .get("content")
                    .and_then(Value::as_str)
                    .or_else(|| map.get("text").and_then(Value::as_str));
                if let Some(content) = content {
                    out.push(format!("{role}: {content}"));
                    processed_content = true;
                }
            }

            // Add standalone text fields, skipping content/text if already included above
            for key in ["content", "text", "prompt", "response", "instructions"] {
                if processed_content && matches!(key, "content" | "text") {
                    continue;
                }
                if let Some(text) = map.get(key).and_then(Value::as_str) {
                    out.push(text.to_string());
                }
            }

            // Recurse into other fields
            for (key, value) in map {
                if matches!(
                    key.as_str(),
                    "role" | "content" | "text" | "prompt" | "response" | "instructions"
                ) {
                    continue;
                }
                collect_transcript_lines(value, out);
            }
        }
        _ => {}
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_estimate_tokens() {
        let text = "abcd";
        assert_eq!(estimate_tokens(text, 4), 1);
        assert_eq!(estimate_tokens(text, 3), 2);
    }

    #[test]
    fn test_chunk_text() {
        let text = "Line one\nLine two\nLine three";
        let chunks = chunk_text(text, 2, 4);
        assert!(!chunks.is_empty());
    }
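
    // Extra coverage for the transcript helpers defined above; these tests only use
    // items from this module, so no model or external setup is required.
    #[test]
    fn test_transcript_to_text_extracts_roles() {
        let json = r#"[{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi"}]"#;
        let text = transcript_to_text(json).unwrap();
        assert_eq!(text, "user: Hello\nassistant: Hi");
    }

    #[test]
    fn test_context_usage_over_limit() {
        // "user: abcdefgh" is 14 chars; at 4 chars/token that rounds up to 4 tokens,
        // which exceeds the 2-token budget.
        let json = r#"[{"role": "user", "content": "abcdefgh"}]"#;
        let limit = ContextLimit::new(2).with_chars_per_token(4);
        let usage = context_usage_from_transcript(json, &limit).unwrap();
        assert_eq!(usage.estimated_tokens, 4);
        assert!(usage.over_limit);
    }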
}