Skip to main content

koda_core/
model_context.rs

//! Model context window lookup.
//!
//! Maps model names to their known context window sizes (in tokens).
//! Falls back to a conservative default (128K) for unknown models.
//!
//! Context window size drives:
//! - Output caps (see [`crate::output_caps`]) — larger windows get more tool output
//! - Auto-compaction threshold — triggers at ~80% of context window
//! - Microcompact aggressiveness — scales with remaining capacity

/// Fallback window (in tokens) used when no known model family matches.
const DEFAULT_CONTEXT: usize = 128_000;

/// Hard floor for the window size; applied to local / auto-detected models.
const MIN_CONTEXT: usize = 4_096;
16
17/// Look up the context window size for a model by name.
18///
19/// Matches known models by prefix/pattern. Returns the full context window
20/// in tokens. The caller should apply a usage budget (e.g., 95%) to leave
21/// room for the response.
22///
23/// # Examples
24///
25/// ```
26/// use koda_core::model_context::context_window_for_model;
27///
28/// assert_eq!(context_window_for_model("claude-sonnet-4-6"), 200_000);
29/// assert_eq!(context_window_for_model("gpt-4o"), 128_000);
30/// assert_eq!(context_window_for_model("gemini-2.5-pro"), 1_048_576);
31///
32/// // Unknown models get a conservative default:
33/// assert_eq!(context_window_for_model("mystery-model"), 128_000);
34/// ```
35pub fn context_window_for_model(model: &str) -> usize {
36    let m = model.to_lowercase();
37
38    // ── Anthropic ─────────────────────────────────────
39    // Default context window for all Claude models is 200K tokens.
40    //
41    // Claude 4.x models (opus-4-6, sonnet-4-6, sonnet-4-5, sonnet-4)
42    // support 1M context via opt-in beta header:
43    //   anthropic-beta: context-1m-2025-08-07
44    // Virtual "-1m" suffix (e.g. claude-sonnet-4-6-1m) selects the
45    // extended context variant. Only eligible models get 1M;
46    // ineligible models with -1m fall through to their normal 200K.
47    if m.ends_with("-1m") && m.contains("claude") {
48        let base = &m[..m.len() - 3]; // strip "-1m"
49        if base.starts_with("claude-opus-4-6")
50            || base.starts_with("claude-sonnet-4-6")
51            || base.starts_with("claude-sonnet-4-5")
52            || base.starts_with("claude-sonnet-4-2")
53            || base.starts_with("claude-sonnet-4")
54        {
55            return 1_000_000;
56        }
57        // Ineligible model with -1m suffix — fall through to 200K
58        tracing::warn!(
59            "Model '{}' does not support 1M extended context; using 200K.",
60            model
61        );
62        return 200_000;
63    }
64    if m.starts_with("claude-opus-4")
65        || m.starts_with("claude-sonnet-4")
66        || m.starts_with("claude-haiku-4")
67    {
68        return 200_000;
69    }
70    if m.starts_with("claude-3.5") || m.starts_with("claude-3-5") {
71        return 200_000;
72    }
73    if m.starts_with("claude-3") {
74        return 200_000;
75    }
76    // Catch-all for future Claude models we haven't seen yet.
77    if m.contains("claude") {
78        tracing::warn!(
79            "Unknown Claude model '{}' — assuming 200K context. \
80             Update model_context.rs if this model has a different context window.",
81            model
82        );
83        return 200_000;
84    }
85
86    // ── OpenAI ────────────────────────────────────────────
87    if m.starts_with("gpt-4o") || m.starts_with("gpt-4.1") || m.starts_with("chatgpt-4o") {
88        return 128_000;
89    }
90    if m.starts_with("gpt-4-turbo") || m.starts_with("gpt-4-1106") || m.starts_with("gpt-4-0125") {
91        return 128_000;
92    }
93    if m.starts_with("gpt-4") {
94        return 8_192;
95    }
96    if m.starts_with("gpt-3.5-turbo-16k") {
97        return 16_384;
98    }
99    if m.starts_with("gpt-3.5") {
100        return 16_384;
101    }
102    if m.starts_with("o1") || m.starts_with("o3") || m.starts_with("o4") {
103        return 200_000;
104    }
105
106    // ── Google Gemini ─────────────────────────────────────
107    if m.contains("gemini-2.5") {
108        return 1_048_576;
109    }
110    if m.contains("gemini-2.0") {
111        return 1_048_576;
112    }
113    if m.contains("gemini-1.5-pro") {
114        return 2_097_152;
115    }
116    if m.contains("gemini-1.5-flash") {
117        return 1_048_576;
118    }
119    if m.contains("gemini") {
120        return 1_048_576;
121    }
122
123    // ── Grok (xAI) ────────────────────────────────────────
124    if m.starts_with("grok-3") {
125        return 131_072;
126    }
127    if m.starts_with("grok") {
128        return 131_072;
129    }
130
131    // ── DeepSeek ──────────────────────────────────────────
132    if m.contains("deepseek") {
133        return 128_000;
134    }
135
136    // ── Mistral ───────────────────────────────────────────
137    if m.contains("mistral-large") {
138        return 128_000;
139    }
140    if m.contains("mistral-medium") {
141        return 32_000;
142    }
143    if m.contains("mistral-small") || m.contains("mistral-7b") {
144        return 32_000;
145    }
146    if m.contains("mixtral") || m.contains("mistral") {
147        return 32_000;
148    }
149
150    // ── Meta Llama ────────────────────────────────────────
151    if m.contains("llama-3.3") || m.contains("llama-3.1") {
152        return 128_000;
153    }
154    if m.contains("llama-3") || m.contains("llama3") {
155        return 8_192;
156    }
157    if m.contains("llama") {
158        return 4_096;
159    }
160
161    // ── Qwen ──────────────────────────────────────────────
162    if m.contains("qwen-2.5") || m.contains("qwen2.5") {
163        return 128_000;
164    }
165    if m.contains("qwen") {
166        return 32_000;
167    }
168
169    // ── Local / auto-detect ───────────────────────────────
170    if m == "auto-detect" {
171        return MIN_CONTEXT;
172    }
173
174    DEFAULT_CONTEXT
175}
176
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert a batch of (model name, expected window) pairs.
    fn check(cases: &[(&str, usize)]) {
        for &(model, expected) in cases {
            assert_eq!(context_window_for_model(model), expected, "model: {}", model);
        }
    }

    #[test]
    fn test_claude_models() {
        // Claude 4.x: 200K default (1M available via beta header opt-in);
        // Claude 3.x: 200K.
        check(&[
            ("claude-sonnet-4-6", 200_000),
            ("claude-opus-4-6", 200_000),
            ("claude-opus-4-5-20251101", 200_000),
            ("claude-haiku-4-5-20251001", 200_000),
            ("claude-sonnet-4-5-20250929", 200_000),
            ("claude-opus-4-20250514", 200_000),
            ("claude-sonnet-4-20250514", 200_000),
            ("claude-3-opus-20240229", 200_000),
            ("claude-3-haiku-20240307", 200_000),
            ("claude-3-5-sonnet-20240620", 200_000),
        ]);
    }

    #[test]
    fn test_claude_1m_virtual_models() {
        // Eligible models with -1m suffix get 1M.
        check(&[
            ("claude-sonnet-4-6-1m", 1_000_000),
            ("claude-opus-4-6-1m", 1_000_000),
            ("claude-sonnet-4-5-20250929-1m", 1_000_000),
            ("claude-sonnet-4-20250514-1m", 1_000_000),
        ]);
    }

    #[test]
    fn test_claude_1m_ineligible_models() {
        // Ineligible models with -1m suffix stay at 200K.
        check(&[
            ("claude-opus-4-5-20251101-1m", 200_000),
            ("claude-haiku-4-5-20251001-1m", 200_000),
            ("claude-3-opus-20240229-1m", 200_000),
        ]);
    }

    #[test]
    fn test_gpt4o_models() {
        check(&[
            ("gpt-4o", 128_000),
            ("gpt-4o-mini", 128_000),
            ("gpt-4.1", 128_000),
        ]);
    }

    #[test]
    fn test_gpt4_legacy() {
        check(&[("gpt-4", 8_192), ("gpt-4-0613", 8_192)]);
    }

    #[test]
    fn test_gpt4_turbo() {
        check(&[("gpt-4-turbo", 128_000), ("gpt-4-turbo-preview", 128_000)]);
    }

    #[test]
    fn test_o_series() {
        check(&[
            ("o1", 200_000),
            ("o1-preview", 200_000),
            ("o3-mini", 200_000),
            ("o4-mini", 200_000),
        ]);
    }

    #[test]
    fn test_gemini_models() {
        check(&[
            ("gemini-2.0-flash", 1_048_576),
            ("gemini-2.5-pro", 1_048_576),
            ("gemini-1.5-pro", 2_097_152),
        ]);
    }

    #[test]
    fn test_deepseek() {
        check(&[("deepseek-chat", 128_000), ("deepseek-coder", 128_000)]);
    }

    #[test]
    fn test_llama_models() {
        check(&[
            ("llama-3.3-70b-versatile", 128_000),
            ("llama-3-8b", 8_192),
        ]);
    }

    #[test]
    fn test_auto_detect_is_conservative() {
        check(&[("auto-detect", MIN_CONTEXT)]);
    }

    #[test]
    fn test_unknown_model_gets_default() {
        check(&[("some-random-model", DEFAULT_CONTEXT)]);
    }

    #[test]
    fn test_case_insensitive() {
        check(&[("Claude-Sonnet-4-6", 200_000), ("GPT-4O", 128_000)]);
    }
}