// agtrace_providers/token_limits.rs

1// NOTE: Architecture decision - Provider-specific model definitions
2// Model specifications are distributed across provider modules (claude/models.rs,
3// codex/models.rs, gemini/models.rs) rather than centralized here because:
4// 1. Maintainability: Each provider can be updated independently without touching other providers
5// 2. Extensibility: Adding a new provider only requires creating a new module and adding one line here
6// 3. Separation of concerns: Provider-specific knowledge stays with the provider
7// This follows the "distributed definition, centralized resolution" pattern.
8
9use crate::claude::models as claude_models;
10use crate::codex::models as codex_models;
11use crate::gemini::models as gemini_models;
12use agtrace_types::{ModelLimitResolver, ModelSpec};
13use std::collections::HashMap;
14
/// [`ModelLimitResolver`] backed by the built-in providers (Claude, Codex, Gemini).
///
/// Stateless unit struct: construct it directly as `ProviderModelLimitResolver`.
pub struct ProviderModelLimitResolver;

impl ModelLimitResolver for ProviderModelLimitResolver {
    // Thin delegation to the module-level free function so the resolution
    // logic stays independently testable without constructing the resolver.
    fn resolve_model_limit(&self, model: &str) -> Option<ModelSpec> {
        resolve_model_limit(model)
    }
}
22
23/// Resolve model context window limit using longest prefix matching
24///
25/// NOTE: Why longest prefix matching instead of exact matching?
26/// Model providers release new minor versions frequently (e.g., claude-sonnet-4-5-20250929).
27/// Exact matching would require updating our codebase for every minor release, which is:
28/// - High maintenance burden for OSS contributors
29/// - Fragile (breaks on unknown versions)
30/// - Unnecessary (minor versions rarely change context limits)
31///
32/// Longest prefix matching allows us to:
33/// - Define "claude-sonnet-4-5" once and match all dated variants (20250929, 20260101, etc.)
34/// - Gracefully handle unknown models (return None instead of incorrect data)
35/// - Reduce false positives by preferring more specific matches
36///
37/// Resolution strategy:
38/// 1. Collect all provider-defined model prefixes
39/// 2. Find the longest prefix match for the given model name
40/// 3. Return the corresponding limit, or None if no match found
41///
42/// Example:
43/// - "claude-sonnet-4-5-20250929" matches "claude-sonnet-4-5" (200K)
44/// - "gpt-5.1-codex-max-2025" matches "gpt-5.1-codex-max" (400K)
45/// - "gemini-2.5-flash-exp" matches "gemini-2.5-flash" (1M)
46fn resolve_model_limit(model_name: &str) -> Option<ModelSpec> {
47    // NOTE: Why aggregate on every call instead of using lazy_static?
48    // The aggregation overhead is negligible (< 100 entries, ~microseconds) compared to
49    // the benefits of simplicity and testability. If profiling shows this is a bottleneck,
50    // we can optimize with lazy_static/OnceCell later. YAGNI principle applies here.
51    let all_limits: HashMap<&str, (u64, f64)> = [
52        claude_models::get_model_limits(),
53        codex_models::get_model_limits(),
54        gemini_models::get_model_limits(),
55    ]
56    .into_iter()
57    .flat_map(|map| map.into_iter())
58    .collect();
59
60    // Longest prefix matching algorithm
61    // NOTE: This is O(n) where n = number of defined model prefixes (~30-50).
62    // We prefer readability over premature optimization (e.g., trie structures).
63    let mut best_match: Option<(u64, f64)> = None;
64    let mut best_len = 0;
65
66    for (prefix, &(limit, buffer)) in &all_limits {
67        if model_name.starts_with(prefix) && prefix.len() > best_len {
68            best_match = Some((limit, buffer));
69            best_len = prefix.len();
70        }
71    }
72
73    best_match.map(|(max_tokens, compaction_buffer_pct)| ModelSpec {
74        max_tokens,
75        compaction_buffer_pct,
76    })
77}
78
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `model` resolves to a spec with the given limit and buffer.
    fn assert_resolves(model: &str, max_tokens: u64, compaction_buffer_pct: f64) {
        assert_eq!(
            resolve_model_limit(model),
            Some(ModelSpec {
                max_tokens,
                compaction_buffer_pct
            }),
            "unexpected spec for {model}"
        );
    }

    #[test]
    fn test_claude_models() {
        // Exact match.
        assert_resolves("claude-sonnet-4-5", 200_000, 22.5);

        // Prefix matches on dated minor versions.
        assert_resolves("claude-sonnet-4-5-20250929", 200_000, 22.5);
        assert_resolves("claude-haiku-4-5-20251001", 200_000, 22.5);

        // Claude 3.5 series.
        assert_resolves("claude-3-5-sonnet-20241022", 200_000, 22.5);

        // Claude 3 fallback.
        assert_resolves("claude-3-opus-20240229", 200_000, 22.5);
    }

    #[test]
    fn test_codex_models() {
        // GPT-5.2.
        assert_resolves("gpt-5.2", 400_000, 0.0);

        // GPT-5.1 series.
        assert_resolves("gpt-5.1-codex-max", 400_000, 0.0);
        assert_resolves("gpt-5.1-codex", 400_000, 0.0);

        // GPT-5 series.
        assert_resolves("gpt-5-codex", 400_000, 0.0);
        assert_resolves("gpt-5", 400_000, 0.0);
    }

    #[test]
    fn test_gemini_models() {
        // Gemini 2.5 series.
        assert_resolves("gemini-2.5-pro", 1_048_576, 0.0);
        assert_resolves("gemini-2.5-flash", 1_048_576, 0.0);

        // Gemini 2.0 series.
        assert_resolves("gemini-2.0-flash", 1_048_576, 0.0);
    }

    #[test]
    fn test_unknown_model() {
        // No defined prefix matches any of these; resolution must return None.
        for unknown in ["unknown-model", "gpt-3", "claude-2"] {
            assert_eq!(resolve_model_limit(unknown), None, "should not match {unknown}");
        }
    }

    #[test]
    fn test_longest_prefix_matching() {
        // "gpt-5.1-codex-max-2025" must match "gpt-5.1-codex-max", not the shorter
        // "gpt-5.1" (same value here, but the algorithm must prefer the longest match).
        assert_resolves("gpt-5.1-codex-max-2025", 400_000, 0.0);

        // "claude-sonnet-4-5-20250929" must match "claude-sonnet-4-5", not "claude-sonnet-4".
        assert_resolves("claude-sonnet-4-5-20250929", 200_000, 22.5);
    }

    #[test]
    fn test_prefix_match_with_suffix() {
        // Any suffix appended to a known prefix still resolves.
        assert_resolves("claude-3-5-sonnet-custom-version", 200_000, 22.5);
        assert_resolves("gpt-5.1-codex-experimental", 400_000, 0.0);
    }
}