// agtrace_providers/token_limits.rs
1// NOTE: Architecture decision - Provider-specific model definitions
2// Model specifications are distributed across provider modules (claude/models.rs,
3// codex/models.rs, gemini/models.rs) rather than centralized here because:
4// 1. Maintainability: Each provider can be updated independently without touching other providers
5// 2. Extensibility: Adding a new provider only requires creating a new module and adding one line here
6// 3. Separation of concerns: Provider-specific knowledge stays with the provider
7// This follows the "distributed definition, centralized resolution" pattern.
8
9use crate::claude::models as claude_models;
10use crate::codex::models as codex_models;
11use crate::gemini::models as gemini_models;
12use agtrace_types::{ModelLimitResolver, ModelSpec};
13use std::collections::HashMap;
14
/// Adapter exposing provider-aggregated model specs through the
/// `ModelLimitResolver` trait from `agtrace_types`.
///
/// Stateless unit struct: all model data lives in the provider modules
/// (claude/codex/gemini) and is aggregated per lookup by the module-level
/// `resolve_model_limit` function below.
pub struct ProviderModelLimitResolver;

impl ModelLimitResolver for ProviderModelLimitResolver {
    /// Delegates to the module-level longest-prefix resolver; returns `None`
    /// for models no provider defines.
    fn resolve_model_limit(&self, model: &str) -> Option<ModelSpec> {
        resolve_model_limit(model)
    }
}
22
23/// Resolve model context window limit using longest prefix matching
24///
25/// NOTE: Why longest prefix matching instead of exact matching?
26/// Model providers release new minor versions frequently (e.g., claude-sonnet-4-5-20250929).
27/// Exact matching would require updating our codebase for every minor release, which is:
28/// - High maintenance burden for OSS contributors
29/// - Fragile (breaks on unknown versions)
30/// - Unnecessary (minor versions rarely change context limits)
31///
32/// Longest prefix matching allows us to:
33/// - Define "claude-sonnet-4-5" once and match all dated variants (20250929, 20260101, etc.)
34/// - Gracefully handle unknown models (return None instead of incorrect data)
35/// - Reduce false positives by preferring more specific matches
36///
37/// Resolution strategy:
38/// 1. Collect all provider-defined model prefixes
39/// 2. Find the longest prefix match for the given model name
40/// 3. Return the corresponding limit, or None if no match found
41///
42/// Example:
43/// - "claude-sonnet-4-5-20250929" matches "claude-sonnet-4-5" (200K)
44/// - "gpt-5.1-codex-max-2025" matches "gpt-5.1-codex-max" (400K)
45/// - "gemini-2.5-flash-exp" matches "gemini-2.5-flash" (1M)
46fn resolve_model_limit(model_name: &str) -> Option<ModelSpec> {
47 // NOTE: Why aggregate on every call instead of using lazy_static?
48 // The aggregation overhead is negligible (< 100 entries, ~microseconds) compared to
49 // the benefits of simplicity and testability. If profiling shows this is a bottleneck,
50 // we can optimize with lazy_static/OnceCell later. YAGNI principle applies here.
51 let all_limits: HashMap<&str, (u64, f64)> = [
52 claude_models::get_model_limits(),
53 codex_models::get_model_limits(),
54 gemini_models::get_model_limits(),
55 ]
56 .into_iter()
57 .flat_map(|map| map.into_iter())
58 .collect();
59
60 // Longest prefix matching algorithm
61 // NOTE: This is O(n) where n = number of defined model prefixes (~30-50).
62 // We prefer readability over premature optimization (e.g., trie structures).
63 let mut best_match: Option<(u64, f64)> = None;
64 let mut best_len = 0;
65
66 for (prefix, &(limit, buffer)) in &all_limits {
67 if model_name.starts_with(prefix) && prefix.len() > best_len {
68 best_match = Some((limit, buffer));
69 best_len = prefix.len();
70 }
71 }
72
73 best_match.map(|(max_tokens, compaction_buffer_pct)| ModelSpec {
74 max_tokens,
75 compaction_buffer_pct,
76 })
77}
78
#[cfg(test)]
mod tests {
    use super::*;

    /// Helper: every model name in `names` must resolve to the same spec.
    fn assert_all(names: &[&str], max_tokens: u64, compaction_buffer_pct: f64) {
        for name in names {
            assert_eq!(
                resolve_model_limit(name),
                Some(ModelSpec {
                    max_tokens,
                    compaction_buffer_pct
                }),
                "unexpected spec for {}",
                name
            );
        }
    }

    #[test]
    fn test_claude_models() {
        // Exact match, dated minor versions, the 3.5 series, and the Claude 3
        // fallback all share a 200K window with a 22.5% compaction buffer.
        assert_all(
            &[
                "claude-sonnet-4-5",
                "claude-sonnet-4-5-20250929",
                "claude-haiku-4-5-20251001",
                "claude-3-5-sonnet-20241022",
                "claude-3-opus-20240229",
            ],
            200_000,
            22.5,
        );
    }

    #[test]
    fn test_codex_models() {
        // GPT-5.x family: 400K window, no compaction buffer.
        assert_all(
            &[
                "gpt-5.2",
                "gpt-5.1-codex-max",
                "gpt-5.1-codex",
                "gpt-5-codex",
                "gpt-5",
            ],
            400_000,
            0.0,
        );
    }

    #[test]
    fn test_gemini_models() {
        // Gemini 2.x family: 1M (1_048_576) window, no compaction buffer.
        assert_all(
            &["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.0-flash"],
            1_048_576,
            0.0,
        );
    }

    #[test]
    fn test_unknown_model() {
        // Names with no defined prefix must yield None, never a guess.
        for name in &["unknown-model", "gpt-3", "claude-2"] {
            assert_eq!(resolve_model_limit(name), None, "expected None for {}", name);
        }
    }

    #[test]
    fn test_longest_prefix_matching() {
        // "gpt-5.1-codex-max-2025" must match "gpt-5.1-codex-max", not the
        // shorter "gpt-5.1" (both happen to be 400K; the longest prefix wins).
        assert_all(&["gpt-5.1-codex-max-2025"], 400_000, 0.0);

        // "claude-sonnet-4-5-20250929" must match "claude-sonnet-4-5",
        // not "claude-sonnet-4".
        assert_all(&["claude-sonnet-4-5-20250929"], 200_000, 22.5);
    }

    #[test]
    fn test_prefix_match_with_suffix() {
        // Arbitrary suffixes after a known prefix still resolve.
        assert_all(&["claude-3-5-sonnet-custom-version"], 200_000, 22.5);
        assert_all(&["gpt-5.1-codex-experimental"], 400_000, 0.0);
    }
}
250}