// lean_ctx/core/model_registry.rs
use std::collections::HashMap;
use std::sync::OnceLock;

4static BUNDLED_REGISTRY: &str = include_str!("../../data/model_registry.json");
5
6static PARSED_BUNDLED: OnceLock<Registry> = OnceLock::new();
7static PARSED_LOCAL: OnceLock<Option<Registry>> = OnceLock::new();
8
/// Registry record for a single model.
#[derive(Debug, Clone)]
struct ModelEntry {
    /// Context window size, taken from the entry's `context_window` JSON field.
    context_window: usize,
}

14#[derive(Debug, Clone, Default)]
15struct Registry {
16 models: HashMap<String, ModelEntry>,
17 families: HashMap<String, usize>,
18}
19
20fn parse_registry(json: &str) -> Option<Registry> {
21 let v: serde_json::Value = serde_json::from_str(json).ok()?;
22 let mut models = HashMap::new();
23 if let Some(obj) = v.get("models").and_then(|m| m.as_object()) {
24 for (key, entry) in obj {
25 if let Some(window) = entry
26 .get("context_window")
27 .and_then(serde_json::Value::as_u64)
28 {
29 models.insert(
30 key.to_lowercase(),
31 ModelEntry {
32 context_window: window as usize,
33 },
34 );
35 }
36 }
37 }
38 let mut families = HashMap::new();
39 if let Some(obj) = v.get("families").and_then(|f| f.as_object()) {
40 for (key, val) in obj {
41 if let Some(window) = val.as_u64() {
42 families.insert(key.to_lowercase(), window as usize);
43 }
44 }
45 }
46 Some(Registry { models, families })
47}
48
49fn bundled() -> &'static Registry {
50 PARSED_BUNDLED.get_or_init(|| parse_registry(BUNDLED_REGISTRY).unwrap_or_default())
51}
52
53fn local_registry() -> Option<&'static Registry> {
54 PARSED_LOCAL
55 .get_or_init(|| {
56 let data_dir = crate::core::data_dir::lean_ctx_data_dir().ok()?;
57 let path = data_dir.join("model_registry.json");
58 let content = std::fs::read_to_string(path).ok()?;
59 parse_registry(&content)
60 })
61 .as_ref()
62}
63
64fn user_config_override(model: &str) -> Option<usize> {
65 let cfg = crate::core::config::Config::load();
66 cfg.model_context_windows
67 .get(model)
68 .or_else(|| cfg.model_context_windows.get(&model.to_lowercase()))
69 .copied()
70}
71
72fn registry_lookup(model: &str, registry: &Registry) -> Option<usize> {
73 let m = model.to_lowercase();
74
75 if let Some(entry) = registry.models.get(&m) {
77 return Some(entry.context_window);
78 }
79
80 let mut best_match: Option<(usize, usize)> = None; for (key, entry) in ®istry.models {
83 if m.starts_with(key.as_str()) && m[key.len()..].starts_with(['-', '_', '.']) || m == *key {
84 let key_len = key.len();
85 if best_match.is_none_or(|(bl, _)| key_len > bl) {
86 best_match = Some((key_len, entry.context_window));
87 }
88 }
89 }
90 if let Some((_, window)) = best_match {
91 return Some(window);
92 }
93
94 let mut best_family: Option<(usize, usize)> = None;
96 for (family, window) in ®istry.families {
97 if m.contains(family.as_str()) {
98 let flen = family.len();
99 if best_family.is_none_or(|(bl, _)| flen > bl) {
100 best_family = Some((flen, *window));
101 }
102 }
103 }
104 best_family.map(|(_, w)| w)
105}
106
107pub fn context_window_for_model(model: &str) -> usize {
110 if let Some(w) = user_config_override(model) {
112 return w;
113 }
114
115 if let Some(local) = local_registry() {
117 if let Some(w) = registry_lookup(model, local) {
118 return w;
119 }
120 }
121
122 if let Some(w) = registry_lookup(model, bundled()) {
124 return w;
125 }
126
127 200_000
129}
130
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE: apart from `bundled_registry_parses`, these tests call
    // `context_window_for_model`, which checks the user configuration and the local
    // registry file before the bundled data. An override for one of these model
    // names on the machine running the tests would change the result.

    /// The embedded JSON must yield both model and family tables.
    #[test]
    fn bundled_registry_parses() {
        let reg = bundled();
        assert!(!reg.models.is_empty());
        assert!(!reg.families.is_empty());
    }

    #[test]
    fn exact_match_gpt55() {
        assert_eq!(context_window_for_model("gpt-5.5"), 1_048_576);
    }

    #[test]
    fn prefix_match_gpt55_variant() {
        assert_eq!(context_window_for_model("gpt-5.5-0513"), 1_048_576);
    }

    #[test]
    fn exact_match_gpt41() {
        assert_eq!(context_window_for_model("gpt-4.1"), 1_047_576);
    }

    #[test]
    fn family_match_gpt5() {
        assert_eq!(context_window_for_model("gpt-5.3-turbo"), 128_000);
    }

    #[test]
    fn family_match_claude() {
        assert_eq!(context_window_for_model("claude-unknown-version"), 200_000);
    }

    #[test]
    fn family_match_gemini() {
        assert_eq!(context_window_for_model("gemini-future-model"), 1_048_576);
    }

    #[test]
    fn unknown_model_returns_default() {
        assert_eq!(
            context_window_for_model("totally-unknown-model-xyz"),
            200_000
        );
    }
}