Skip to main content

lean_ctx/core/
model_registry.rs

1use std::collections::HashMap;
2use std::sync::OnceLock;
3
/// Raw JSON text of the model registry, embedded into the binary at compile
/// time from `data/model_registry.json` (path is relative to this source file).
static BUNDLED_REGISTRY: &str = include_str!("../../data/model_registry.json");

/// Parsed form of [`BUNDLED_REGISTRY`]; initialized once, on first use, by `bundled()`.
static PARSED_BUNDLED: OnceLock<Registry> = OnceLock::new();
/// Parsed local registry; initialized once, on first use, by `local_registry()`.
/// The cached value is `None` when no usable local registry could be loaded.
static PARSED_LOCAL: OnceLock<Option<Registry>> = OnceLock::new();
8
/// Registry record for a single, explicitly listed model.
#[derive(Clone, Debug)]
struct ModelEntry {
    /// Context window size recorded for the model in the registry JSON.
    context_window: usize,
}
13
14#[derive(Debug, Clone, Default)]
15struct Registry {
16    models: HashMap<String, ModelEntry>,
17    families: HashMap<String, usize>,
18}
19
20fn parse_registry(json: &str) -> Option<Registry> {
21    let v: serde_json::Value = serde_json::from_str(json).ok()?;
22    let mut models = HashMap::new();
23    if let Some(obj) = v.get("models").and_then(|m| m.as_object()) {
24        for (key, entry) in obj {
25            if let Some(window) = entry
26                .get("context_window")
27                .and_then(serde_json::Value::as_u64)
28            {
29                models.insert(
30                    key.to_lowercase(),
31                    ModelEntry {
32                        context_window: window as usize,
33                    },
34                );
35            }
36        }
37    }
38    let mut families = HashMap::new();
39    if let Some(obj) = v.get("families").and_then(|f| f.as_object()) {
40        for (key, val) in obj {
41            if let Some(window) = val.as_u64() {
42                families.insert(key.to_lowercase(), window as usize);
43            }
44        }
45    }
46    Some(Registry { models, families })
47}
48
49fn bundled() -> &'static Registry {
50    PARSED_BUNDLED.get_or_init(|| parse_registry(BUNDLED_REGISTRY).unwrap_or_default())
51}
52
53fn local_registry() -> Option<&'static Registry> {
54    PARSED_LOCAL
55        .get_or_init(|| {
56            let data_dir = crate::core::data_dir::lean_ctx_data_dir().ok()?;
57            let path = data_dir.join("model_registry.json");
58            let content = std::fs::read_to_string(path).ok()?;
59            parse_registry(&content)
60        })
61        .as_ref()
62}
63
64fn user_config_override(model: &str) -> Option<usize> {
65    let cfg = crate::core::config::Config::load();
66    cfg.model_context_windows
67        .get(model)
68        .or_else(|| cfg.model_context_windows.get(&model.to_lowercase()))
69        .copied()
70}
71
72fn registry_lookup(model: &str, registry: &Registry) -> Option<usize> {
73    let m = model.to_lowercase();
74
75    // Exact match
76    if let Some(entry) = registry.models.get(&m) {
77        return Some(entry.context_window);
78    }
79
80    // Prefix match: "gpt-5.5-0513" should match "gpt-5.5"
81    let mut best_match: Option<(usize, usize)> = None; // (key_len, window)
82    for (key, entry) in &registry.models {
83        if m.starts_with(key.as_str()) && m[key.len()..].starts_with(['-', '_', '.']) || m == *key {
84            let key_len = key.len();
85            if best_match.is_none_or(|(bl, _)| key_len > bl) {
86                best_match = Some((key_len, entry.context_window));
87            }
88        }
89    }
90    if let Some((_, window)) = best_match {
91        return Some(window);
92    }
93
94    // Family match (substring)
95    let mut best_family: Option<(usize, usize)> = None;
96    for (family, window) in &registry.families {
97        if m.contains(family.as_str()) {
98            let flen = family.len();
99            if best_family.is_none_or(|(bl, _)| flen > bl) {
100                best_family = Some((flen, *window));
101            }
102        }
103    }
104    best_family.map(|(_, w)| w)
105}
106
107/// Look up context window for a model name.
108/// Layers: User Config → Local Registry → Bundled Registry → 200k default.
109pub fn context_window_for_model(model: &str) -> usize {
110    // Layer 1: User config override
111    if let Some(w) = user_config_override(model) {
112        return w;
113    }
114
115    // Layer 2: Local registry (auto-updated via lean-ctx update)
116    if let Some(local) = local_registry() {
117        if let Some(w) = registry_lookup(model, local) {
118            return w;
119        }
120    }
121
122    // Layer 3: Bundled registry (compiled into binary)
123    if let Some(w) = registry_lookup(model, bundled()) {
124        return w;
125    }
126
127    // Fallback
128    200_000
129}
130
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): every lookup below goes through `context_window_for_model`,
    // which consults the user config and the local registry before the bundled
    // one, so results can differ on a machine that has overrides in place.

    /// Asserts that `model` resolves to `expected` through the full lookup chain.
    #[track_caller]
    fn assert_window(model: &str, expected: usize) {
        assert_eq!(context_window_for_model(model), expected);
    }

    /// The embedded JSON must yield at least one model and one family.
    #[test]
    fn bundled_registry_parses() {
        let registry = bundled();
        assert!(!registry.models.is_empty());
        assert!(!registry.families.is_empty());
    }

    #[test]
    fn exact_match_gpt55() {
        assert_window("gpt-5.5", 1_048_576);
    }

    /// A dated variant resolves via its base model name.
    #[test]
    fn prefix_match_gpt55_variant() {
        assert_window("gpt-5.5-0513", 1_048_576);
    }

    #[test]
    fn exact_match_gpt41() {
        assert_window("gpt-4.1", 1_047_576);
    }

    #[test]
    fn family_match_gpt5() {
        assert_window("gpt-5.3-turbo", 128_000);
    }

    #[test]
    fn family_match_claude() {
        assert_window("claude-unknown-version", 200_000);
    }

    #[test]
    fn family_match_gemini() {
        assert_window("gemini-future-model", 1_048_576);
    }

    /// Names matching no model or family fall back to the 200k default.
    #[test]
    fn unknown_model_returns_default() {
        assert_window("totally-unknown-model-xyz", 200_000);
    }
}