Skip to main content

mnemo_core/budget/
models.rs

1//! Per-model context-window table (v0.4.1 P1-4).
2//!
3//! Operators override via a `models.toml` shipped alongside their
4//! deployment; the constant table here is the fallback used when
5//! none is provided. Drift in vendor numbers is the main risk —
6//! the table is small and keyed by stable `ModelId` so a single
7//! rebase fixes the whole crate.
8
9use serde::{Deserialize, Serialize};
10
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
12pub enum ModelId {
13    Gpt5_1_400k,
14    Gpt5_1_128k,
15    Claude3_7Sonnet1m,
16    Claude3_7Sonnet200k,
17    Gemini2_5Pro2m,
18    Gemini2_5Pro1m,
19    DeepSeekV4_1m,
20    DeepSeekV3_128k,
21}
22
23impl ModelId {
24    pub fn as_str(&self) -> &'static str {
25        match self {
26            ModelId::Gpt5_1_400k => "gpt-5.1-400k",
27            ModelId::Gpt5_1_128k => "gpt-5.1-128k",
28            ModelId::Claude3_7Sonnet1m => "claude-3.7-sonnet-1m",
29            ModelId::Claude3_7Sonnet200k => "claude-3.7-sonnet-200k",
30            ModelId::Gemini2_5Pro2m => "gemini-2.5-pro-2m",
31            ModelId::Gemini2_5Pro1m => "gemini-2.5-pro-1m",
32            ModelId::DeepSeekV4_1m => "deepseek-v4-1m",
33            ModelId::DeepSeekV3_128k => "deepseek-v3-128k",
34        }
35    }
36}
37
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
39pub struct ContextWindow {
40    pub total_tokens: u32,
41    /// Recommended system-prompt reserve.
42    pub system_reserve: u32,
43    /// Recommended response reserve.
44    pub response_reserve: u32,
45}
46
47/// Default per-model windows the planner uses when no override is
48/// provided. Numbers checked against vendor docs as of 2026-04-28;
49/// the planner is parameterised by `ContextBudget` so deployments
50/// with different reserves don't need to ship a code change.
51pub const MODEL_TABLE: &[(ModelId, ContextWindow)] = &[
52    (
53        ModelId::Gpt5_1_400k,
54        ContextWindow {
55            total_tokens: 400_000,
56            system_reserve: 8_000,
57            response_reserve: 16_000,
58        },
59    ),
60    (
61        ModelId::Gpt5_1_128k,
62        ContextWindow {
63            total_tokens: 128_000,
64            system_reserve: 4_000,
65            response_reserve: 8_000,
66        },
67    ),
68    (
69        ModelId::Claude3_7Sonnet1m,
70        ContextWindow {
71            total_tokens: 1_000_000,
72            system_reserve: 16_000,
73            response_reserve: 32_000,
74        },
75    ),
76    (
77        ModelId::Claude3_7Sonnet200k,
78        ContextWindow {
79            total_tokens: 200_000,
80            system_reserve: 8_000,
81            response_reserve: 16_000,
82        },
83    ),
84    (
85        ModelId::Gemini2_5Pro2m,
86        ContextWindow {
87            total_tokens: 2_000_000,
88            system_reserve: 16_000,
89            response_reserve: 32_000,
90        },
91    ),
92    (
93        ModelId::Gemini2_5Pro1m,
94        ContextWindow {
95            total_tokens: 1_000_000,
96            system_reserve: 8_000,
97            response_reserve: 16_000,
98        },
99    ),
100    (
101        ModelId::DeepSeekV4_1m,
102        ContextWindow {
103            total_tokens: 1_000_000,
104            system_reserve: 8_000,
105            response_reserve: 24_000,
106        },
107    ),
108    (
109        ModelId::DeepSeekV3_128k,
110        ContextWindow {
111            total_tokens: 128_000,
112            system_reserve: 4_000,
113            response_reserve: 8_000,
114        },
115    ),
116];
117
118pub fn lookup(model: ModelId) -> ContextWindow {
119    MODEL_TABLE
120        .iter()
121        .find(|(m, _)| *m == model)
122        .map(|(_, w)| *w)
123        .unwrap_or(ContextWindow {
124            total_tokens: 128_000,
125            system_reserve: 4_000,
126            response_reserve: 8_000,
127        })
128}
129
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Every `ModelId` variant, listed by hand because the enum offers no
    /// built-in iteration. Iterating this list (rather than `MODEL_TABLE`)
    /// is what lets the tests notice a variant that was never wired into
    /// the table.
    const ALL_MODELS: [ModelId; 8] = [
        ModelId::Gpt5_1_400k,
        ModelId::Gpt5_1_128k,
        ModelId::Claude3_7Sonnet1m,
        ModelId::Claude3_7Sonnet200k,
        ModelId::Gemini2_5Pro2m,
        ModelId::Gemini2_5Pro1m,
        ModelId::DeepSeekV4_1m,
        ModelId::DeepSeekV3_128k,
    ];

    /// Compile-time tripwire: the match has no wildcard arm, so adding a
    /// `ModelId` variant breaks the build here. When that happens, add the
    /// variant both to this match and to `ALL_MODELS`.
    fn remind_to_extend_all_models(model: ModelId) {
        match model {
            ModelId::Gpt5_1_400k
            | ModelId::Gpt5_1_128k
            | ModelId::Claude3_7Sonnet1m
            | ModelId::Claude3_7Sonnet200k
            | ModelId::Gemini2_5Pro2m
            | ModelId::Gemini2_5Pro1m
            | ModelId::DeepSeekV4_1m
            | ModelId::DeepSeekV3_128k => {}
        }
    }

    #[test]
    fn all_models_lists_each_variant_once() {
        let mut seen = HashSet::new();
        for &model in &ALL_MODELS {
            remind_to_extend_all_models(model);
            assert!(
                seen.insert(model),
                "{:?} is listed twice in ALL_MODELS",
                model
            );
        }
    }

    #[test]
    fn deepseek_v4_table_entry_is_1m() {
        let w = lookup(ModelId::DeepSeekV4_1m);
        assert_eq!(w.total_tokens, 1_000_000);
    }

    #[test]
    fn every_model_has_distinct_string_id() {
        let mut seen = HashSet::new();
        for &model in &ALL_MODELS {
            assert!(
                seen.insert(model.as_str()),
                "duplicate model id: {}",
                model.as_str()
            );
        }
    }

    /// `lookup` takes a `ModelId`, so its 128k fallback cannot be reached
    /// with an "unknown" model; it only fires when a variant has no table
    /// row. Two real rows equal the fallback values, so comparing windows
    /// cannot detect a missing row — check the table's keys directly.
    #[test]
    fn every_variant_has_exactly_one_table_row() {
        assert_eq!(MODEL_TABLE.len(), ALL_MODELS.len());
        for &model in &ALL_MODELS {
            let rows: Vec<ContextWindow> = MODEL_TABLE
                .iter()
                .filter(|(m, _)| *m == model)
                .map(|(_, w)| *w)
                .collect();
            assert_eq!(
                rows.len(),
                1,
                "{:?} must appear exactly once in MODEL_TABLE",
                model
            );
            assert_eq!(lookup(model), rows[0]);
        }
    }

    #[test]
    fn reserves_fit_inside_the_window() {
        for (model, w) in MODEL_TABLE {
            let reserved = w
                .system_reserve
                .checked_add(w.response_reserve)
                .expect("reserves overflow u32");
            assert!(
                reserved < w.total_tokens,
                "{:?}: reserves ({}) must be smaller than the window ({})",
                model,
                reserved,
                w.total_tokens
            );
        }
    }
}