Skip to main content

bamboo_engine/
model_areas.rs

//! Single, explicit boundary between **global** and **session-bound** model
//! configuration.
//!
//! The product configures models for many distinct *areas* (chat, fast,
//! task-summary, memory-background, vision, sub-agent, …). The scope rules are:
//!
//! - **Session-bound** — ONLY the main *chat* model + *reasoning effort*. A
//!   session may override these; they cascade `session → request → provider
//!   default` (see `session_app::execute`).
//! - **Global** — every *auxiliary* area (fast, task-summary, memory-background,
//!   vision, sub-agent). These are read from server config (`defaults.<area>`
//!   with a provider/global fallback) and **must never be read from a session**.
//!
//! This module is the one place that enforces that split. The resolvers here
//! take no [`Session`](bamboo_domain::Session) parameter, so they *cannot* read
//! one — an auxiliary area model can't accidentally start tracking a
//! per-session override. The underlying per-area logic lives in
//! [`crate::model_config_helper`]; this layer groups the auxiliary trio that
//! ~8 call sites previously resolved by hand.
19
20use std::sync::Arc;
21
22use bamboo_domain::reasoning::ReasoningEffort;
23use bamboo_domain::ProviderModelRef;
24use bamboo_llm::{Config, ProviderRegistry, ResolvedModel};
25
26use crate::model_config_helper::{
27    resolve_background_model, resolve_fast_model, resolve_subagent_model,
28    resolve_task_summary_model, resolve_vision_model,
29};
30
31/// The auxiliary (non-chat) models, all resolved from **global** config for a
32/// given provider routing key. None of these are session-bound.
33///
34/// Each `*_ref` is the configured `defaults.<area>` [`ProviderModelRef`] (or
35/// `None` in legacy mode), kept alongside the resolved model so callers that
36/// snapshot the reference (e.g. the execute config snapshot) don't re-read it.
37pub struct GlobalAreaModels {
38    /// Fast/cheap model — title generation, lightweight tasks.
39    pub fast: Option<ResolvedModel>,
40    pub fast_ref: Option<ProviderModelRef>,
41    /// Memory/background model — reflection, background memory work.
42    pub background: Option<ResolvedModel>,
43    pub background_ref: Option<ProviderModelRef>,
44    /// Task-summary model — conversation/task summarization and compression.
45    pub summarization: Option<ResolvedModel>,
46    pub summarization_ref: Option<ProviderModelRef>,
47}
48
49/// Resolve the auxiliary area models from **global** config.
50///
51/// `provider_name` is only the routing/fallback key (request's provider, or the
52/// globally active provider). It selects *which provider's* global config to
53/// fall back to when an area is unconfigured — it is never a session value, and
54/// `defaults.<area>` (when set) wins regardless of it.
55///
56/// Deliberately takes no `Session`: auxiliary models are global by design.
57pub fn resolve_global_area_models(
58    config: &Config,
59    provider_name: &str,
60    provider_registry: &Arc<ProviderRegistry>,
61) -> GlobalAreaModels {
62    let defaults = config.defaults.as_ref();
63    GlobalAreaModels {
64        fast: resolve_fast_model(config, provider_name, provider_registry),
65        fast_ref: defaults.and_then(|d| d.fast.clone()),
66        background: resolve_background_model(config, provider_name, provider_registry),
67        background_ref: defaults.and_then(|d| d.memory_background.clone()),
68        summarization: resolve_task_summary_model(config, provider_name, provider_registry),
69        summarization_ref: defaults.and_then(|d| d.task_summary.clone()),
70    }
71}
72
73/// Vision model — **global**. Resolved on demand (only when a request actually
74/// carries an image), so it is not part of [`GlobalAreaModels`]'s eager trio.
75/// Still global-only: takes no session. `provider_name` is the fallback key.
76pub fn resolve_global_vision_model(
77    config: &Config,
78    provider_name: &str,
79    provider_registry: &Arc<ProviderRegistry>,
80) -> Option<ResolvedModel> {
81    resolve_vision_model(config, provider_name, provider_registry)
82}
83
84/// Sub-agent model for a given subagent type — **global**. Resolved on demand
85/// at spawn time. Global-only: takes no session. `provider_name` is the
86/// fallback key; `subagent_type` selects a per-type override under
87/// `defaults.subagent_models`.
88pub fn resolve_global_subagent_model(
89    config: &Config,
90    provider_name: &str,
91    provider_registry: &Arc<ProviderRegistry>,
92    subagent_type: &str,
93) -> Option<ResolvedModel> {
94    resolve_subagent_model(config, provider_name, provider_registry, subagent_type)
95}
96
/// The source layer a resolved reasoning effort came from. Surfaced in session
/// metadata (`reasoning_effort_source`) for observability.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReasoningEffortSource {
    /// The session supplied the effort.
    Session,
    /// The request supplied the effort.
    Request,
    /// The provider's configured default supplied the effort.
    ProviderDefault,
    /// No layer supplied an effort.
    None,
}

impl ReasoningEffortSource {
    /// Returns the stable lowercase label for this source: `"session"`,
    /// `"request"`, `"provider_default"` or `"none"`.
    ///
    /// The match is exhaustive on purpose, so adding a variant forces a label
    /// to be chosen here.
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Session => "session",
            Self::Request => "request",
            Self::ProviderDefault => "provider_default",
            Self::None => "none",
        }
    }
}
117
118/// The single reasoning-effort cascade: `session → request → provider default`.
119///
120/// Returns `None` when nothing is configured (so non-reasoning models send no
121/// reasoning parameter). When a *concrete* terminal value is required (e.g. the
122/// UI display), callers should fall back to
123/// [`bamboo_domain::DEFAULT_REASONING_EFFORT`] — that is the one canonical
124/// `"medium"`, not a level hardcoded at the call site.
125pub fn resolve_effective_reasoning_effort(
126    session_effort: Option<ReasoningEffort>,
127    request_effort: Option<ReasoningEffort>,
128    provider_default: Option<ReasoningEffort>,
129) -> (Option<ReasoningEffort>, ReasoningEffortSource) {
130    if let Some(effort) = session_effort {
131        (Some(effort), ReasoningEffortSource::Session)
132    } else if let Some(effort) = request_effort {
133        (Some(effort), ReasoningEffortSource::Request)
134    } else if let Some(effort) = provider_default {
135        (Some(effort), ReasoningEffortSource::ProviderDefault)
136    } else {
137        (None, ReasoningEffortSource::None)
138    }
139}
140
#[cfg(test)]
mod tests {
    use super::*;
    use bamboo_agent_core::tools::ToolSchema;
    use bamboo_agent_core::Message;
    use bamboo_config::{DefaultsConfig, FeatureFlags};
    use bamboo_config::{OpenAIConfig, ProviderConfigs};
    use bamboo_domain::{Session, DEFAULT_REASONING_EFFORT};
    use bamboo_llm::{LLMError, LLMProvider, LLMStream};
    use std::collections::HashMap;

    /// Provider stub whose `chat_stream` always fails with
    /// `LLMError::Api("noop")`. It exists only so the registry has an entry;
    /// no test in this module calls `chat_stream` directly.
    struct NoopProvider;

    #[async_trait::async_trait]
    impl LLMProvider for NoopProvider {
        async fn chat_stream(
            &self,
            _messages: &[Message],
            _tools: &[ToolSchema],
            _max_output_tokens: Option<u32>,
            _model: &str,
        ) -> Result<LLMStream, LLMError> {
            Err(LLMError::Api("noop".to_string()))
        }
    }

    /// Registry containing a single `"openai"` provider, which is also passed
    /// as the registry's second constructor argument.
    fn test_registry() -> Arc<ProviderRegistry> {
        let mut providers: HashMap<String, Arc<dyn LLMProvider>> = HashMap::new();
        providers.insert("openai".to_string(), Arc::new(NoopProvider));
        Arc::new(ProviderRegistry::new(providers, "openai".to_string()))
    }

    /// Defaults with a distinct model name per configured area, so a test can
    /// tell which `defaults.<area>` entry a resolved model came from.
    fn defaults_with_all_areas() -> DefaultsConfig {
        DefaultsConfig {
            chat: ProviderModelRef::new("openai", "gpt-chat"),
            fast: Some(ProviderModelRef::new("openai", "gpt-fast")),
            task_summary: Some(ProviderModelRef::new("openai", "gpt-summary")),
            vision: Some(ProviderModelRef::new("openai", "gpt-vision")),
            memory_background: Some(ProviderModelRef::new("openai", "gpt-memory")),
            planning: None,
            search: None,
            code_review: None,
            sub_agent: Some(ProviderModelRef::new("openai", "gpt-sub")),
            subagent_models: HashMap::new(),
        }
    }

    /// Config with the `provider_model_ref` feature flag ON and the given
    /// defaults installed.
    fn config_with_defaults(defaults: DefaultsConfig) -> Config {
        Config {
            provider: "openai".to_string(),
            features: FeatureFlags {
                provider_model_ref: true,
                ..Default::default()
            },
            defaults: Some(defaults),
            ..Config::default()
        }
    }

    /// Model name of an optional resolved model, borrowed for comparison.
    fn name_of(model: Option<&ResolvedModel>) -> Option<&str> {
        model.map(|m| m.model_name.as_str())
    }

    // ---- Global area models read from defaults.<area> ----

    #[test]
    fn global_area_models_read_each_area_from_its_own_default() {
        let config = config_with_defaults(defaults_with_all_areas());
        let areas = resolve_global_area_models(&config, "openai", &test_registry());

        assert_eq!(name_of(areas.fast.as_ref()), Some("gpt-fast"));
        assert_eq!(name_of(areas.summarization.as_ref()), Some("gpt-summary"));
        assert_eq!(name_of(areas.background.as_ref()), Some("gpt-memory"));
        // The kept refs match the configured defaults.
        assert_eq!(
            areas.fast_ref,
            Some(ProviderModelRef::new("openai", "gpt-fast"))
        );
        assert_eq!(
            areas.summarization_ref,
            Some(ProviderModelRef::new("openai", "gpt-summary"))
        );
        assert_eq!(
            areas.background_ref,
            Some(ProviderModelRef::new("openai", "gpt-memory"))
        );
    }

    /// The core invariant the user asked for: auxiliary models are GLOBAL —
    /// they do not change with the session. We resolve once, then again after
    /// constructing a session that picks a totally different chat model, and
    /// assert the auxiliary model names and kept refs are identical. (The
    /// resolver has no `Session` parameter, so this is also enforced at compile
    /// time; this test guards against a future signature change.)
    #[test]
    fn global_area_models_are_independent_of_any_session() {
        let config = config_with_defaults(defaults_with_all_areas());
        let registry = test_registry();

        let before = resolve_global_area_models(&config, "openai", &registry);

        // A session whose chat model is something exotic must not influence aux.
        let mut session = Session::new("s1", "some-exotic-session-model");
        session.model_ref = Some(ProviderModelRef::new("openai", "some-exotic-session-model"));
        session.reasoning_effort = Some(ReasoningEffort::Max);
        let _ = &session; // it is intentionally NOT passed to the resolver

        let after = resolve_global_area_models(&config, "openai", &registry);

        assert_eq!(name_of(before.fast.as_ref()), name_of(after.fast.as_ref()));
        assert_eq!(
            name_of(before.background.as_ref()),
            name_of(after.background.as_ref())
        );
        assert_eq!(
            name_of(before.summarization.as_ref()),
            name_of(after.summarization.as_ref())
        );
        // The kept refs are just as session-independent as the models.
        assert_eq!(before.fast_ref, after.fast_ref);
        assert_eq!(before.background_ref, after.background_ref);
        assert_eq!(before.summarization_ref, after.summarization_ref);
        // And definitely not the session's chat model.
        assert_ne!(
            name_of(after.fast.as_ref()),
            Some("some-exotic-session-model")
        );
    }

    #[test]
    fn vision_model_is_global_from_defaults() {
        let config = config_with_defaults(defaults_with_all_areas());
        let vision = resolve_global_vision_model(&config, "openai", &test_registry());
        assert_eq!(name_of(vision.as_ref()), Some("gpt-vision"));
    }

    #[test]
    fn subagent_model_is_global_from_defaults() {
        let config = config_with_defaults(defaults_with_all_areas());
        // No per-type override → falls back to defaults.sub_agent (global).
        let sub = resolve_global_subagent_model(&config, "openai", &test_registry(), "coder");
        assert_eq!(name_of(sub.as_ref()), Some("gpt-sub"));
    }

    #[test]
    fn background_falls_back_to_fast_when_memory_background_unset() {
        let mut defaults = defaults_with_all_areas();
        defaults.memory_background = None;
        let config = config_with_defaults(defaults);

        let areas = resolve_global_area_models(&config, "openai", &test_registry());
        // memory_background unset → falls back to defaults.fast.
        assert_eq!(name_of(areas.background.as_ref()), Some("gpt-fast"));
    }

    #[test]
    fn legacy_mode_resolves_fast_from_provider_config() {
        // Flag OFF: no `defaults`, fast comes from the provider's global config.
        let config = Config {
            provider: "openai".to_string(),
            features: FeatureFlags {
                provider_model_ref: false,
                ..Default::default()
            },
            defaults: None,
            providers: ProviderConfigs {
                openai: Some(OpenAIConfig {
                    api_key: "test".to_string(),
                    api_key_encrypted: None,
                    base_url: None,
                    model: Some("gpt-4o".to_string()),
                    fast_model: Some("gpt-4o-mini".to_string()),
                    vision_model: None,
                    reasoning_effort: None,
                    responses_only_models: vec![],
                    request_overrides: None,
                    extra: Default::default(),
                }),
                ..ProviderConfigs::default()
            },
            ..Config::default()
        };

        let areas = resolve_global_area_models(&config, "openai", &test_registry());
        assert_eq!(name_of(areas.fast.as_ref()), Some("gpt-4o-mini"));
    }

    // ---- reasoning effort cascade ----

    #[test]
    fn reasoning_prefers_session_then_request_then_provider() {
        assert_eq!(
            resolve_effective_reasoning_effort(
                Some(ReasoningEffort::Max),
                Some(ReasoningEffort::High),
                Some(ReasoningEffort::Low),
            ),
            (Some(ReasoningEffort::Max), ReasoningEffortSource::Session)
        );
        assert_eq!(
            resolve_effective_reasoning_effort(
                None,
                Some(ReasoningEffort::High),
                Some(ReasoningEffort::Low),
            ),
            (Some(ReasoningEffort::High), ReasoningEffortSource::Request)
        );
        assert_eq!(
            resolve_effective_reasoning_effort(None, None, Some(ReasoningEffort::Low)),
            (
                Some(ReasoningEffort::Low),
                ReasoningEffortSource::ProviderDefault
            )
        );
    }

    #[test]
    fn reasoning_none_when_nothing_configured() {
        let (effort, source) = resolve_effective_reasoning_effort(None, None, None);
        assert_eq!(effort, None);
        assert_eq!(source, ReasoningEffortSource::None);
    }

    #[test]
    fn canonical_default_is_medium_and_used_as_terminal() {
        // The one place "medium" is defined; callers needing a concrete value
        // use this rather than hardcoding a level.
        assert_eq!(DEFAULT_REASONING_EFFORT, ReasoningEffort::Medium);
        let (effort, _) = resolve_effective_reasoning_effort(None, None, None);
        assert_eq!(
            effort.unwrap_or(DEFAULT_REASONING_EFFORT),
            ReasoningEffort::Medium
        );
    }
}