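//! `bamboo_engine/runtime/config.rs`
//!
//! Runtime configuration types for the agent loop: auxiliary model settings,
//! Gold evaluator options, image fallback behavior, prompt-memory flags, and
//! the top-level `AgentLoopConfig`.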

1use std::collections::BTreeSet;
2use std::path::PathBuf;
3use std::sync::Arc;
4
5use bamboo_agent_core::composition::CompositionExecutor;
6use bamboo_agent_core::storage::AttachmentReader;
7use bamboo_agent_core::storage::Storage;
8use bamboo_agent_core::tools::ToolSchema;
9use bamboo_agent_core::GoldConfidence;
10use bamboo_compression::TokenBudget;
11use bamboo_config::MemoryConfig;
12use bamboo_config::PermissionMode;
13use bamboo_domain::ReasoningEffort;
14use bamboo_domain::RuntimeSessionPersistence;
15use bamboo_llm::LLMProvider;
16use bamboo_metrics::MetricsCollector;
17use bamboo_skills::SkillManager;
18use bamboo_tools::ToolRegistry;
19use serde::{Deserialize, Serialize};
20
21#[derive(Clone, Default)]
22pub struct AuxiliaryModelConfig {
23    pub fast_model_name: Option<String>,
24    pub fast_model_provider: Option<Arc<dyn LLMProvider>>,
25    pub background_model_name: Option<String>,
26    pub planning_model_name: Option<String>,
27    pub search_model_name: Option<String>,
28    pub summarization_model_name: Option<String>,
29    pub background_model_provider: Option<Arc<dyn LLMProvider>>,
30    pub summarization_model_provider: Option<Arc<dyn LLMProvider>>,
31}
32
/// Default for `GoldConfig::max_output_tokens`: 1024.
///
/// Referenced by name from the field's `#[serde(default = "...")]` attribute
/// and by `GoldConfig::default()`.
fn default_gold_max_output_tokens() -> u32 {
    1024
}
36
/// Default for `GoldConfig::max_auto_continuations`: 3.
///
/// Referenced by name from the field's `#[serde(default = "...")]` attribute
/// and by `GoldConfig::default()`.
fn default_gold_max_auto_continuations() -> u32 {
    3
}
40
41fn default_gold_min_confidence() -> GoldConfidence {
42    GoldConfidence::Medium
43}
44
45#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
46#[serde(default)]
47pub struct GoldConfig {
48    /// Master switch for Gold observe-only evaluation.
49    #[serde(default)]
50    pub enabled: bool,
51    /// Independent switch for Phase 2 low-risk auto-answer.
52    ///
53    /// Kept separate from `enabled` so Phase 1 observe-only users do not
54    /// implicitly opt into automatic clarification responses.
55    #[serde(default)]
56    pub auto_answer_enabled: bool,
57    /// Independent switch for Phase 3 server-side auto-continue.
58    ///
59    /// Kept separate from both `enabled` and `auto_answer_enabled` so users can
60    /// opt into terminal auto-resume explicitly without enabling other Gold
61    /// automation behaviors.
62    #[serde(default)]
63    pub auto_continue_enabled: bool,
64    /// Optional dedicated model for Gold evaluation. Falls back to fast model,
65    /// then the main chat model when absent.
66    #[serde(default, skip_serializing_if = "Option::is_none")]
67    pub model_name: Option<String>,
68    /// The user's goal for this session.
69    ///
70    /// Unlike `evaluation_prompt` (which only tunes the *judge*), the goal is
71    /// surfaced to the *main* executing agent as a persistent system-prompt
72    /// block so it actively works toward it. The Gold evaluator also measures
73    /// progress against this text.
74    #[serde(default, skip_serializing_if = "Option::is_none")]
75    pub goal: Option<String>,
76    /// Optional custom prompt suffix appended to the built-in Gold evaluator
77    /// prompt. This tunes the judge only; it does not set the goal.
78    #[serde(default, skip_serializing_if = "Option::is_none")]
79    pub evaluation_prompt: Option<String>,
80    /// Output token limit for the Gold evaluator call.
81    #[serde(default = "default_gold_max_output_tokens")]
82    pub max_output_tokens: u32,
83    /// Maximum number of automatic Gold continuations allowed per session.
84    #[serde(default = "default_gold_max_auto_continuations")]
85    pub max_auto_continuations: u32,
86    /// Minimum evaluator confidence required before Gold auto-continues or
87    /// auto-answers. Defaults to `medium` so the loop fires on reasonably
88    /// confident verdicts rather than only `high`.
89    #[serde(default = "default_gold_min_confidence")]
90    pub min_auto_continue_confidence: GoldConfidence,
91}
92
93impl Default for GoldConfig {
94    fn default() -> Self {
95        Self {
96            enabled: false,
97            auto_answer_enabled: false,
98            auto_continue_enabled: false,
99            model_name: None,
100            goal: None,
101            evaluation_prompt: None,
102            max_output_tokens: default_gold_max_output_tokens(),
103            max_auto_continuations: default_gold_max_auto_continuations(),
104            min_auto_continue_confidence: default_gold_min_confidence(),
105        }
106    }
107}
108
109impl GoldConfig {
110    /// The session goal text, falling back to the legacy `evaluation_prompt`
111    /// for sessions created before the dedicated `goal` field existed.
112    ///
113    /// Returns `None` when neither field holds non-empty text.
114    pub fn effective_goal(&self) -> Option<&str> {
115        self.goal
116            .as_deref()
117            .or(self.evaluation_prompt.as_deref())
118            .map(str::trim)
119            .filter(|value| !value.is_empty())
120    }
121}
122
/// How image content is degraded when it cannot be sent as an image.
///
/// Carried by `ImageFallbackConfig::mode`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ImageFallbackMode {
    /// Degrade the image to a placeholder.
    // NOTE(review): the placeholder content is chosen by the consumer of this
    // mode, not here.
    Placeholder,
    /// Treat the image as an error.
    // NOTE(review): presumably the request is rejected; confirm at the call site.
    Error,
    /// Degrade the image via OCR.
    // NOTE(review): presumably replaced with the extracted text; confirm at the
    // call site.
    Ocr,
    /// Use a vision-capable LLM to describe the image, then replace the image
    /// with the textual description so that text-only models can understand
    /// the content.
    Vision,
}
133
134#[derive(Debug, Clone, PartialEq, Eq)]
135pub struct ImageFallbackConfig {
136    pub mode: ImageFallbackMode,
137    /// Vision model name for `Vision` mode. Falls back to the session's main model
138    /// when `None`.
139    pub vision_model: Option<String>,
140}
141
/// Boolean feature flags for prompt-time memory behavior.
///
/// Each flag has a same-named counterpart on `MemoryConfig`, from which this
/// type can be built via `From<&MemoryConfig>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PromptMemoryFlags {
    /// Mirrors `MemoryConfig::project_prompt_injection`; `true` by default.
    pub project_prompt_injection: bool,
    /// Mirrors `MemoryConfig::relevant_recall`; `true` by default.
    pub relevant_recall: bool,
    /// Mirrors `MemoryConfig::relevant_recall_rerank`; `false` by default.
    pub relevant_recall_rerank: bool,
    /// Mirrors `MemoryConfig::project_first_dream`; `true` by default.
    pub project_first_dream: bool,
}
149
150impl Default for PromptMemoryFlags {
151    fn default() -> Self {
152        Self {
153            project_prompt_injection: true,
154            relevant_recall: true,
155            relevant_recall_rerank: false,
156            project_first_dream: true,
157        }
158    }
159}
160
161impl From<&MemoryConfig> for PromptMemoryFlags {
162    fn from(value: &MemoryConfig) -> Self {
163        Self {
164            project_prompt_injection: value.project_prompt_injection,
165            relevant_recall: value.relevant_recall,
166            relevant_recall_rerank: value.relevant_recall_rerank,
167            project_first_dream: value.project_first_dream,
168        }
169    }
170}
171
172/// Configuration for the agent loop.
173#[non_exhaustive]
174pub struct AgentLoopConfig {
175    pub(crate) max_rounds: usize,
176    pub(crate) system_prompt: Option<String>,
177    /// Skill IDs that are disabled globally for this execution.
178    pub(crate) disabled_skill_ids: BTreeSet<String>,
179    /// Optional explicit skill selection for this execution.
180    /// When set, only these skill IDs are considered for skill context and allowlists.
181    pub(crate) selected_skill_ids: Option<Vec<String>>,
182    /// Optional active skill mode for this execution.
183    ///
184    /// When set, skill discovery prefers `skills-<mode>` directories over generic
185    /// directories for the same skill id.
186    pub(crate) selected_skill_mode: Option<String>,
187    pub(crate) additional_tool_schemas: Vec<ToolSchema>,
188    pub(crate) tool_registry: Arc<ToolRegistry>,
189    pub(crate) composition_executor: Option<Arc<CompositionExecutor>>,
190    pub(crate) skill_manager: Option<Arc<SkillManager>>,
191    /// If true, skip appending the initial user message (already present in session).
192    pub(crate) skip_initial_user_message: bool,
193    /// Optional storage for persisting session changes
194    pub(crate) storage: Option<Arc<dyn Storage>>,
195    /// Optional runtime persistence for non-authoritative session saves.
196    /// When set, engine save sites use this instead of `storage` for writes.
197    pub(crate) persistence: Option<Arc<dyn RuntimeSessionPersistence>>,
198    /// Optional attachment reader for resolving `bamboo-attachment://...` references
199    /// into `data:` URLs for upstream providers. This must not mutate session storage.
200    pub(crate) attachment_reader: Option<Arc<dyn AttachmentReader>>,
201    /// Optional asynchronous metrics collector
202    pub(crate) metrics_collector: Option<MetricsCollector>,
203    /// Model name used for metrics attribution
204    pub(crate) model_name: Option<String>,
205    /// Fast/cheap model for lightweight tasks (task evaluation, search, etc.).
206    ///
207    /// Call sites may fall back to `model_name` when this is unset.
208    pub(crate) fast_model_name: Option<String>,
209    /// Optional provider override for lightweight fast-model LLM calls.
210    pub(crate) fast_model_provider: Option<Arc<dyn LLMProvider>>,
211    /// Fast/cheap model for memory/background tasks.
212    ///
213    /// This must not silently fall back to the main interaction model.
214    pub(crate) background_model_name: Option<String>,
215
216    /// Model for planning/coordination tasks (task decomposition, architecture).
217    /// Falls back to `model_name` when unset.
218    pub(crate) planning_model_name: Option<String>,
219    /// Model for search/navigation tasks (grep, file listing, symbol resolution).
220    /// Falls back to `fast_model_name` when unset.
221    pub(crate) search_model_name: Option<String>,
222    /// Custom instructions for conversation summarization, injected into the
223    /// LLM summary prompt. Lets users control what the summary focuses on.
224    ///
225    /// Resolution order: session-level > config-level > built-in defaults.
226    pub(crate) compression_instructions: Option<String>,
227    /// Dedicated model for summarization. Falls back to `background_model_name`.
228    pub(crate) summarization_model_name: Option<String>,
229    /// Optional provider override for memory/background model LLM calls.
230    ///
231    /// When set, memory recall rerank and other memory/background tasks use this
232    /// provider instead of the shared agent loop provider.
233    pub(crate) background_model_provider: Option<Arc<dyn LLMProvider>>,
234    /// Optional provider override for summarization / context compression calls.
235    ///
236    /// When set, conversation/task summarization uses this provider instead of
237    /// the shared agent loop provider.
238    pub(crate) summarization_model_provider: Option<Arc<dyn LLMProvider>>,
239    /// Provider routing key used for provider-specific request behavior.
240    ///
241    /// In multi-instance mode this may be the instance id.
242    pub(crate) provider_name: Option<String>,
243    /// Underlying provider type (for example `openai`, `anthropic`, `copilot`).
244    ///
245    /// This is distinct from `provider_name` so provider-specific behavior can
246    /// remain correct when routing keys are instance ids.
247    pub(crate) provider_type: Option<String>,
248    /// Optional request-time reasoning effort override.
249    pub(crate) reasoning_effort: Option<ReasoningEffort>,
250    /// Bamboo application data directory (typically `~/.bamboo`).
251    ///
252    /// Used by runtime features that persist auxiliary artifacts outside the
253    /// session store, such as durable plan mode files under `~/.bamboo/plan`.
254    pub(crate) app_data_dir: Option<PathBuf>,
255    /// Tool names that should be excluded from schemas sent to the LLM.
256    pub(crate) disabled_tools: BTreeSet<String>,
257    /// Token budget for context management (optional, defaults to model's limits)
258    pub(crate) token_budget: Option<TokenBudget>,
259    /// Optional image fallback behavior applied to *LLM requests only* (never persisted).
260    ///
261    /// This is intended for text-only provider paths where image parts must be degraded
262    /// (placeholder / OCR / error) without leaking into stored session history or UI.
263    pub(crate) image_fallback: Option<ImageFallbackConfig>,
264    /// Feature flags controlling prompt-time memory injection behavior.
265    pub(crate) prompt_memory_flags: PromptMemoryFlags,
266    /// Maximum tool calls allowed per round (default: 80).
267    pub(crate) max_tool_calls_per_round: usize,
268    /// Maximum consecutive failures per tool before circuit breaker (default: 3).
269    pub(crate) max_consecutive_failures_per_tool: usize,
270    /// Tool names that require strict argument validation.
271    pub(crate) strict_argument_tool_names: Vec<String>,
272    /// Per-tool execution timeout in seconds (default: 120).
273    pub(crate) per_tool_timeout_secs: u64,
274    /// Parallel batch execution timeout in seconds (default: 300).
275    pub(crate) parallel_batch_timeout_secs: u64,
276    /// Permission mode for this execution (default: None = use PermissionConfig's mode).
277    pub(crate) permission_mode: Option<PermissionMode>,
278    /// Optional Gold observe-only evaluator configuration.
279    ///
280    /// When `None` or `enabled == false`, Gold evaluation is disabled and the
281    /// existing execute/respond/resume loop remains unchanged.
282    pub(crate) gold_config: Option<GoldConfig>,
283    /// Enable dynamic per-round model routing based on task complexity.
284    /// When true, the pipeline classifies complexity at each round end and
285    /// stores the result in session metadata.
286    pub(crate) features_dynamic_model_routing: bool,
287    /// Optional per-round resolver for auxiliary model settings that should
288    /// follow live global config rather than stay frozen for the whole run.
289    ///
290    /// The main chat model remains session/request scoped; this hook is only
291    /// for fast/background/planning/search/summarization helpers.
292    pub(crate) auxiliary_model_resolver:
293        Option<Arc<dyn Fn() -> AuxiliaryModelConfig + Send + Sync>>,
294    /// Server-level usage guidance contributed by the run's tool executor —
295    /// chiefly the `instructions` connected MCP servers return from `initialize`.
296    /// Captured once at config construction (from `ToolExecutor::tool_guidance`)
297    /// and appended to the tool-guide section of the system prompt, so a server's
298    /// own how-to-use notes appear only while that server is loaded for the run.
299    pub(crate) mcp_tool_guidance: Option<String>,
300}
301
302impl Default for AgentLoopConfig {
303    fn default() -> Self {
304        Self {
305            max_rounds: 200,
306            system_prompt: None,
307            disabled_skill_ids: BTreeSet::new(),
308            selected_skill_ids: None,
309            selected_skill_mode: None,
310            additional_tool_schemas: Vec::new(),
311            tool_registry: Arc::new(ToolRegistry::new()),
312            composition_executor: None,
313            skill_manager: None,
314            skip_initial_user_message: false,
315            storage: None,
316            persistence: None,
317            attachment_reader: None,
318            metrics_collector: None,
319            model_name: None,
320            fast_model_name: None,
321            fast_model_provider: None,
322            background_model_name: None,
323            planning_model_name: None,
324            search_model_name: None,
325            compression_instructions: None,
326            summarization_model_name: None,
327            background_model_provider: None,
328            summarization_model_provider: None,
329            provider_name: None,
330            provider_type: None,
331            reasoning_effort: None,
332            app_data_dir: None,
333            disabled_tools: BTreeSet::new(),
334            token_budget: None,
335            image_fallback: None,
336            prompt_memory_flags: PromptMemoryFlags::default(),
337            max_tool_calls_per_round: 80,
338            max_consecutive_failures_per_tool: 3,
339            strict_argument_tool_names: vec![
340                "Write".into(),
341                "Edit".into(),
342                "NotebookEdit".into(),
343                "apply_patch".into(),
344                "Bash".into(),
345                "Task".into(),
346                "SubAgent".into(),
347                "scheduler".into(),
348                "sub_session_manager".into(),
349                "session_note".into(),
350                "memory_note".into(),
351            ],
352            per_tool_timeout_secs: 120,
353            parallel_batch_timeout_secs: 300,
354            permission_mode: None,
355            gold_config: None,
356            features_dynamic_model_routing: false,
357            auxiliary_model_resolver: None,
358            mcp_tool_guidance: None,
359        }
360    }
361}
362
363impl AgentLoopConfig {
364    /// The active session goal to surface to the main agent, or `None` when
365    /// Gold is disabled or no goal is set. Falls back to the legacy
366    /// `evaluation_prompt` for back-compat via [`GoldConfig::effective_goal`].
367    pub fn active_goal(&self) -> Option<&str> {
368        self.gold_config
369            .as_ref()
370            .filter(|cfg| cfg.enabled)
371            .and_then(GoldConfig::effective_goal)
372    }
373
374    /// Whether the Codex-style autonomous goal loop is active for this run.
375    ///
376    /// This requires Gold to be enabled, a goal to be set, AND auto-continue to
377    /// be on. Only then is the `update_goal` self-report tool surfaced to the
378    /// model and the terminal double-check allowed to veto a premature stop.
379    /// When Gold is enabled without auto-continue, the evaluator stays purely
380    /// observational (legacy behavior).
381    pub fn goal_loop_active(&self) -> bool {
382        self.gold_config.as_ref().is_some_and(|cfg| {
383            cfg.enabled && cfg.auto_continue_enabled && cfg.effective_goal().is_some()
384        })
385    }
386}
387
388#[cfg(test)]
389mod tests;