Skip to main content

bamboo_engine/runtime/
config.rs

1use std::collections::BTreeSet;
2use std::path::PathBuf;
3use std::sync::Arc;
4
5use bamboo_agent_core::composition::CompositionExecutor;
6use bamboo_agent_core::storage::AttachmentReader;
7use bamboo_agent_core::storage::Storage;
8use bamboo_agent_core::tools::ToolSchema;
9use bamboo_agent_core::GoldConfidence;
10use bamboo_compression::TokenBudget;
11use bamboo_config::MemoryConfig;
12use bamboo_config::PermissionMode;
13use bamboo_domain::ReasoningEffort;
14use bamboo_domain::RuntimeSessionPersistence;
15use bamboo_llm::LLMProvider;
16use bamboo_metrics::MetricsCollector;
17use bamboo_skills::SkillManager;
18use bamboo_tools::ToolRegistry;
19use serde::{Deserialize, Serialize};
20
/// Bundle of auxiliary (non-main) model settings: optional model names for
/// the fast, background, planning, search and summarization roles, plus
/// optional provider overrides for the fast, background and summarization
/// roles.
///
/// Every field is optional, so the derived `Default` leaves all of them unset.
#[derive(Clone, Default)]
pub struct AuxiliaryModelConfig {
    /// Model name for the fast role, if one is configured.
    pub fast_model_name: Option<String>,
    /// Provider override for fast-model calls, if one is configured.
    pub fast_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Model name for the background role, if one is configured.
    pub background_model_name: Option<String>,
    /// Model name for the planning role, if one is configured.
    pub planning_model_name: Option<String>,
    /// Model name for the search role, if one is configured.
    pub search_model_name: Option<String>,
    /// Model name for the summarization role, if one is configured.
    pub summarization_model_name: Option<String>,
    /// Provider override for background-model calls, if one is configured.
    pub background_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Provider override for summarization calls, if one is configured.
    pub summarization_model_provider: Option<Arc<dyn LLMProvider>>,
}
32
/// Fallback for [`GoldConfig::max_output_tokens`], used both by serde when the
/// field is missing and by `GoldConfig::default()`.
fn default_gold_max_output_tokens() -> u32 {
    const DEFAULT_MAX_OUTPUT_TOKENS: u32 = 1024;
    DEFAULT_MAX_OUTPUT_TOKENS
}
36
/// Fallback for [`GoldConfig::max_auto_continuations`], used both by serde
/// when the field is missing and by `GoldConfig::default()`.
fn default_gold_max_auto_continuations() -> u32 {
    const DEFAULT_MAX_AUTO_CONTINUATIONS: u32 = 3;
    DEFAULT_MAX_AUTO_CONTINUATIONS
}
40
41fn default_gold_min_confidence() -> GoldConfidence {
42    GoldConfidence::Medium
43}
44
/// Per-run settings for the Gold evaluator and its opt-in automation phases.
///
/// Every switch defaults to `false` and every optional field to `None`; the
/// numeric limits and the confidence floor come from the `default_gold_*`
/// helpers, so a field missing from serialized input takes the same value
/// that [`Default`] produces.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct GoldConfig {
    /// Master switch for Gold observe-only evaluation.
    #[serde(default)]
    pub enabled: bool,
    /// Independent switch for Phase 2 low-risk auto-answer.
    ///
    /// Kept separate from `enabled` so Phase 1 observe-only users do not
    /// implicitly opt into automatic clarification responses.
    #[serde(default)]
    pub auto_answer_enabled: bool,
    /// Independent switch for Phase 3 server-side auto-continue.
    ///
    /// Kept separate from both `enabled` and `auto_answer_enabled` so users can
    /// opt into terminal auto-resume explicitly without enabling other Gold
    /// automation behaviors.
    #[serde(default)]
    pub auto_continue_enabled: bool,
    /// Optional dedicated model for Gold evaluation. Falls back to fast model,
    /// then the main chat model when absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_name: Option<String>,
    /// The user's goal for this session.
    ///
    /// Unlike `evaluation_prompt` (which only tunes the *judge*), the goal is
    /// surfaced to the *main* executing agent as a persistent system-prompt
    /// block so it actively works toward it. The Gold evaluator also measures
    /// progress against this text.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub goal: Option<String>,
    /// Optional custom prompt suffix appended to the built-in Gold evaluator
    /// prompt. This tunes the judge only; it does not set the goal.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub evaluation_prompt: Option<String>,
    /// Output token limit for the Gold evaluator call. Defaults to 1024.
    #[serde(default = "default_gold_max_output_tokens")]
    pub max_output_tokens: u32,
    /// Maximum number of automatic Gold continuations allowed per session.
    /// Defaults to 3.
    #[serde(default = "default_gold_max_auto_continuations")]
    pub max_auto_continuations: u32,
    /// Minimum evaluator confidence required before Gold auto-continues or
    /// auto-answers. Defaults to `medium` so the loop fires on reasonably
    /// confident verdicts rather than only `high`.
    #[serde(default = "default_gold_min_confidence")]
    pub min_auto_continue_confidence: GoldConfidence,
}
92
93impl Default for GoldConfig {
94    fn default() -> Self {
95        Self {
96            enabled: false,
97            auto_answer_enabled: false,
98            auto_continue_enabled: false,
99            model_name: None,
100            goal: None,
101            evaluation_prompt: None,
102            max_output_tokens: default_gold_max_output_tokens(),
103            max_auto_continuations: default_gold_max_auto_continuations(),
104            min_auto_continue_confidence: default_gold_min_confidence(),
105        }
106    }
107}
108
109impl GoldConfig {
110    /// The session goal text, falling back to the legacy `evaluation_prompt`
111    /// for sessions created before the dedicated `goal` field existed.
112    ///
113    /// Returns `None` when neither field holds non-empty text.
114    pub fn effective_goal(&self) -> Option<&str> {
115        self.goal
116            .as_deref()
117            .or(self.evaluation_prompt.as_deref())
118            .map(str::trim)
119            .filter(|value| !value.is_empty())
120    }
121}
122
/// Fallback for [`GuardianConfig::max_reviews`], used both by serde when the
/// field is missing and by `GuardianConfig::default()`.
fn default_guardian_max_reviews() -> u32 {
    const DEFAULT_MAX_REVIEWS: u32 = 2;
    DEFAULT_MAX_REVIEWS
}
126
/// Configuration for the guardian adversarial-review terminal gate.
///
/// Mirrors [`GoldConfig`]: a plain, serde-defaulting struct surfaced per run.
/// When `enabled` is false (the default) the guardian gate is inactive and the
/// terminal completion path is unchanged.
///
/// Fields missing from serialized input take the values [`Default`] produces:
/// gate off, no model override, and a budget of 2 reviews.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct GuardianConfig {
    /// Master switch for the guardian review gate. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Optional dedicated reviewer model. Falls back to the run's main model.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_name: Option<String>,
    /// Maximum guardian review passes per run (budget; mirrors
    /// [`GoldConfig::max_auto_continuations`]). Defaults to 2.
    #[serde(default = "default_guardian_max_reviews")]
    pub max_reviews: u32,
}
146
147impl Default for GuardianConfig {
148    fn default() -> Self {
149        Self {
150            enabled: false,
151            model_name: None,
152            max_reviews: default_guardian_max_reviews(),
153        }
154    }
155}
156
/// Late-bound spawner for the guardian reviewer child.
///
/// The runner cannot construct a child directly: the `SpawnScheduler` is built
/// *after* the `Agent` that drives the runner (a construction-order cycle), so
/// the terminal gate spawns the reviewer through this trait object, injected
/// per-request on [`AgentLoopConfig`] exactly like `auxiliary_model_resolver`.
/// The implementation lives in the server (it captures the already-built
/// scheduler + child-session adapter); the engine holds only the trait, keeping
/// the engine free of any dependency on server/AppState types.
#[async_trait::async_trait]
pub trait GuardianSpawner: Send + Sync {
    /// Create a read-only reviewer child for `parent_session`, seeded with
    /// `review_prompt`, enqueue it to run, and return its session id so the
    /// caller can register a wait on it.
    ///
    /// `model` is presumably the model the reviewer child runs on, and
    /// `disabled_tools` an optional set of tool names withheld from it —
    /// NOTE(review): confirm both against the server-side implementation.
    ///
    /// # Errors
    ///
    /// Returns `Err` carrying a message string when the child could not be
    /// created or enqueued; the exact failure cases are defined by the
    /// implementation, which is not part of this file.
    async fn spawn_guardian_review(
        &self,
        parent_session: &bamboo_agent_core::Session,
        review_prompt: String,
        model: String,
        disabled_tools: Option<BTreeSet<String>>,
    ) -> Result<String, String>;
}
179
/// A child sub-agent's request to have a gated tool approved by its parent.
///
/// A non-bypassed child cannot answer its own permission prompt (no human is
/// attached to a child session), so the request is delegated up to the parent.
#[derive(Debug, Clone)]
pub struct ChildApprovalRequest {
    /// Session id of the child making the request.
    pub child_session_id: String,
    /// Session id of the parent the request is delegated to.
    pub parent_session_id: String,
    /// The gated tool call on the child to re-execute once approved.
    pub child_tool_call_id: String,
    /// Name of the gated tool the child wants to run.
    pub tool_name: String,
    /// Permission type as a string (e.g. "WriteFile", "ExecuteCommand").
    pub permission_type: String,
    /// The concrete resource the permission applies to (path, command, …).
    pub resource: String,
    /// Human-facing approval question to surface on the parent.
    pub question: String,
    /// The raw `awaiting_permission_approval` payload the child's executor built,
    /// so the parent can reuse the existing grant-extraction path verbatim.
    pub approval_payload: serde_json::Value,
}
201
/// What the executor should do after delegating a child's approval upward.
///
/// This is the success value of [`ApprovalDelegate::delegate_child_approval`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChildApprovalOutcome {
    /// Registered on the parent; the child must SUSPEND and await the decision.
    Delegated,
    /// Parent policy auto-approved (bypass / existing grant); proceed to execute.
    AutoApproved,
    /// Parent policy auto-denied; the executor must deny the tool.
    AutoDenied,
}
212
/// Late-bound delegate that routes a child's approval request up to its parent.
///
/// Injected per-request on [`AgentLoopConfig`] exactly like [`GuardianSpawner`];
/// the trait lives in the engine, the implementation in the server (it owns the
/// parent session store + pending-question + notification machinery).
#[async_trait::async_trait]
pub trait ApprovalDelegate: Send + Sync {
    /// Register `request` on its parent (or auto-resolve by policy) and report
    /// what the child's executor should do next.
    ///
    /// # Errors
    ///
    /// Returns `Err` carrying a message string when delegation itself fails;
    /// the exact failure cases are defined by the implementation, which is not
    /// part of this file.
    async fn delegate_child_approval(
        &self,
        request: ChildApprovalRequest,
    ) -> Result<ChildApprovalOutcome, String>;
}
227
/// Strategy for degrading image parts on provider paths that cannot accept
/// them; selected through [`ImageFallbackConfig::mode`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ImageFallbackMode {
    /// Presumably swaps the image for a textual placeholder —
    /// NOTE(review): confirm against the fallback implementation.
    Placeholder,
    /// Presumably fails the request instead of degrading the image —
    /// NOTE(review): confirm against the fallback implementation.
    Error,
    /// Presumably replaces the image with OCR-extracted text —
    /// NOTE(review): confirm against the fallback implementation.
    Ocr,
    /// Use a vision-capable LLM to describe the image, then replace the image
    /// with the textual description so that text-only models can understand
    /// the content.
    Vision,
}
238
/// Image fallback settings: the degradation strategy plus the optional model
/// used by the `Vision` strategy.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImageFallbackConfig {
    /// Which fallback strategy to apply.
    pub mode: ImageFallbackMode,
    /// Vision model name for `Vision` mode. Falls back to the session's main model
    /// when `None`.
    pub vision_model: Option<String>,
}
246
/// Feature flags controlling prompt-time memory injection behavior.
///
/// `Default` turns every flag on except `relevant_recall_rerank`;
/// `From<&MemoryConfig>` copies the same-named fields from the memory config.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PromptMemoryFlags {
    /// Mirrors `MemoryConfig::project_prompt_injection`. Default: `true`.
    pub project_prompt_injection: bool,
    /// Mirrors `MemoryConfig::relevant_recall`. Default: `true`.
    pub relevant_recall: bool,
    /// Mirrors `MemoryConfig::relevant_recall_rerank`. Default: `false`.
    pub relevant_recall_rerank: bool,
    /// Mirrors `MemoryConfig::project_first_dream`. Default: `true`.
    pub project_first_dream: bool,
}
254
255impl Default for PromptMemoryFlags {
256    fn default() -> Self {
257        Self {
258            project_prompt_injection: true,
259            relevant_recall: true,
260            relevant_recall_rerank: false,
261            project_first_dream: true,
262        }
263    }
264}
265
266impl From<&MemoryConfig> for PromptMemoryFlags {
267    fn from(value: &MemoryConfig) -> Self {
268        Self {
269            project_prompt_injection: value.project_prompt_injection,
270            relevant_recall: value.relevant_recall,
271            relevant_recall_rerank: value.relevant_recall_rerank,
272            project_first_dream: value.project_first_dream,
273        }
274    }
275}
276
/// Configuration for the agent loop.
///
/// All fields are crate-private; baseline values come from the `Default`
/// impl, and the defaults quoted on individual fields refer to it.
#[non_exhaustive]
pub struct AgentLoopConfig {
    /// Maximum number of rounds for the loop (default: 200).
    pub(crate) max_rounds: usize,
    /// Optional system prompt text for this execution.
    pub(crate) system_prompt: Option<String>,
    /// Skill IDs that are disabled globally for this execution.
    pub(crate) disabled_skill_ids: BTreeSet<String>,
    /// Optional explicit skill selection for this execution.
    /// When set, only these skill IDs are considered for skill context and allowlists.
    pub(crate) selected_skill_ids: Option<Vec<String>>,
    /// Optional active skill mode for this execution.
    ///
    /// When set, skill discovery prefers `skills-<mode>` directories over generic
    /// directories for the same skill id.
    pub(crate) selected_skill_mode: Option<String>,
    /// Extra tool schemas supplied for this execution — presumably offered to
    /// the model alongside the registry's tools; NOTE(review): confirm against
    /// the schema-building code.
    pub(crate) additional_tool_schemas: Vec<ToolSchema>,
    /// Tool registry for this execution (default: a fresh `ToolRegistry::new()`).
    pub(crate) tool_registry: Arc<ToolRegistry>,
    /// Optional composition executor. NOTE(review): its role is defined in
    /// `bamboo_agent_core::composition`, not visible from this file.
    pub(crate) composition_executor: Option<Arc<CompositionExecutor>>,
    /// Optional skill manager — presumably the source for skill discovery;
    /// NOTE(review): confirm against the skill-loading code.
    pub(crate) skill_manager: Option<Arc<SkillManager>>,
    /// If true, skip appending the initial user message (already present in session).
    pub(crate) skip_initial_user_message: bool,
    /// Optional storage for persisting session changes
    pub(crate) storage: Option<Arc<dyn Storage>>,
    /// Optional runtime persistence for non-authoritative session saves.
    /// When set, engine save sites use this instead of `storage` for writes.
    pub(crate) persistence: Option<Arc<dyn RuntimeSessionPersistence>>,
    /// Optional attachment reader for resolving `bamboo-attachment://...` references
    /// into `data:` URLs for upstream providers. This must not mutate session storage.
    pub(crate) attachment_reader: Option<Arc<dyn AttachmentReader>>,
    /// Optional asynchronous metrics collector
    pub(crate) metrics_collector: Option<MetricsCollector>,
    /// Model name used for metrics attribution
    pub(crate) model_name: Option<String>,
    /// Fast/cheap model for lightweight tasks (task evaluation, search, etc.).
    ///
    /// Call sites may fall back to `model_name` when this is unset.
    pub(crate) fast_model_name: Option<String>,
    /// Optional provider override for lightweight fast-model LLM calls.
    pub(crate) fast_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Fast/cheap model for memory/background tasks.
    ///
    /// This must not silently fall back to the main interaction model.
    pub(crate) background_model_name: Option<String>,

    /// Model for planning/coordination tasks (task decomposition, architecture).
    /// Falls back to `model_name` when unset.
    pub(crate) planning_model_name: Option<String>,
    /// Model for search/navigation tasks (grep, file listing, symbol resolution).
    /// Falls back to `fast_model_name` when unset.
    pub(crate) search_model_name: Option<String>,
    /// Custom instructions for conversation summarization, injected into the
    /// LLM summary prompt. Lets users control what the summary focuses on.
    ///
    /// Resolution order: session-level > config-level > built-in defaults.
    pub(crate) compression_instructions: Option<String>,
    /// Dedicated model for summarization. Falls back to `background_model_name`.
    pub(crate) summarization_model_name: Option<String>,
    /// Optional provider override for memory/background model LLM calls.
    ///
    /// When set, memory recall rerank and other memory/background tasks use this
    /// provider instead of the shared agent loop provider.
    pub(crate) background_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Optional provider override for summarization / context compression calls.
    ///
    /// When set, conversation/task summarization uses this provider instead of
    /// the shared agent loop provider.
    pub(crate) summarization_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Provider routing key used for provider-specific request behavior.
    ///
    /// In multi-instance mode this may be the instance id.
    pub(crate) provider_name: Option<String>,
    /// Underlying provider type (for example `openai`, `anthropic`, `copilot`).
    ///
    /// This is distinct from `provider_name` so provider-specific behavior can
    /// remain correct when routing keys are instance ids.
    pub(crate) provider_type: Option<String>,
    /// Optional request-time reasoning effort override.
    pub(crate) reasoning_effort: Option<ReasoningEffort>,
    /// Bamboo application data directory (typically `~/.bamboo`).
    ///
    /// Used by runtime features that persist auxiliary artifacts outside the
    /// session store, such as durable plan mode files under `~/.bamboo/plan`.
    pub(crate) app_data_dir: Option<PathBuf>,
    /// Tool names that should be excluded from schemas sent to the LLM.
    pub(crate) disabled_tools: BTreeSet<String>,
    /// Token budget for context management (optional, defaults to model's limits)
    pub(crate) token_budget: Option<TokenBudget>,
    /// Optional image fallback behavior applied to *LLM requests only* (never persisted).
    ///
    /// This is intended for text-only provider paths where image parts must be degraded
    /// (placeholder / OCR / error / vision description) without leaking into stored
    /// session history or UI.
    pub(crate) image_fallback: Option<ImageFallbackConfig>,
    /// Feature flags controlling prompt-time memory injection behavior.
    pub(crate) prompt_memory_flags: PromptMemoryFlags,
    /// Maximum tool calls allowed per round (default: 80).
    pub(crate) max_tool_calls_per_round: usize,
    /// Maximum consecutive failures per tool before circuit breaker (default: 3).
    pub(crate) max_consecutive_failures_per_tool: usize,
    /// Tool names that require strict argument validation.
    pub(crate) strict_argument_tool_names: Vec<String>,
    /// Per-tool execution timeout in seconds (default: 120).
    pub(crate) per_tool_timeout_secs: u64,
    /// Parallel batch execution timeout in seconds (default: 300).
    pub(crate) parallel_batch_timeout_secs: u64,
    /// Permission mode for this execution (default: None = use PermissionConfig's mode).
    pub(crate) permission_mode: Option<PermissionMode>,
    /// Optional Gold observe-only evaluator configuration.
    ///
    /// When `None` or `enabled == false`, Gold evaluation is disabled and the
    /// existing execute/respond/resume loop remains unchanged.
    pub(crate) gold_config: Option<GoldConfig>,
    /// Optional guardian adversarial-review gate configuration. When `None` or
    /// `enabled == false`, the guardian terminal gate is inactive.
    pub(crate) guardian_config: Option<GuardianConfig>,
    /// Late-bound spawner for the guardian reviewer child. `None` (the default)
    /// leaves the guardian gate inert even when `guardian_config.enabled` is set,
    /// since the runner cannot create a child without it. Wired by the server.
    pub(crate) guardian_spawner: Option<Arc<dyn GuardianSpawner>>,
    /// Late-bound delegate that routes a child's gated-tool approval request up
    /// to its parent (Phase 2). `None` (the default) leaves child gating on its
    /// legacy path. Wired by the server.
    pub(crate) approval_delegate: Option<Arc<dyn ApprovalDelegate>>,
    /// Enable dynamic per-round model routing based on task complexity.
    /// When true, the pipeline classifies complexity at each round end and
    /// stores the result in session metadata.
    pub(crate) features_dynamic_model_routing: bool,
    /// Optional per-round resolver for auxiliary model settings that should
    /// follow live global config rather than stay frozen for the whole run.
    ///
    /// The main chat model remains session/request scoped; this hook is only
    /// for fast/background/planning/search/summarization helpers.
    pub(crate) auxiliary_model_resolver:
        Option<Arc<dyn Fn() -> AuxiliaryModelConfig + Send + Sync>>,
    /// Server-level usage guidance contributed by the run's tool executor —
    /// chiefly the `instructions` connected MCP servers return from `initialize`.
    /// Captured once at config construction (from `ToolExecutor::tool_guidance`)
    /// and appended to the tool-guide section of the system prompt, so a server's
    /// own how-to-use notes appear only while that server is loaded for the run.
    pub(crate) mcp_tool_guidance: Option<String>,
}
417
418impl Default for AgentLoopConfig {
419    fn default() -> Self {
420        Self {
421            max_rounds: 200,
422            system_prompt: None,
423            disabled_skill_ids: BTreeSet::new(),
424            selected_skill_ids: None,
425            selected_skill_mode: None,
426            additional_tool_schemas: Vec::new(),
427            tool_registry: Arc::new(ToolRegistry::new()),
428            composition_executor: None,
429            skill_manager: None,
430            skip_initial_user_message: false,
431            storage: None,
432            persistence: None,
433            attachment_reader: None,
434            metrics_collector: None,
435            model_name: None,
436            fast_model_name: None,
437            fast_model_provider: None,
438            background_model_name: None,
439            planning_model_name: None,
440            search_model_name: None,
441            compression_instructions: None,
442            summarization_model_name: None,
443            background_model_provider: None,
444            summarization_model_provider: None,
445            provider_name: None,
446            provider_type: None,
447            reasoning_effort: None,
448            app_data_dir: None,
449            disabled_tools: BTreeSet::new(),
450            token_budget: None,
451            image_fallback: None,
452            prompt_memory_flags: PromptMemoryFlags::default(),
453            max_tool_calls_per_round: 80,
454            max_consecutive_failures_per_tool: 3,
455            strict_argument_tool_names: vec![
456                "Write".into(),
457                "Edit".into(),
458                "NotebookEdit".into(),
459                "apply_patch".into(),
460                "Bash".into(),
461                "Task".into(),
462                "SubAgent".into(),
463                "scheduler".into(),
464                "sub_session_manager".into(),
465                "session_note".into(),
466                "memory_note".into(),
467            ],
468            per_tool_timeout_secs: 120,
469            parallel_batch_timeout_secs: 300,
470            permission_mode: None,
471            gold_config: None,
472            guardian_config: None,
473            guardian_spawner: None,
474            approval_delegate: None,
475            features_dynamic_model_routing: false,
476            auxiliary_model_resolver: None,
477            mcp_tool_guidance: None,
478        }
479    }
480}
481
482impl AgentLoopConfig {
483    /// The active session goal to surface to the main agent, or `None` when
484    /// Gold is disabled or no goal is set. Falls back to the legacy
485    /// `evaluation_prompt` for back-compat via [`GoldConfig::effective_goal`].
486    pub fn active_goal(&self) -> Option<&str> {
487        self.gold_config
488            .as_ref()
489            .filter(|cfg| cfg.enabled)
490            .and_then(GoldConfig::effective_goal)
491    }
492
493    /// Whether the Codex-style autonomous goal loop is active for this run.
494    ///
495    /// This requires Gold to be enabled, a goal to be set, AND auto-continue to
496    /// be on. Only then is the `update_goal` self-report tool surfaced to the
497    /// model and the terminal double-check allowed to veto a premature stop.
498    /// When Gold is enabled without auto-continue, the evaluator stays purely
499    /// observational (legacy behavior).
500    pub fn goal_loop_active(&self) -> bool {
501        self.gold_config.as_ref().is_some_and(|cfg| {
502            cfg.enabled && cfg.auto_continue_enabled && cfg.effective_goal().is_some()
503        })
504    }
505
506    /// Whether the guardian review gate is active for this run: a spawner is
507    /// wired (so the runner can actually create the reviewer child) AND the
508    /// config is present and enabled.
509    pub fn guardian_active(&self) -> bool {
510        self.guardian_spawner.is_some()
511            && self.guardian_config.as_ref().is_some_and(|cfg| cfg.enabled)
512    }
513
514    /// Maximum guardian review passes for this run (the budget). `0` when no
515    /// guardian config is set.
516    pub fn guardian_max_reviews(&self) -> u32 {
517        self.guardian_config
518            .as_ref()
519            .map_or(0, |cfg| cfg.max_reviews)
520    }
521
522    /// The reviewer model override, if a guardian config sets one.
523    pub fn guardian_model(&self) -> Option<&str> {
524        self.guardian_config
525            .as_ref()
526            .and_then(|cfg| cfg.model_name.as_deref())
527    }
528
529    /// Whether child→parent approval delegation is wired for this run.
530    pub fn delegation_active(&self) -> bool {
531        self.approval_delegate.is_some()
532    }
533}
534
535#[cfg(test)]
536mod tests;