bamboo_engine/runtime/config.rs
1use std::collections::BTreeSet;
2use std::path::PathBuf;
3use std::sync::Arc;
4
5use bamboo_agent_core::composition::CompositionExecutor;
6use bamboo_agent_core::storage::AttachmentReader;
7use bamboo_agent_core::storage::Storage;
8use bamboo_agent_core::tools::ToolSchema;
9use bamboo_agent_core::GoldConfidence;
10use bamboo_compression::TokenBudget;
11use bamboo_config::MemoryConfig;
12use bamboo_config::PermissionMode;
13use bamboo_domain::ReasoningEffort;
14use bamboo_domain::RuntimeSessionPersistence;
15use bamboo_llm::LLMProvider;
16use bamboo_metrics::MetricsCollector;
17use bamboo_skills::SkillManager;
18use bamboo_tools::ToolRegistry;
19use serde::{Deserialize, Serialize};
20
21#[derive(Clone, Default)]
22pub struct AuxiliaryModelConfig {
23 pub fast_model_name: Option<String>,
24 pub fast_model_provider: Option<Arc<dyn LLMProvider>>,
25 pub background_model_name: Option<String>,
26 pub planning_model_name: Option<String>,
27 pub search_model_name: Option<String>,
28 pub summarization_model_name: Option<String>,
29 pub background_model_provider: Option<Arc<dyn LLMProvider>>,
30 pub summarization_model_provider: Option<Arc<dyn LLMProvider>>,
31}
32
/// Default for [`GoldConfig::max_output_tokens`]; used both by serde when the
/// field is missing and by `GoldConfig::default()`.
fn default_gold_max_output_tokens() -> u32 {
    const DEFAULT_MAX_OUTPUT_TOKENS: u32 = 1024;
    DEFAULT_MAX_OUTPUT_TOKENS
}
36
/// Default for [`GoldConfig::max_auto_continuations`]; used both by serde when
/// the field is missing and by `GoldConfig::default()`.
fn default_gold_max_auto_continuations() -> u32 {
    const DEFAULT_MAX_AUTO_CONTINUATIONS: u32 = 3;
    DEFAULT_MAX_AUTO_CONTINUATIONS
}
40
41fn default_gold_min_confidence() -> GoldConfidence {
42 GoldConfidence::Medium
43}
44
45#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
46#[serde(default)]
47pub struct GoldConfig {
48 /// Master switch for Gold observe-only evaluation.
49 #[serde(default)]
50 pub enabled: bool,
51 /// Independent switch for Phase 2 low-risk auto-answer.
52 ///
53 /// Kept separate from `enabled` so Phase 1 observe-only users do not
54 /// implicitly opt into automatic clarification responses.
55 #[serde(default)]
56 pub auto_answer_enabled: bool,
57 /// Independent switch for Phase 3 server-side auto-continue.
58 ///
59 /// Kept separate from both `enabled` and `auto_answer_enabled` so users can
60 /// opt into terminal auto-resume explicitly without enabling other Gold
61 /// automation behaviors.
62 #[serde(default)]
63 pub auto_continue_enabled: bool,
64 /// Optional dedicated model for Gold evaluation. Falls back to fast model,
65 /// then the main chat model when absent.
66 #[serde(default, skip_serializing_if = "Option::is_none")]
67 pub model_name: Option<String>,
68 /// The user's goal for this session.
69 ///
70 /// Unlike `evaluation_prompt` (which only tunes the *judge*), the goal is
71 /// surfaced to the *main* executing agent as a persistent system-prompt
72 /// block so it actively works toward it. The Gold evaluator also measures
73 /// progress against this text.
74 #[serde(default, skip_serializing_if = "Option::is_none")]
75 pub goal: Option<String>,
76 /// Optional custom prompt suffix appended to the built-in Gold evaluator
77 /// prompt. This tunes the judge only; it does not set the goal.
78 #[serde(default, skip_serializing_if = "Option::is_none")]
79 pub evaluation_prompt: Option<String>,
80 /// Output token limit for the Gold evaluator call.
81 #[serde(default = "default_gold_max_output_tokens")]
82 pub max_output_tokens: u32,
83 /// Maximum number of automatic Gold continuations allowed per session.
84 #[serde(default = "default_gold_max_auto_continuations")]
85 pub max_auto_continuations: u32,
86 /// Minimum evaluator confidence required before Gold auto-continues or
87 /// auto-answers. Defaults to `medium` so the loop fires on reasonably
88 /// confident verdicts rather than only `high`.
89 #[serde(default = "default_gold_min_confidence")]
90 pub min_auto_continue_confidence: GoldConfidence,
91}
92
93impl Default for GoldConfig {
94 fn default() -> Self {
95 Self {
96 enabled: false,
97 auto_answer_enabled: false,
98 auto_continue_enabled: false,
99 model_name: None,
100 goal: None,
101 evaluation_prompt: None,
102 max_output_tokens: default_gold_max_output_tokens(),
103 max_auto_continuations: default_gold_max_auto_continuations(),
104 min_auto_continue_confidence: default_gold_min_confidence(),
105 }
106 }
107}
108
109impl GoldConfig {
110 /// The session goal text, falling back to the legacy `evaluation_prompt`
111 /// for sessions created before the dedicated `goal` field existed.
112 ///
113 /// Returns `None` when neither field holds non-empty text.
114 pub fn effective_goal(&self) -> Option<&str> {
115 self.goal
116 .as_deref()
117 .or(self.evaluation_prompt.as_deref())
118 .map(str::trim)
119 .filter(|value| !value.is_empty())
120 }
121}
122
/// Default for [`GuardianConfig::max_reviews`]; used both by serde when the
/// field is missing and by `GuardianConfig::default()`.
fn default_guardian_max_reviews() -> u32 {
    const DEFAULT_MAX_REVIEWS: u32 = 2;
    DEFAULT_MAX_REVIEWS
}
126
127/// Configuration for the guardian adversarial-review terminal gate.
128///
129/// Mirrors [`GoldConfig`]: a plain, serde-defaulting struct surfaced per run.
130/// When `enabled` is false (the default) the guardian gate is inactive and the
131/// terminal completion path is unchanged.
132#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
133#[serde(default)]
134pub struct GuardianConfig {
135 /// Master switch for the guardian review gate.
136 #[serde(default)]
137 pub enabled: bool,
138 /// Optional dedicated reviewer model. Falls back to the run's main model.
139 #[serde(default, skip_serializing_if = "Option::is_none")]
140 pub model_name: Option<String>,
141 /// Maximum guardian review passes per run (budget; mirrors
142 /// [`GoldConfig::max_auto_continuations`]).
143 #[serde(default = "default_guardian_max_reviews")]
144 pub max_reviews: u32,
145}
146
147impl Default for GuardianConfig {
148 fn default() -> Self {
149 Self {
150 enabled: false,
151 model_name: None,
152 max_reviews: default_guardian_max_reviews(),
153 }
154 }
155}
156
157/// Late-bound spawner for the guardian reviewer child.
158///
159/// The runner cannot construct a child directly: the `SpawnScheduler` is built
160/// *after* the `Agent` that drives the runner (a construction-order cycle), so
161/// the terminal gate spawns the reviewer through this trait object, injected
162/// per-request on [`AgentLoopConfig`] exactly like `auxiliary_model_resolver`.
163/// The implementation lives in the server (it captures the already-built
164/// scheduler + child-session adapter); the engine holds only the trait, keeping
165/// the engine free of any dependency on server/AppState types.
166#[async_trait::async_trait]
167pub trait GuardianSpawner: Send + Sync {
168 /// Create a read-only reviewer child for `parent_session_id`, seeded with
169 /// `review_prompt`, enqueue it to run, and return its session id so the
170 /// caller can register a wait on it.
171 async fn spawn_guardian_review(
172 &self,
173 parent_session: &bamboo_agent_core::Session,
174 review_prompt: String,
175 model: String,
176 disabled_tools: Option<BTreeSet<String>>,
177 ) -> Result<String, String>;
178}
179
180/// A child sub-agent's request to have a gated tool approved by its parent.
181///
182/// A non-bypassed child cannot answer its own permission prompt (no human is
183/// attached to a child session), so the request is delegated up to the parent.
184#[derive(Debug, Clone)]
185pub struct ChildApprovalRequest {
186 pub child_session_id: String,
187 pub parent_session_id: String,
188 /// The gated tool call on the child to re-execute once approved.
189 pub child_tool_call_id: String,
190 pub tool_name: String,
191 /// Permission type as a string (e.g. "WriteFile", "ExecuteCommand").
192 pub permission_type: String,
193 /// The concrete resource the permission applies to (path, command, …).
194 pub resource: String,
195 /// Human-facing approval question to surface on the parent.
196 pub question: String,
197 /// The raw `awaiting_permission_approval` payload the child's executor built,
198 /// so the parent can reuse the existing grant-extraction path verbatim.
199 pub approval_payload: serde_json::Value,
200}
201
/// Next step for the executor once a child's approval has been delegated
/// upward.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ChildApprovalOutcome {
    /// The request was registered on the parent; the child must SUSPEND and
    /// wait for the decision.
    Delegated,
    /// Parent policy approved automatically (bypass / existing grant); go
    /// ahead and execute.
    AutoApproved,
    /// Parent policy denied automatically; the executor must deny the tool.
    AutoDenied,
}
212
213/// Late-bound delegate that routes a child's approval request up to its parent.
214///
215/// Injected per-request on [`AgentLoopConfig`] exactly like [`GuardianSpawner`];
216/// the trait lives in the engine, the implementation in the server (it owns the
217/// parent session store + pending-question + notification machinery).
218#[async_trait::async_trait]
219pub trait ApprovalDelegate: Send + Sync {
220 /// Register `request` on its parent (or auto-resolve by policy) and report
221 /// what the child's executor should do next.
222 async fn delegate_child_approval(
223 &self,
224 request: ChildApprovalRequest,
225 ) -> Result<ChildApprovalOutcome, String>;
226}
227
/// Strategy used when image parts have to be degraded for a text-only
/// provider path.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ImageFallbackMode {
    /// NOTE(review): presumably substitutes a textual placeholder for the
    /// image; confirm at the use site.
    Placeholder,
    /// NOTE(review): presumably reports an error instead of sending the
    /// image; confirm at the use site.
    Error,
    /// NOTE(review): presumably replaces the image with OCR-extracted text;
    /// confirm at the use site.
    Ocr,
    /// Have a vision-capable LLM describe the image, then swap the image for
    /// that textual description so text-only models can understand the
    /// content.
    Vision,
}
238
239#[derive(Debug, Clone, PartialEq, Eq)]
240pub struct ImageFallbackConfig {
241 pub mode: ImageFallbackMode,
242 /// Vision model name for `Vision` mode. Falls back to the session's main model
243 /// when `None`.
244 pub vision_model: Option<String>,
245}
246
/// Feature flags for prompt-time memory injection.
///
/// The `Default` impl turns everything on except `relevant_recall_rerank`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct PromptMemoryFlags {
    /// Project prompt injection switch (default: `true`).
    pub project_prompt_injection: bool,
    /// Relevant-recall switch (default: `true`).
    pub relevant_recall: bool,
    /// Relevant-recall rerank switch (default: `false`).
    pub relevant_recall_rerank: bool,
    /// Project-first dream switch (default: `true`).
    pub project_first_dream: bool,
}

impl Default for PromptMemoryFlags {
    /// All flags enabled, with rerank as the single opt-in exception.
    fn default() -> Self {
        const ON: bool = true;
        const OFF: bool = false;

        Self {
            project_prompt_injection: ON,
            relevant_recall: ON,
            relevant_recall_rerank: OFF,
            project_first_dream: ON,
        }
    }
}
265
266impl From<&MemoryConfig> for PromptMemoryFlags {
267 fn from(value: &MemoryConfig) -> Self {
268 Self {
269 project_prompt_injection: value.project_prompt_injection,
270 relevant_recall: value.relevant_recall,
271 relevant_recall_rerank: value.relevant_recall_rerank,
272 project_first_dream: value.project_first_dream,
273 }
274 }
275}
276
277/// Configuration for the agent loop.
278#[non_exhaustive]
279pub struct AgentLoopConfig {
280 pub(crate) max_rounds: usize,
281 pub(crate) system_prompt: Option<String>,
282 /// Skill IDs that are disabled globally for this execution.
283 pub(crate) disabled_skill_ids: BTreeSet<String>,
284 /// Optional explicit skill selection for this execution.
285 /// When set, only these skill IDs are considered for skill context and allowlists.
286 pub(crate) selected_skill_ids: Option<Vec<String>>,
287 /// Optional active skill mode for this execution.
288 ///
289 /// When set, skill discovery prefers `skills-<mode>` directories over generic
290 /// directories for the same skill id.
291 pub(crate) selected_skill_mode: Option<String>,
292 pub(crate) additional_tool_schemas: Vec<ToolSchema>,
293 pub(crate) tool_registry: Arc<ToolRegistry>,
294 pub(crate) composition_executor: Option<Arc<CompositionExecutor>>,
295 pub(crate) skill_manager: Option<Arc<SkillManager>>,
296 /// If true, skip appending the initial user message (already present in session).
297 pub(crate) skip_initial_user_message: bool,
298 /// Optional storage for persisting session changes
299 pub(crate) storage: Option<Arc<dyn Storage>>,
300 /// Optional runtime persistence for non-authoritative session saves.
301 /// When set, engine save sites use this instead of `storage` for writes.
302 pub(crate) persistence: Option<Arc<dyn RuntimeSessionPersistence>>,
303 /// Optional attachment reader for resolving `bamboo-attachment://...` references
304 /// into `data:` URLs for upstream providers. This must not mutate session storage.
305 pub(crate) attachment_reader: Option<Arc<dyn AttachmentReader>>,
306 /// Optional asynchronous metrics collector
307 pub(crate) metrics_collector: Option<MetricsCollector>,
308 /// Model name used for metrics attribution
309 pub(crate) model_name: Option<String>,
310 /// Fast/cheap model for lightweight tasks (task evaluation, search, etc.).
311 ///
312 /// Call sites may fall back to `model_name` when this is unset.
313 pub(crate) fast_model_name: Option<String>,
314 /// Optional provider override for lightweight fast-model LLM calls.
315 pub(crate) fast_model_provider: Option<Arc<dyn LLMProvider>>,
316 /// Fast/cheap model for memory/background tasks.
317 ///
318 /// This must not silently fall back to the main interaction model.
319 pub(crate) background_model_name: Option<String>,
320
321 /// Model for planning/coordination tasks (task decomposition, architecture).
322 /// Falls back to `model_name` when unset.
323 pub(crate) planning_model_name: Option<String>,
324 /// Model for search/navigation tasks (grep, file listing, symbol resolution).
325 /// Falls back to `fast_model_name` when unset.
326 pub(crate) search_model_name: Option<String>,
327 /// Custom instructions for conversation summarization, injected into the
328 /// LLM summary prompt. Lets users control what the summary focuses on.
329 ///
330 /// Resolution order: session-level > config-level > built-in defaults.
331 pub(crate) compression_instructions: Option<String>,
332 /// Dedicated model for summarization. Falls back to `background_model_name`.
333 pub(crate) summarization_model_name: Option<String>,
334 /// Optional provider override for memory/background model LLM calls.
335 ///
336 /// When set, memory recall rerank and other memory/background tasks use this
337 /// provider instead of the shared agent loop provider.
338 pub(crate) background_model_provider: Option<Arc<dyn LLMProvider>>,
339 /// Optional provider override for summarization / context compression calls.
340 ///
341 /// When set, conversation/task summarization uses this provider instead of
342 /// the shared agent loop provider.
343 pub(crate) summarization_model_provider: Option<Arc<dyn LLMProvider>>,
344 /// Provider routing key used for provider-specific request behavior.
345 ///
346 /// In multi-instance mode this may be the instance id.
347 pub(crate) provider_name: Option<String>,
348 /// Underlying provider type (for example `openai`, `anthropic`, `copilot`).
349 ///
350 /// This is distinct from `provider_name` so provider-specific behavior can
351 /// remain correct when routing keys are instance ids.
352 pub(crate) provider_type: Option<String>,
353 /// Optional request-time reasoning effort override.
354 pub(crate) reasoning_effort: Option<ReasoningEffort>,
355 /// Bamboo application data directory (typically `~/.bamboo`).
356 ///
357 /// Used by runtime features that persist auxiliary artifacts outside the
358 /// session store, such as durable plan mode files under `~/.bamboo/plan`.
359 pub(crate) app_data_dir: Option<PathBuf>,
360 /// Tool names that should be excluded from schemas sent to the LLM.
361 pub(crate) disabled_tools: BTreeSet<String>,
362 /// Token budget for context management (optional, defaults to model's limits)
363 pub(crate) token_budget: Option<TokenBudget>,
364 /// Optional image fallback behavior applied to *LLM requests only* (never persisted).
365 ///
366 /// This is intended for text-only provider paths where image parts must be degraded
367 /// (placeholder / OCR / error) without leaking into stored session history or UI.
368 pub(crate) image_fallback: Option<ImageFallbackConfig>,
369 /// Feature flags controlling prompt-time memory injection behavior.
370 pub(crate) prompt_memory_flags: PromptMemoryFlags,
371 /// Maximum tool calls allowed per round (default: 80).
372 pub(crate) max_tool_calls_per_round: usize,
373 /// Maximum consecutive failures per tool before circuit breaker (default: 3).
374 pub(crate) max_consecutive_failures_per_tool: usize,
375 /// Tool names that require strict argument validation.
376 pub(crate) strict_argument_tool_names: Vec<String>,
377 /// Per-tool execution timeout in seconds (default: 120).
378 pub(crate) per_tool_timeout_secs: u64,
379 /// Parallel batch execution timeout in seconds (default: 300).
380 pub(crate) parallel_batch_timeout_secs: u64,
381 /// Permission mode for this execution (default: None = use PermissionConfig's mode).
382 pub(crate) permission_mode: Option<PermissionMode>,
383 /// Optional Gold observe-only evaluator configuration.
384 ///
385 /// When `None` or `enabled == false`, Gold evaluation is disabled and the
386 /// existing execute/respond/resume loop remains unchanged.
387 pub(crate) gold_config: Option<GoldConfig>,
388 /// Optional guardian adversarial-review gate configuration. When `None` or
389 /// `enabled == false`, the guardian terminal gate is inactive.
390 pub(crate) guardian_config: Option<GuardianConfig>,
391 /// Late-bound spawner for the guardian reviewer child. `None` (the default)
392 /// leaves the guardian gate inert even when `guardian_config.enabled` is set,
393 /// since the runner cannot create a child without it. Wired by the server.
394 pub(crate) guardian_spawner: Option<Arc<dyn GuardianSpawner>>,
395 /// Late-bound delegate that routes a child's gated-tool approval request up
396 /// to its parent (Phase 2). `None` (the default) leaves child gating on its
397 /// legacy path. Wired by the server.
398 pub(crate) approval_delegate: Option<Arc<dyn ApprovalDelegate>>,
399 /// Enable dynamic per-round model routing based on task complexity.
400 /// When true, the pipeline classifies complexity at each round end and
401 /// stores the result in session metadata.
402 pub(crate) features_dynamic_model_routing: bool,
403 /// Optional per-round resolver for auxiliary model settings that should
404 /// follow live global config rather than stay frozen for the whole run.
405 ///
406 /// The main chat model remains session/request scoped; this hook is only
407 /// for fast/background/planning/search/summarization helpers.
408 pub(crate) auxiliary_model_resolver:
409 Option<Arc<dyn Fn() -> AuxiliaryModelConfig + Send + Sync>>,
410 /// Server-level usage guidance contributed by the run's tool executor —
411 /// chiefly the `instructions` connected MCP servers return from `initialize`.
412 /// Captured once at config construction (from `ToolExecutor::tool_guidance`)
413 /// and appended to the tool-guide section of the system prompt, so a server's
414 /// own how-to-use notes appear only while that server is loaded for the run.
415 pub(crate) mcp_tool_guidance: Option<String>,
416}
417
418impl Default for AgentLoopConfig {
419 fn default() -> Self {
420 Self {
421 max_rounds: 200,
422 system_prompt: None,
423 disabled_skill_ids: BTreeSet::new(),
424 selected_skill_ids: None,
425 selected_skill_mode: None,
426 additional_tool_schemas: Vec::new(),
427 tool_registry: Arc::new(ToolRegistry::new()),
428 composition_executor: None,
429 skill_manager: None,
430 skip_initial_user_message: false,
431 storage: None,
432 persistence: None,
433 attachment_reader: None,
434 metrics_collector: None,
435 model_name: None,
436 fast_model_name: None,
437 fast_model_provider: None,
438 background_model_name: None,
439 planning_model_name: None,
440 search_model_name: None,
441 compression_instructions: None,
442 summarization_model_name: None,
443 background_model_provider: None,
444 summarization_model_provider: None,
445 provider_name: None,
446 provider_type: None,
447 reasoning_effort: None,
448 app_data_dir: None,
449 disabled_tools: BTreeSet::new(),
450 token_budget: None,
451 image_fallback: None,
452 prompt_memory_flags: PromptMemoryFlags::default(),
453 max_tool_calls_per_round: 80,
454 max_consecutive_failures_per_tool: 3,
455 strict_argument_tool_names: vec![
456 "Write".into(),
457 "Edit".into(),
458 "NotebookEdit".into(),
459 "apply_patch".into(),
460 "Bash".into(),
461 "Task".into(),
462 "SubAgent".into(),
463 "scheduler".into(),
464 "sub_session_manager".into(),
465 "session_note".into(),
466 "memory_note".into(),
467 ],
468 per_tool_timeout_secs: 120,
469 parallel_batch_timeout_secs: 300,
470 permission_mode: None,
471 gold_config: None,
472 guardian_config: None,
473 guardian_spawner: None,
474 approval_delegate: None,
475 features_dynamic_model_routing: false,
476 auxiliary_model_resolver: None,
477 mcp_tool_guidance: None,
478 }
479 }
480}
481
482impl AgentLoopConfig {
483 /// The active session goal to surface to the main agent, or `None` when
484 /// Gold is disabled or no goal is set. Falls back to the legacy
485 /// `evaluation_prompt` for back-compat via [`GoldConfig::effective_goal`].
486 pub fn active_goal(&self) -> Option<&str> {
487 self.gold_config
488 .as_ref()
489 .filter(|cfg| cfg.enabled)
490 .and_then(GoldConfig::effective_goal)
491 }
492
493 /// Whether the Codex-style autonomous goal loop is active for this run.
494 ///
495 /// This requires Gold to be enabled, a goal to be set, AND auto-continue to
496 /// be on. Only then is the `update_goal` self-report tool surfaced to the
497 /// model and the terminal double-check allowed to veto a premature stop.
498 /// When Gold is enabled without auto-continue, the evaluator stays purely
499 /// observational (legacy behavior).
500 pub fn goal_loop_active(&self) -> bool {
501 self.gold_config.as_ref().is_some_and(|cfg| {
502 cfg.enabled && cfg.auto_continue_enabled && cfg.effective_goal().is_some()
503 })
504 }
505
506 /// Whether the guardian review gate is active for this run: a spawner is
507 /// wired (so the runner can actually create the reviewer child) AND the
508 /// config is present and enabled.
509 pub fn guardian_active(&self) -> bool {
510 self.guardian_spawner.is_some()
511 && self.guardian_config.as_ref().is_some_and(|cfg| cfg.enabled)
512 }
513
514 /// Maximum guardian review passes for this run (the budget). `0` when no
515 /// guardian config is set.
516 pub fn guardian_max_reviews(&self) -> u32 {
517 self.guardian_config
518 .as_ref()
519 .map_or(0, |cfg| cfg.max_reviews)
520 }
521
522 /// The reviewer model override, if a guardian config sets one.
523 pub fn guardian_model(&self) -> Option<&str> {
524 self.guardian_config
525 .as_ref()
526 .and_then(|cfg| cfg.model_name.as_deref())
527 }
528
529 /// Whether child→parent approval delegation is wired for this run.
530 pub fn delegation_active(&self) -> bool {
531 self.approval_delegate.is_some()
532 }
533}
534
535#[cfg(test)]
536mod tests;