bamboo-engine 2026.6.18

Execution engine and orchestration for the Bamboo agent framework
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
use std::collections::BTreeSet;
use std::path::PathBuf;
use std::sync::Arc;

use bamboo_agent_core::composition::CompositionExecutor;
use bamboo_agent_core::storage::AttachmentReader;
use bamboo_agent_core::storage::Storage;
use bamboo_agent_core::tools::ToolSchema;
use bamboo_agent_core::GoldConfidence;
use bamboo_compression::TokenBudget;
use bamboo_config::MemoryConfig;
use bamboo_config::PermissionMode;
use bamboo_domain::ReasoningEffort;
use bamboo_domain::RuntimeSessionPersistence;
use bamboo_llm::LLMProvider;
use bamboo_metrics::MetricsCollector;
use bamboo_skills::SkillManager;
use bamboo_tools::ToolRegistry;
use serde::{Deserialize, Serialize};

/// Bundle of auxiliary (non-main) model settings.
///
/// This is the value produced by the optional
/// `AgentLoopConfig::auxiliary_model_resolver` hook. Every field is optional
/// and shares its name with the corresponding `AgentLoopConfig` field; the
/// derived `Default` leaves all of them `None`.
#[derive(Clone, Default)]
pub struct AuxiliaryModelConfig {
    /// Model name for lightweight fast-model tasks.
    pub fast_model_name: Option<String>,
    /// Provider override for fast-model LLM calls.
    pub fast_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Model name for memory/background tasks.
    pub background_model_name: Option<String>,
    /// Model name for planning/coordination tasks.
    pub planning_model_name: Option<String>,
    /// Model name for search/navigation tasks.
    pub search_model_name: Option<String>,
    /// Model name dedicated to summarization.
    pub summarization_model_name: Option<String>,
    /// Provider override for memory/background LLM calls.
    pub background_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Provider override for summarization LLM calls.
    pub summarization_model_provider: Option<Arc<dyn LLMProvider>>,
}

/// Serde/`Default` fallback for [`GoldConfig::max_output_tokens`].
fn default_gold_max_output_tokens() -> u32 {
    const MAX_OUTPUT_TOKENS: u32 = 1024;
    MAX_OUTPUT_TOKENS
}

/// Serde/`Default` fallback for [`GoldConfig::max_auto_continuations`].
fn default_gold_max_auto_continuations() -> u32 {
    const MAX_AUTO_CONTINUATIONS: u32 = 3;
    MAX_AUTO_CONTINUATIONS
}

/// Serde/`Default` fallback for [`GoldConfig::min_auto_continue_confidence`]:
/// the `Medium` confidence level.
fn default_gold_min_confidence() -> GoldConfidence {
    GoldConfidence::Medium
}

/// Configuration for the Gold evaluator and its optional automation phases.
///
/// Every field carries a serde default, so a partial (or empty) document
/// deserializes successfully; the three switches default to `false` and the
/// numeric/confidence limits default to the values of the
/// `default_gold_*` helper functions.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct GoldConfig {
    /// Master switch for Gold observe-only evaluation.
    #[serde(default)]
    pub enabled: bool,
    /// Independent switch for Phase 2 low-risk auto-answer.
    ///
    /// Kept separate from `enabled` so Phase 1 observe-only users do not
    /// implicitly opt into automatic clarification responses.
    #[serde(default)]
    pub auto_answer_enabled: bool,
    /// Independent switch for Phase 3 server-side auto-continue.
    ///
    /// Kept separate from both `enabled` and `auto_answer_enabled` so users can
    /// opt into terminal auto-resume explicitly without enabling other Gold
    /// automation behaviors.
    #[serde(default)]
    pub auto_continue_enabled: bool,
    /// Optional dedicated model for Gold evaluation. Falls back to fast model,
    /// then the main chat model when absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_name: Option<String>,
    /// The user's goal for this session.
    ///
    /// Unlike `evaluation_prompt` (which only tunes the *judge*), the goal is
    /// surfaced to the *main* executing agent as a persistent system-prompt
    /// block so it actively works toward it. The Gold evaluator also measures
    /// progress against this text.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub goal: Option<String>,
    /// Optional custom prompt suffix appended to the built-in Gold evaluator
    /// prompt. This tunes the judge only; it does not set the goal.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub evaluation_prompt: Option<String>,
    /// Output token limit for the Gold evaluator call (default: 1024).
    #[serde(default = "default_gold_max_output_tokens")]
    pub max_output_tokens: u32,
    /// Maximum number of automatic Gold continuations allowed per session
    /// (default: 3).
    #[serde(default = "default_gold_max_auto_continuations")]
    pub max_auto_continuations: u32,
    /// Minimum evaluator confidence required before Gold auto-continues or
    /// auto-answers. Defaults to `medium` so the loop fires on reasonably
    /// confident verdicts rather than only `high`.
    #[serde(default = "default_gold_min_confidence")]
    pub min_auto_continue_confidence: GoldConfidence,
}

impl Default for GoldConfig {
    /// Builds the fully switched-off configuration: all three switches are
    /// `false`, no model/goal/prompt is set, and the limits take the values of
    /// the `default_gold_*` helpers.
    fn default() -> Self {
        // Resolve the limits through the same helpers the serde attributes
        // name, so `Default` and deserialization cannot drift apart.
        let max_output_tokens = default_gold_max_output_tokens();
        let max_auto_continuations = default_gold_max_auto_continuations();
        let min_auto_continue_confidence = default_gold_min_confidence();

        Self {
            // Limits.
            max_output_tokens,
            max_auto_continuations,
            min_auto_continue_confidence,
            // Optional text settings.
            model_name: None,
            goal: None,
            evaluation_prompt: None,
            // Switches: everything is opt-in.
            enabled: false,
            auto_answer_enabled: false,
            auto_continue_enabled: false,
        }
    }
}

impl GoldConfig {
    /// The session goal text, falling back to the legacy `evaluation_prompt`
    /// for sessions created before the dedicated `goal` field existed.
    ///
    /// Each candidate is trimmed and discarded if blank *before* the fallback
    /// is considered, so an empty or whitespace-only `goal` does not mask a
    /// usable `evaluation_prompt`.
    ///
    /// Returns `None` when neither field holds non-empty text; otherwise the
    /// returned slice has surrounding whitespace removed.
    pub fn effective_goal(&self) -> Option<&str> {
        /// Trims `text` and drops it when nothing but whitespace remains.
        fn non_blank(text: Option<&str>) -> Option<&str> {
            text.map(str::trim).filter(|value| !value.is_empty())
        }

        non_blank(self.goal.as_deref())
            .or_else(|| non_blank(self.evaluation_prompt.as_deref()))
    }
}

/// Serde/`Default` fallback for [`GuardianConfig::max_reviews`].
fn default_guardian_max_reviews() -> u32 {
    const MAX_REVIEWS: u32 = 2;
    MAX_REVIEWS
}

/// Configuration for the guardian adversarial-review terminal gate.
///
/// Mirrors [`GoldConfig`]: a plain, serde-defaulting struct surfaced per run.
/// When `enabled` is false (the default) the guardian gate is inactive and the
/// terminal completion path is unchanged.
///
/// Every field carries a serde default, so a partial (or empty) document
/// deserializes successfully.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct GuardianConfig {
    /// Master switch for the guardian review gate (default: `false`).
    #[serde(default)]
    pub enabled: bool,
    /// Optional dedicated reviewer model. Falls back to the run's main model.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_name: Option<String>,
    /// Maximum guardian review passes per run (budget; mirrors
    /// [`GoldConfig::max_auto_continuations`]). Defaults to 2.
    #[serde(default = "default_guardian_max_reviews")]
    pub max_reviews: u32,
}

impl Default for GuardianConfig {
    /// Builds the inactive configuration: gate disabled, no reviewer model
    /// override, and the review budget taken from
    /// `default_guardian_max_reviews`.
    fn default() -> Self {
        // Same helper the serde attribute names, so both paths stay in sync.
        let max_reviews = default_guardian_max_reviews();

        Self {
            max_reviews,
            model_name: None,
            enabled: false,
        }
    }
}

/// Late-bound spawner for the guardian reviewer child.
///
/// The runner cannot construct a child directly: the `SpawnScheduler` is built
/// *after* the `Agent` that drives the runner (a construction-order cycle), so
/// the terminal gate spawns the reviewer through this trait object, injected
/// per-request on [`AgentLoopConfig`] exactly like `auxiliary_model_resolver`.
/// The implementation lives in the server (it captures the already-built
/// scheduler + child-session adapter); the engine holds only the trait, keeping
/// the engine free of any dependency on server/AppState types.
#[async_trait::async_trait]
pub trait GuardianSpawner: Send + Sync {
    /// Create a read-only reviewer child for `parent_session`, seeded with
    /// `review_prompt`, enqueue it to run, and return its session id so the
    /// caller can register a wait on it.
    ///
    /// * `parent_session` – the session the reviewer child is created for.
    /// * `review_prompt` – prompt text the reviewer child is seeded with.
    /// * `model` – model name for the reviewer (presumably the guardian
    ///   override or the run's main model — confirm against the caller).
    /// * `disabled_tools` – optional set of tool names; presumably tools to
    ///   withhold from the reviewer child — confirm against the implementation.
    ///
    /// # Errors
    ///
    /// Returns `Err` with a message string when the implementation fails to
    /// spawn the reviewer; the exact failure conditions are
    /// implementation-defined.
    async fn spawn_guardian_review(
        &self,
        parent_session: &bamboo_agent_core::Session,
        review_prompt: String,
        model: String,
        disabled_tools: Option<BTreeSet<String>>,
    ) -> Result<String, String>;
}

/// A child sub-agent's request to have a gated tool approved by its parent.
///
/// A non-bypassed child cannot answer its own permission prompt (no human is
/// attached to a child session), so the request is delegated up to the parent.
#[derive(Debug, Clone)]
pub struct ChildApprovalRequest {
    /// Session id of the child sub-agent making the request.
    pub child_session_id: String,
    /// Session id of the parent the request is delegated to.
    pub parent_session_id: String,
    /// The gated tool call on the child to re-execute once approved.
    pub child_tool_call_id: String,
    /// Name of the gated tool the child wants to run.
    pub tool_name: String,
    /// Permission type as a string (e.g. "WriteFile", "ExecuteCommand").
    pub permission_type: String,
    /// The concrete resource the permission applies to (path, command, …).
    pub resource: String,
    /// Human-facing approval question to surface on the parent.
    pub question: String,
    /// The raw `awaiting_permission_approval` payload the child's executor built,
    /// so the parent can reuse the existing grant-extraction path verbatim.
    pub approval_payload: serde_json::Value,
}

/// What the executor should do after delegating a child's approval upward.
///
/// This is the success value of [`ApprovalDelegate::delegate_child_approval`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChildApprovalOutcome {
    /// Registered on the parent; the child must SUSPEND and await the decision.
    Delegated,
    /// Parent policy auto-approved (bypass / existing grant); proceed to execute.
    AutoApproved,
    /// Parent policy auto-denied; the executor must deny the tool.
    AutoDenied,
}

/// Late-bound delegate that routes a child's approval request up to its parent.
///
/// Injected per-request on [`AgentLoopConfig`] exactly like [`GuardianSpawner`];
/// the trait lives in the engine, the implementation in the server (it owns the
/// parent session store + pending-question + notification machinery).
#[async_trait::async_trait]
pub trait ApprovalDelegate: Send + Sync {
    /// Register `request` on its parent (or auto-resolve by policy) and report
    /// what the child's executor should do next.
    ///
    /// # Errors
    ///
    /// Returns `Err` with a message string when the implementation cannot
    /// register or resolve the request; the exact failure conditions are
    /// implementation-defined.
    async fn delegate_child_approval(
        &self,
        request: ChildApprovalRequest,
    ) -> Result<ChildApprovalOutcome, String>;
}

/// Strategy for degrading image parts in an LLM request; selected through
/// [`ImageFallbackConfig::mode`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ImageFallbackMode {
    /// Presumably substitutes a textual placeholder for the image — confirm
    /// against the request-building code.
    Placeholder,
    /// Presumably fails the request instead of degrading the image — confirm
    /// against the request-building code.
    Error,
    /// Presumably replaces the image with text extracted via OCR — confirm
    /// against the request-building code.
    Ocr,
    /// Use a vision-capable LLM to describe the image, then replace the image
    /// with the textual description so that text-only models can understand
    /// the content.
    Vision,
}

/// Image fallback settings: the degradation strategy plus the optional model
/// used by the `Vision` strategy.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImageFallbackConfig {
    /// Which fallback strategy to apply to image parts.
    pub mode: ImageFallbackMode,
    /// Vision model name for `Vision` mode. Falls back to the session's main model
    /// when `None`.
    pub vision_model: Option<String>,
}

/// Feature flags controlling prompt-time memory injection behavior.
///
/// Can be built from a `MemoryConfig` (copying the four same-named fields) or
/// via `Default`, which enables everything except `relevant_recall_rerank`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PromptMemoryFlags {
    /// Default `true`. Presumably gates injecting project memory into the
    /// prompt — confirm against the prompt builder.
    pub project_prompt_injection: bool,
    /// Default `true`. Presumably gates recall of relevant memories — confirm
    /// against the prompt builder.
    pub relevant_recall: bool,
    /// Default `false`. Presumably gates reranking of recalled memories —
    /// confirm against the recall code.
    pub relevant_recall_rerank: bool,
    /// Default `true`. NOTE(review): semantics are not visible in this file;
    /// confirm against the consumer of this flag.
    pub project_first_dream: bool,
}

impl Default for PromptMemoryFlags {
    /// Turns on every prompt-memory flag except `relevant_recall_rerank`.
    fn default() -> Self {
        Self {
            // The single opt-in flag.
            relevant_recall_rerank: false,
            // Everything else is on out of the box.
            project_prompt_injection: true,
            relevant_recall: true,
            project_first_dream: true,
        }
    }
}

impl From<&MemoryConfig> for PromptMemoryFlags {
    fn from(value: &MemoryConfig) -> Self {
        Self {
            project_prompt_injection: value.project_prompt_injection,
            relevant_recall: value.relevant_recall,
            relevant_recall_rerank: value.relevant_recall_rerank,
            project_first_dream: value.project_first_dream,
        }
    }
}

/// Configuration for the agent loop.
///
/// All fields are crate-private; `Default` supplies the baseline values noted
/// on the individual fields.
#[non_exhaustive]
pub struct AgentLoopConfig {
    /// Upper bound on the number of loop rounds (default: 200).
    pub(crate) max_rounds: usize,
    /// Optional system prompt text for this execution (default: `None`).
    pub(crate) system_prompt: Option<String>,
    /// Skill IDs that are disabled globally for this execution.
    pub(crate) disabled_skill_ids: BTreeSet<String>,
    /// Optional explicit skill selection for this execution.
    /// When set, only these skill IDs are considered for skill context and allowlists.
    pub(crate) selected_skill_ids: Option<Vec<String>>,
    /// Optional active skill mode for this execution.
    ///
    /// When set, skill discovery prefers `skills-<mode>` directories over generic
    /// directories for the same skill id.
    pub(crate) selected_skill_mode: Option<String>,
    /// Extra tool schemas supplied for this execution (default: empty).
    pub(crate) additional_tool_schemas: Vec<ToolSchema>,
    /// Shared tool registry; defaults to a freshly constructed `ToolRegistry`.
    pub(crate) tool_registry: Arc<ToolRegistry>,
    /// Optional shared composition executor (default: `None`).
    pub(crate) composition_executor: Option<Arc<CompositionExecutor>>,
    /// Optional shared skill manager (default: `None`).
    pub(crate) skill_manager: Option<Arc<SkillManager>>,
    /// If true, skip appending the initial user message (already present in session).
    pub(crate) skip_initial_user_message: bool,
    /// Optional storage for persisting session changes.
    pub(crate) storage: Option<Arc<dyn Storage>>,
    /// Optional runtime persistence for non-authoritative session saves.
    /// When set, engine save sites use this instead of `storage` for writes.
    pub(crate) persistence: Option<Arc<dyn RuntimeSessionPersistence>>,
    /// Optional attachment reader for resolving `bamboo-attachment://...` references
    /// into `data:` URLs for upstream providers. This must not mutate session storage.
    pub(crate) attachment_reader: Option<Arc<dyn AttachmentReader>>,
    /// Optional asynchronous metrics collector
    pub(crate) metrics_collector: Option<MetricsCollector>,
    /// Model name used for metrics attribution
    pub(crate) model_name: Option<String>,
    /// Fast/cheap model for lightweight tasks (task evaluation, search, etc.).
    ///
    /// Call sites may fall back to `model_name` when this is unset.
    pub(crate) fast_model_name: Option<String>,
    /// Optional provider override for lightweight fast-model LLM calls.
    pub(crate) fast_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Fast/cheap model for memory/background tasks.
    ///
    /// This must not silently fall back to the main interaction model.
    pub(crate) background_model_name: Option<String>,

    /// Model for planning/coordination tasks (task decomposition, architecture).
    /// Falls back to `model_name` when unset.
    pub(crate) planning_model_name: Option<String>,
    /// Model for search/navigation tasks (grep, file listing, symbol resolution).
    /// Falls back to `fast_model_name` when unset.
    pub(crate) search_model_name: Option<String>,
    /// Custom instructions for conversation summarization, injected into the
    /// LLM summary prompt. Lets users control what the summary focuses on.
    ///
    /// Resolution order: session-level > config-level > built-in defaults.
    pub(crate) compression_instructions: Option<String>,
    /// Dedicated model for summarization. Falls back to `background_model_name`.
    pub(crate) summarization_model_name: Option<String>,
    /// Optional provider override for memory/background model LLM calls.
    ///
    /// When set, memory recall rerank and other memory/background tasks use this
    /// provider instead of the shared agent loop provider.
    pub(crate) background_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Optional provider override for summarization / context compression calls.
    ///
    /// When set, conversation/task summarization uses this provider instead of
    /// the shared agent loop provider.
    pub(crate) summarization_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Provider routing key used for provider-specific request behavior.
    ///
    /// In multi-instance mode this may be the instance id.
    pub(crate) provider_name: Option<String>,
    /// Underlying provider type (for example `openai`, `anthropic`, `copilot`).
    ///
    /// This is distinct from `provider_name` so provider-specific behavior can
    /// remain correct when routing keys are instance ids.
    pub(crate) provider_type: Option<String>,
    /// Optional request-time reasoning effort override.
    pub(crate) reasoning_effort: Option<ReasoningEffort>,
    /// Bamboo application data directory (typically `~/.bamboo`).
    ///
    /// Used by runtime features that persist auxiliary artifacts outside the
    /// session store, such as durable plan mode files under `~/.bamboo/plan`.
    pub(crate) app_data_dir: Option<PathBuf>,
    /// Tool names that should be excluded from schemas sent to the LLM.
    pub(crate) disabled_tools: BTreeSet<String>,
    /// Token budget for context management (optional, defaults to model's limits)
    pub(crate) token_budget: Option<TokenBudget>,
    /// Optional image fallback behavior applied to *LLM requests only* (never persisted).
    ///
    /// This is intended for text-only provider paths where image parts must be degraded
    /// (placeholder / OCR / vision description / error) without leaking into stored
    /// session history or UI.
    pub(crate) image_fallback: Option<ImageFallbackConfig>,
    /// Feature flags controlling prompt-time memory injection behavior.
    pub(crate) prompt_memory_flags: PromptMemoryFlags,
    /// Maximum tool calls allowed per round (default: 80).
    pub(crate) max_tool_calls_per_round: usize,
    /// Maximum consecutive failures per tool before circuit breaker (default: 3).
    pub(crate) max_consecutive_failures_per_tool: usize,
    /// Tool names that require strict argument validation.
    pub(crate) strict_argument_tool_names: Vec<String>,
    /// Per-tool execution timeout in seconds (default: 120).
    pub(crate) per_tool_timeout_secs: u64,
    /// Parallel batch execution timeout in seconds (default: 300).
    pub(crate) parallel_batch_timeout_secs: u64,
    /// Permission mode for this execution (default: None = use PermissionConfig's mode).
    pub(crate) permission_mode: Option<PermissionMode>,
    /// Optional Gold observe-only evaluator configuration.
    ///
    /// When `None` or `enabled == false`, Gold evaluation is disabled and the
    /// existing execute/respond/resume loop remains unchanged.
    pub(crate) gold_config: Option<GoldConfig>,
    /// Optional guardian adversarial-review gate configuration. When `None` or
    /// `enabled == false`, the guardian terminal gate is inactive.
    pub(crate) guardian_config: Option<GuardianConfig>,
    /// Late-bound spawner for the guardian reviewer child. `None` (the default)
    /// leaves the guardian gate inert even when `guardian_config.enabled` is set,
    /// since the runner cannot create a child without it. Wired by the server.
    pub(crate) guardian_spawner: Option<Arc<dyn GuardianSpawner>>,
    /// Late-bound delegate that routes a child's gated-tool approval request up
    /// to its parent (Phase 2). `None` (the default) leaves child gating on its
    /// legacy path. Wired by the server.
    pub(crate) approval_delegate: Option<Arc<dyn ApprovalDelegate>>,
    /// Enable dynamic per-round model routing based on task complexity.
    /// When true, the pipeline classifies complexity at each round end and
    /// stores the result in session metadata.
    pub(crate) features_dynamic_model_routing: bool,
    /// Optional per-round resolver for auxiliary model settings that should
    /// follow live global config rather than stay frozen for the whole run.
    ///
    /// The main chat model remains session/request scoped; this hook is only
    /// for fast/background/planning/search/summarization helpers.
    pub(crate) auxiliary_model_resolver:
        Option<Arc<dyn Fn() -> AuxiliaryModelConfig + Send + Sync>>,
    /// Server-level usage guidance contributed by the run's tool executor —
    /// chiefly the `instructions` connected MCP servers return from `initialize`.
    /// Captured once at config construction (from `ToolExecutor::tool_guidance`)
    /// and appended to the tool-guide section of the system prompt, so a server's
    /// own how-to-use notes appear only while that server is loaded for the run.
    pub(crate) mcp_tool_guidance: Option<String>,
}

impl Default for AgentLoopConfig {
    /// Builds the baseline configuration: a fresh tool registry, no optional
    /// collaborators or model overrides, default prompt-memory flags, and the
    /// built-in numeric limits and strict-argument tool list.
    fn default() -> Self {
        // Tool names that get strict argument validation out of the box.
        let strict_argument_tool_names: Vec<String> = [
            "Write",
            "Edit",
            "NotebookEdit",
            "apply_patch",
            "Bash",
            "Task",
            "SubAgent",
            "scheduler",
            "sub_session_manager",
            "session_note",
            "memory_note",
        ]
        .iter()
        .map(|name| (*name).to_owned())
        .collect();

        Self {
            // Loop limits and timeouts.
            max_rounds: 200,
            max_tool_calls_per_round: 80,
            max_consecutive_failures_per_tool: 3,
            per_tool_timeout_secs: 120,
            parallel_batch_timeout_secs: 300,

            // Prompt and skill selection.
            system_prompt: None,
            disabled_skill_ids: BTreeSet::new(),
            selected_skill_ids: None,
            selected_skill_mode: None,
            skip_initial_user_message: false,
            compression_instructions: None,
            prompt_memory_flags: PromptMemoryFlags::default(),
            mcp_tool_guidance: None,

            // Tools.
            additional_tool_schemas: Vec::new(),
            tool_registry: Arc::new(ToolRegistry::new()),
            disabled_tools: BTreeSet::new(),
            strict_argument_tool_names,
            permission_mode: None,

            // Optional collaborators.
            composition_executor: None,
            skill_manager: None,
            storage: None,
            persistence: None,
            attachment_reader: None,
            metrics_collector: None,
            app_data_dir: None,

            // Main and auxiliary model selection.
            model_name: None,
            fast_model_name: None,
            fast_model_provider: None,
            background_model_name: None,
            planning_model_name: None,
            search_model_name: None,
            summarization_model_name: None,
            background_model_provider: None,
            summarization_model_provider: None,
            provider_name: None,
            provider_type: None,
            reasoning_effort: None,
            token_budget: None,
            image_fallback: None,
            features_dynamic_model_routing: false,
            auxiliary_model_resolver: None,

            // Gold / guardian / approval delegation: all inactive.
            gold_config: None,
            guardian_config: None,
            guardian_spawner: None,
            approval_delegate: None,
        }
    }
}

impl AgentLoopConfig {
    /// The active session goal to surface to the main agent, or `None` when
    /// Gold is disabled or no goal is set. Falls back to the legacy
    /// `evaluation_prompt` for back-compat via [`GoldConfig::effective_goal`].
    pub fn active_goal(&self) -> Option<&str> {
        self.gold_config
            .as_ref()
            .filter(|cfg| cfg.enabled)
            .and_then(GoldConfig::effective_goal)
    }

    /// Whether the Codex-style autonomous goal loop is active for this run.
    ///
    /// This requires Gold to be enabled, a goal to be set, AND auto-continue to
    /// be on. Only then is the `update_goal` self-report tool surfaced to the
    /// model and the terminal double-check allowed to veto a premature stop.
    /// When Gold is enabled without auto-continue, the evaluator stays purely
    /// observational (legacy behavior).
    pub fn goal_loop_active(&self) -> bool {
        self.gold_config.as_ref().is_some_and(|cfg| {
            cfg.enabled && cfg.auto_continue_enabled && cfg.effective_goal().is_some()
        })
    }

    /// Whether the guardian review gate is active for this run: a spawner is
    /// wired (so the runner can actually create the reviewer child) AND the
    /// config is present and enabled.
    pub fn guardian_active(&self) -> bool {
        self.guardian_spawner.is_some()
            && self.guardian_config.as_ref().is_some_and(|cfg| cfg.enabled)
    }

    /// Maximum guardian review passes for this run (the budget). `0` when no
    /// guardian config is set.
    pub fn guardian_max_reviews(&self) -> u32 {
        self.guardian_config
            .as_ref()
            .map_or(0, |cfg| cfg.max_reviews)
    }

    /// The reviewer model override, if a guardian config sets one.
    pub fn guardian_model(&self) -> Option<&str> {
        self.guardian_config
            .as_ref()
            .and_then(|cfg| cfg.model_name.as_deref())
    }

    /// Whether child→parent approval delegation is wired for this run.
    pub fn delegation_active(&self) -> bool {
        self.approval_delegate.is_some()
    }
}

#[cfg(test)]
mod tests;