bamboo-subagent 2026.6.21

Sub-agent fleet runtime: project-keyed session store, indices, and Maildir-style mailbox
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
//! ProvisionSpec — the one-shot bootstrap contract between parent and worker.
//!
//! The parent **decides** (model routing, tool policy, storage layout, credentials) and the
//! worker only **executes** the already-resolved result. The spec is fed to the worker over
//! **stdin, once, then the pipe closes** — never argv (visible in `ps`) or env (inherited by
//! grandchildren). Secrets ride in a dedicated envelope so the security story can evolve
//! (proxy mode, short-lived tokens) without touching the bootstrap flow.
//!
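//! Forward compatibility: `version` + serde's default of ignoring unknown fields means an
//! older worker can read a newer spec (new fields are skipped) and a newer worker can read
//! an older spec (missing fields default). Parent and worker binaries need not be upgraded
//! in lockstep.
//!
//! Caveat: this holds for added *fields* only. `ExecutorSpec` and `Placement` are tagged
//! enums with no catch-all variant, so an older worker rejects a spec that uses a variant
//! it does not know; roll out new variants to workers before any parent emits them.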

use serde::{Deserialize, Serialize};

use crate::error::{Result, StoreError};

/// Current spec version written by this crate.
///
/// [`ProvisionSpec::new`] stamps this value into the spec's `version` field.
pub const PROVISION_VERSION: u32 = 1;

/// Hard ceiling, in bytes (8 MiB), on a spec accepted from stdin.
///
/// A genuine spec is only a few KB; the cap exists purely as defense in depth
/// so a runaway writer cannot make the reader buffer without limit.
pub const MAX_SPEC_BYTES: u64 = 8 << 20;

/// Everything a worker needs to become a functioning actor. Parent-resolved, flat, complete.
///
/// Wire format notes: `version`, `identity`, `executor` and `fabric_dir` carry no serde
/// default and are therefore required in the JSON document; every other field falls back
/// to its default when absent. `Option` fields are omitted from the output when `None`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ProvisionSpec {
    /// Spec format version. [`ProvisionSpec::new`] writes [`PROVISION_VERSION`].
    pub version: u32,
    /// Who this actor is: its own id plus optional parent/project, role and spawn depth.
    pub identity: ChildIdentity,
    /// Which execution engine this actor runs (worker maps it via its factory).
    pub executor: ExecutorSpec,
    /// Tier-1 fabric directory the worker self-registers into.
    pub fabric_dir: String,
    /// Isolated storage root for this actor's own session/mailbox files.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub storage_dir: Option<String>,
    /// Working directory for the actor's file operations.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub workspace: Option<String>,
    /// Final, parent-resolved model (explicit pin > per-type routing > defaults).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model: Option<ModelRefSpec>,
    /// Tool names to hide from the child (already resolved from the profile policy).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub disabled_tools: Option<Vec<String>>,
    /// Optional run/idle timeouts and round cap; every limit is unset by default.
    #[serde(default)]
    pub limits: Limits,
    /// Credential envelope for this child; empty by default.
    #[serde(default)]
    pub secrets: SecretsEnvelope,
    /// When true the worker serves connection-after-connection (a warm, reusable
    /// actor) instead of exiting after one run. The parent pools such workers and
    /// reuses an idle one for the next assignment with a matching fingerprint
    /// (role/provider/model/workspace/tools), so N sibling sub-agents no longer
    /// mean N processes. Each run still gets a fresh session rehydrated from the
    /// run's `messages`, so context stays isolated across reuses.
    #[serde(default)]
    pub reusable: bool,
    /// Where this actor runs. `Local` (default) — the parent spawns a local
    /// subprocess. `Remote{endpoint}` — connect to an already-running `wss://`
    /// worker. `Schedulable{pool}` — a control plane assigns an endpoint.
    /// Forward-compatible: an older spec without this field defaults to `Local`,
    /// so behavior is unchanged until a placement is set.
    #[serde(default)]
    pub placement: Placement,
    /// Capabilities synced from the orchestrator so a deployed worker matches its
    /// toolset (MCP servers + user skills). Empty for plain actor children (no
    /// behavior change); a deployed broker-agent fills these.
    #[serde(default)]
    pub capabilities: Capabilities,
}

/// Orchestrator-synced extras for a worker. Forward-compatible (all optional);
/// an older spec without these leaves the worker on builtin tools + isolated
/// skills exactly as before.
///
/// Wire format notes: every field has a serde default, so an empty JSON object
/// deserializes to `Capabilities::default()` (all `Option`s `None`, all flags
/// `false`). `Option` fields are omitted from the output when `None`.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct Capabilities {
    /// Serialized MCP config — opaque to this leaf crate; the worker deserializes
    /// it into the domain `McpConfig`. Typically the portable (SSE /
    /// streamable-http) subset; host-bound stdio servers are excluded.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mcp: Option<serde_json::Value>,
    /// Directory of user/project skills the worker should load, instead of an
    /// empty isolated dir.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub skills_dir: Option<String>,
    /// When set, the worker proxies its MCP tool calls to the orchestrator over
    /// the broker (host-bound servers like nova run only there). Mutually
    /// exclusive with `mcp` direct-sync — proxy covers all MCP.
    ///
    /// NOTE(review): the exclusivity is a convention only; nothing in this type
    /// prevents both `mcp` and `mcp_proxy` from being set at once.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mcp_proxy: Option<McpProxyConfig>,
    /// When `true`, the worker builds its tool executor WITH a permission
    /// checker, so gated tools hit `ConfirmationRequired` and delegate the
    /// decision to the host via the per-run `ApprovalProxy` (Phase 2:
    /// child → parent approval). Default `false` preserves the legacy behavior
    /// (the worker runs all tools unchecked). Only meaningful when the run has a
    /// host bridge to proxy to — real actor runs always do.
    #[serde(default)]
    pub enforce_permissions: bool,
    /// When `true`, the worker builds its OWN external-child runner + scheduler
    /// + adapter and runs the REAL `SubAgent` tool directly, so a nested worker
    /// can spawn grandchildren in-process (Phase 6: direct nested execution).
    /// Default `false` — the worker has no `SubAgent` tool (a leaf sub-agent).
    #[serde(default)]
    pub nested_spawn: bool,
    /// Max nesting depth a self-orchestrating worker may spawn to (Phase 6:
    /// direct nested execution). A worker (or the root) refuses to spawn a child
    /// when its own `spawn_depth >= max_spawn_depth`. `None` ⇒ the default cap
    /// (4) applies. Carried down so every level enforces the same bound.
    ///
    /// NOTE(review): the default cap is defined outside this file — confirm the
    /// value quoted above stays in sync with it.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_spawn_depth: Option<u32>,
    /// Whether this actor runs in "bypass permissions" mode (propagated from the
    /// parent at spawn). Phase 6: when true, a self-orchestrating worker installs
    /// an OFF-LOOP model-reviewer so its CHILDREN's forced-ask (dangerous) gated
    /// actions — which still fire `ConfirmationRequired` even under bypass — get
    /// an LLM reasonableness check instead of a blind pass.
    #[serde(default)]
    pub bypass: bool,
    /// Whether this run has NO interactive human approver (headless `-p`,
    /// scheduled jobs, deployed broker-agents — propagated from the unattended
    /// root). #73: when true, the worker's per-run `ApprovalProxy` decides a
    /// gated action with the OFF-LOOP model-reviewer LOCALLY instead of
    /// escalating to a human who will never answer (which would 300s-deny). When
    /// false (an interactive session) the approval escalates to the human as
    /// usual. Independent of `bypass` (an interactive bypass run still has a
    /// human; a headless default-mode run does not).
    #[serde(default)]
    pub no_human_approver: bool,
    /// Whether this worker is a READ-ONLY Guardian reviewer. #71: a guardian
    /// reviewer keeps `Bash` (its mutating tools are stripped by
    /// `guardian_read_only_disabled_tools`) so it can fetch the diff and run
    /// tests — but an unrestricted `Bash` would let it `rm -rf`, `git push`, or
    /// `curl | sh`, making the read-only guarantee nominal. When `true`, the
    /// worker installs a `GuardianReadOnlyChecker` that DENIES any `Bash`/
    /// `execute_command` whose command is not on the read-only allowlist
    /// (`is_read_only_command`) and runs read-only commands without gating.
    /// Default `false` preserves the unrestricted-Bash behavior for ordinary
    /// sub-agents. Set by the host's `build_spec` from the reviewer's session
    /// marker. Mirrors `no_human_approver` above.
    #[serde(default)]
    pub guardian_read_only: bool,
}

/// How a worker reaches the orchestrator's MCP proxy over the broker.
///
/// `Debug` is implemented by hand rather than derived: this struct carries a
/// bearer token, and a derived impl would print it verbatim whenever the
/// enclosing spec is logged with `{:?}`. The manual impl keeps every other
/// field visible and prints a fixed placeholder for `token`.
#[derive(Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct McpProxyConfig {
    /// The orchestrator's broker mailbox id (proxy requests go here).
    pub orchestrator: String,
    /// Broker WebSocket endpoint.
    pub endpoint: String,
    /// Bearer token for the broker. Never shown by `Debug`.
    pub token: String,
}

impl std::fmt::Debug for McpProxyConfig {
    /// Formats like the derived impl, except that `token` is redacted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("McpProxyConfig")
            .field("orchestrator", &self.orchestrator)
            .field("endpoint", &self.endpoint)
            // Secret: print a placeholder so logs never contain the token.
            .field("token", &"<redacted>")
            .finish()
    }
}

/// Where an actor physically runs — a configurable "temperature", not a baked-in
/// property (see `docs/remote-actor-plan.md` §3.4). Default `Local` keeps today's
/// behavior; the launcher picks the matching `WorkerLauncher` per variant.
///
/// Wire format: internally tagged on `kind` with snake_case variant names, e.g.
/// `{"kind":"local"}`, `{"kind":"remote","endpoint":"…"}`,
/// `{"kind":"schedulable","pool":"…"}`.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum Placement {
    /// Parent spawns a local subprocess (current behavior).
    #[default]
    Local,
    /// Connect to a resident worker already serving at `endpoint` (e.g.
    /// `wss://gpu-host:8443`).
    Remote { endpoint: String },
    /// Ask a control plane to assign an endpoint from a named pool.
    Schedulable { pool: String },
}

/// Identity of the actor being provisioned.
///
/// Only `child_id` is required on the wire; every other field has a serde
/// default (`None`, empty string, or `0`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ChildIdentity {
    /// Identifier of this child actor.
    pub child_id: String,
    /// Identifier of the spawning parent, when there is one (omitted from the
    /// output when `None`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parent_id: Option<String>,
    /// Project key this actor belongs to (omitted from the output when `None`).
    /// NOTE(review): presumably the key of the project-keyed session store — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub project_key: Option<String>,
    /// Role/profile id, e.g. "researcher". Also published in the discovery record.
    /// Defaults to the empty string when absent.
    #[serde(default)]
    pub role: String,
    /// Nesting depth of THIS actor in the spawn tree (root orchestrator = 0, its
    /// direct worker = 1, …). The worker stamps this onto its run session's
    /// `spawn_depth` so in-process children accumulate depth correctly ACROSS the
    /// actor process boundary (each worker otherwise starts at a fresh root).
    /// Used to enforce the max-depth cap (Phase 6: direct nested execution).
    #[serde(default)]
    pub depth: u32,
}

/// Which engine runs the task. The worker's factory maps each variant to a `ChildExecutor`;
/// adding an engine = one new variant + one factory arm, nothing else changes.
///
/// Wire format: internally tagged on `kind` with snake_case variant names —
/// `"echo"`, `"bamboo_runtime"`, `"cli_adapter"`. There is no catch-all variant,
/// so an unrecognized `kind` fails deserialization.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum ExecutorSpec {
    /// Dependency-free echo stand-in (testing / smoke runs through the full chain).
    Echo,
    /// The real bamboo agent loop.
    BambooRuntime,
    /// Wrap an external CLI agent as the engine. `command` is the program to
    /// run and `args` its argument list; both are required on the wire.
    CliAdapter { command: String, args: Vec<String> },
}

/// Provider+model pair, parent-resolved. (Local mirror of `ProviderModelRef`;
/// this crate stays a leaf and does not depend on `bamboo-domain`.)
/// Both fields are required on the wire.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ModelRefSpec {
    /// Provider routing key, e.g. "anthropic".
    pub provider: String,
    /// Model identifier as understood by that provider.
    pub model: String,
}

/// Optional execution limits for an actor. Every field is unset (`None`) by
/// default and omitted from the serialized output while unset. This file only
/// carries the values; how the worker enforces them is not visible here.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct Limits {
    /// Run timeout in seconds. NOTE(review): presumably a cap on one whole run — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub run_timeout_secs: Option<u64>,
    /// Idle timeout in seconds. NOTE(review): presumably how long an idle worker
    /// may linger before being reaped — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub idle_timeout_secs: Option<u64>,
    /// Maximum number of rounds. NOTE(review): presumably agent-loop iterations — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_rounds: Option<u32>,
}

/// Credentials scoped to exactly what this child needs — never the whole config.
/// Held in memory only; the worker must not persist it.
///
/// Empty by default: a spec without a `secrets` object, or with one lacking
/// `provider_credentials`, deserializes to an envelope with no credentials.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct SecretsEnvelope {
    /// One entry per provider the child is allowed to call.
    #[serde(default)]
    pub provider_credentials: Vec<ScopedCredential>,
}

/// One provider credential handed to a child.
///
/// `Debug` is implemented by hand rather than derived: a derived impl would
/// print `api_key` verbatim whenever a spec (or its secrets envelope) is logged
/// with `{:?}`, undermining the "held in memory only" rule for secrets. The
/// manual impl keeps the routing fields visible and prints a fixed placeholder
/// for the key.
#[derive(Clone, PartialEq, Serialize, Deserialize)]
pub struct ScopedCredential {
    /// Routing key as the parent knows it: a legacy provider name
    /// ("anthropic") or a provider-instance id (uuid).
    pub provider: String,
    /// The secret API key. Never shown by `Debug`.
    pub api_key: String,
    /// Optional endpoint override for the provider (omitted from the output when `None`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub base_url: Option<String>,
    /// Concrete provider protocol to construct ("anthropic", "openai", …).
    /// Needed when `provider` is an instance id; defaults to `provider`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub provider_type: Option<String>,
}

impl std::fmt::Debug for ScopedCredential {
    /// Formats like the derived impl, except that `api_key` is redacted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ScopedCredential")
            .field("provider", &self.provider)
            // Secret: print a placeholder so logs never contain the key.
            .field("api_key", &"<redacted>")
            .field("base_url", &self.base_url)
            .field("provider_type", &self.provider_type)
            .finish()
    }
}

impl ProvisionSpec {
    /// Build a spec at the current [`PROVISION_VERSION`] from the three required
    /// pieces, leaving every optional field at its default (`None`, `false`,
    /// empty limits/secrets/capabilities, `Placement::Local`).
    pub fn new(identity: ChildIdentity, executor: ExecutorSpec, fabric_dir: String) -> Self {
        Self {
            version: PROVISION_VERSION,
            identity,
            executor,
            fabric_dir,
            storage_dir: None,
            workspace: None,
            model: None,
            disabled_tools: None,
            limits: Limits::default(),
            secrets: SecretsEnvelope::default(),
            reusable: false,
            placement: Placement::default(),
            capabilities: Capabilities::default(),
        }
    }

    /// Serialize the spec to a compact JSON string.
    ///
    /// # Errors
    /// Returns a `StoreError` (tagged with the pseudo-path `<provision>`) if
    /// serialization fails.
    pub fn to_json(&self) -> Result<String> {
        serde_json::to_string(self)
            .map_err(|e| StoreError::decode(std::path::Path::new("<provision>"), e))
    }

    /// Parse a spec from a JSON string. Unknown fields are ignored and fields
    /// with a serde default may be absent.
    ///
    /// # Errors
    /// Returns a `StoreError` (tagged with the pseudo-path `<provision>`) if the
    /// text is not a valid spec document.
    pub fn from_json(s: &str) -> Result<Self> {
        serde_json::from_str(s)
            .map_err(|e| StoreError::decode(std::path::Path::new("<provision>"), e))
    }

    /// Read a spec from the process's stdin (the parent writes one JSON document and
    /// closes the pipe). Used by worker `main`.
    ///
    /// Defense in depth: the read is capped at [`MAX_SPEC_BYTES`] — the pipe is
    /// trusted (our own parent), but a runaway writer must not OOM the worker.
    ///
    /// # Errors
    /// - an I/O error tagged `<stdin>` if reading fails, if the input is larger
    ///   than [`MAX_SPEC_BYTES`], or if it is not valid UTF-8;
    /// - a decode error tagged `<provision>` if the text is not a valid spec.
    pub async fn read_from_stdin() -> Result<Self> {
        use tokio::io::AsyncReadExt;
        let mut buf = Vec::new();
        // Read one byte past the cap so an oversize document can be told apart
        // from one that is exactly at the cap. Memory stays bounded either way.
        tokio::io::stdin()
            .take(MAX_SPEC_BYTES.saturating_add(1))
            .read_to_end(&mut buf)
            .await
            .map_err(|e| StoreError::io("<stdin>", e))?;

        // Reject an oversize spec explicitly rather than parsing a silently
        // truncated prefix, which would surface as a misleading JSON EOF error.
        let oversize = u64::try_from(buf.len()).map_or(true, |len| len > MAX_SPEC_BYTES);
        if oversize {
            return Err(StoreError::io(
                "<stdin>",
                std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    format!("provision spec exceeds {MAX_SPEC_BYTES} bytes"),
                ),
            ));
        }

        // Strict decoding: a lossy conversion would replace invalid bytes with
        // U+FFFD and could hand the worker a silently corrupted credential.
        let text = String::from_utf8(buf).map_err(|e| {
            StoreError::io(
                "<stdin>",
                std::io::Error::new(std::io::ErrorKind::InvalidData, e),
            )
        })?;
        Self::from_json(text.trim())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Populated fixture: echo executor, a pinned model, and one scoped credential.
    fn spec() -> ProvisionSpec {
        let identity = ChildIdentity {
            child_id: "c1".into(),
            parent_id: Some("p1".into()),
            project_key: Some("proj".into()),
            role: "researcher".into(),
            depth: 0,
        };
        let credential = ScopedCredential {
            provider: "anthropic".into(),
            api_key: "sk-test".into(),
            base_url: None,
            provider_type: None,
        };

        let mut fixture = ProvisionSpec::new(identity, ExecutorSpec::Echo, "/tmp/fabric".into());
        fixture.model = Some(ModelRefSpec {
            provider: "anthropic".into(),
            model: "claude-sonnet-4-6".into(),
        });
        fixture.secrets.provider_credentials.push(credential);
        fixture
    }

    /// Serialize `spec` and parse it straight back through the public JSON API.
    fn reparse(spec: &ProvisionSpec) -> ProvisionSpec {
        let wire = spec.to_json().unwrap();
        ProvisionSpec::from_json(&wire).unwrap()
    }

    /// Serialize `spec` and view the result as an untyped JSON tree.
    fn wire_value(spec: &ProvisionSpec) -> serde_json::Value {
        serde_json::from_str(&spec.to_json().unwrap()).unwrap()
    }

    /// Parse the smallest accepted document: required fields only.
    fn parse_minimal(child_id: &str) -> ProvisionSpec {
        let doc = serde_json::json!({
            "version": 1,
            "identity": { "child_id": child_id },
            "executor": { "kind": "echo" },
            "fabric_dir": "/tmp/f",
        });
        ProvisionSpec::from_json(&doc.to_string()).unwrap()
    }

    #[test]
    fn round_trips() {
        let original = spec();
        assert_eq!(reparse(&original), original);
    }

    #[test]
    fn unknown_fields_are_ignored_forward_compat() {
        // Simulate a "newer" writer: extra keys at the top level and nested.
        let mut newer = wire_value(&spec());
        newer["future_field"] = serde_json::json!({"x": 1});
        newer["identity"]["future_sub"] = serde_json::json!(true);

        let parsed = ProvisionSpec::from_json(&newer.to_string()).unwrap();
        assert_eq!(parsed.identity.child_id, "c1");
    }

    #[test]
    fn missing_optional_fields_default_backward_compat() {
        // Simulate an "older" writer that only knows the required fields.
        let parsed = parse_minimal("c9");

        assert_eq!(parsed.identity.child_id, "c9");
        assert_eq!(parsed.executor, ExecutorSpec::Echo);
        assert!(parsed.model.is_none());
        assert!(parsed.secrets.provider_credentials.is_empty());
        assert_eq!(parsed.limits, Limits::default());
        // A spec that predates `placement` must come out as Local.
        assert_eq!(parsed.placement, Placement::Local);
    }

    #[test]
    fn placement_defaults_local_and_remote_round_trips() {
        // The fixture never sets a placement, so it serializes as kind="local".
        assert_eq!(wire_value(&spec())["placement"]["kind"], "local");

        // A remote placement survives the round trip with its endpoint intact.
        let remote = Placement::Remote {
            endpoint: "wss://gpu-host:8443".into(),
        };
        let mut placed = spec();
        placed.placement = remote.clone();
        assert_eq!(reparse(&placed).placement, remote);
    }

    #[test]
    fn capabilities_default_empty_and_round_trip() {
        // The fixture carries no synced capabilities (actor children unaffected).
        assert_eq!(spec().capabilities, Capabilities::default());

        // Populated capabilities survive the round trip.
        let mut equipped = spec();
        equipped.capabilities = Capabilities {
            mcp: Some(serde_json::json!({ "version": 1, "servers": [] })),
            skills_dir: Some("/home/u/.bamboo/skills".into()),
            ..Capabilities::default()
        };
        let echoed = reparse(&equipped).capabilities;
        assert_eq!(echoed.skills_dir.as_deref(), Some("/home/u/.bamboo/skills"));
        assert!(echoed.mcp.is_some());

        // Backward compat: no `capabilities` key at all means the empty default.
        assert_eq!(parse_minimal("c").capabilities, Capabilities::default());
    }

    #[test]
    fn enforce_permissions_defaults_false_and_round_trips() {
        // Absent in JSON ⇒ false (backward compatible with older orchestrators).
        assert!(!Capabilities::default().enforce_permissions);

        // Opting in survives the round trip.
        let mut gated = spec();
        gated.capabilities.enforce_permissions = true;
        assert!(reparse(&gated).capabilities.enforce_permissions);
    }

    #[test]
    fn executor_tags_are_stable() {
        // Extract the serialized `kind` tag of a bare executor value.
        let tag_of =
            |executor: &ExecutorSpec| serde_json::to_value(executor).unwrap()["kind"].clone();

        assert_eq!(wire_value(&spec())["executor"]["kind"], "echo");

        let cli = ExecutorSpec::CliAdapter {
            command: "claude".into(),
            args: vec!["-p".into()],
        };
        assert_eq!(tag_of(&cli), "cli_adapter");
        assert_eq!(tag_of(&ExecutorSpec::BambooRuntime), "bamboo_runtime");
    }
}