bamboo_subagent/provision.rs
1//! ProvisionSpec — the one-shot bootstrap contract between parent and worker.
2//!
3//! The parent **decides** (model routing, tool policy, storage layout, credentials) and the
4//! worker only **executes** the already-resolved result. The spec is fed to the worker over
5//! **stdin, once, then the pipe closes** — never argv (visible in `ps`) or env (inherited by
6//! grandchildren). Secrets ride in a dedicated envelope so the security story can evolve
7//! (proxy mode, short-lived tokens) without touching the bootstrap flow.
8//!
9//! Forward compatibility: `version` + serde's default of ignoring unknown fields means an
10//! older worker can read a newer spec (new fields are skipped) and a newer worker can read
11//! an older spec (missing fields default). Parent and worker binaries need not be upgraded
12//! in lockstep.
13
14use serde::{Deserialize, Serialize};
15
16use crate::error::{Result, StoreError};
17
/// Version number stamped into every spec this crate serializes.
pub const PROVISION_VERSION: u32 = 1;

/// Hard ceiling, in bytes, on a spec accepted from stdin (8 MiB). A real spec
/// is only a few KB; the cap is defense in depth so a runaway writer cannot
/// exhaust the worker's memory.
pub const MAX_SPEC_BYTES: u64 = 8 << 20;
24
25/// Everything a worker needs to become a functioning actor. Parent-resolved, flat, complete.
26#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
27pub struct ProvisionSpec {
28 pub version: u32,
29 pub identity: ChildIdentity,
30 /// Which execution engine this actor runs (worker maps it via its factory).
31 pub executor: ExecutorSpec,
32 /// Tier-1 fabric directory the worker self-registers into.
33 pub fabric_dir: String,
34 /// Isolated storage root for this actor's own session/mailbox files.
35 #[serde(default, skip_serializing_if = "Option::is_none")]
36 pub storage_dir: Option<String>,
37 /// Working directory for the actor's file operations.
38 #[serde(default, skip_serializing_if = "Option::is_none")]
39 pub workspace: Option<String>,
40 /// Final, parent-resolved model (explicit pin > per-type routing > defaults).
41 #[serde(default, skip_serializing_if = "Option::is_none")]
42 pub model: Option<ModelRefSpec>,
43 /// Tool names to hide from the child (already resolved from the profile policy).
44 #[serde(default, skip_serializing_if = "Option::is_none")]
45 pub disabled_tools: Option<Vec<String>>,
46 #[serde(default)]
47 pub limits: Limits,
48 #[serde(default)]
49 pub secrets: SecretsEnvelope,
50 /// When true the worker serves connection-after-connection (a warm, reusable
51 /// actor) instead of exiting after one run. The parent pools such workers and
52 /// reuses an idle one for the next assignment with a matching fingerprint
53 /// (role/provider/model/workspace/tools), so N sibling sub-agents no longer
54 /// mean N processes. Each run still gets a fresh session rehydrated from the
55 /// run's `messages`, so context stays isolated across reuses.
56 #[serde(default)]
57 pub reusable: bool,
58 /// Where this actor runs. `Local` (default) — the parent spawns a local
59 /// subprocess. `Remote{endpoint}` — connect to an already-running `wss://`
60 /// worker. `Schedulable{pool}` — a control plane assigns an endpoint.
61 /// Forward-compatible: an older spec without this field defaults to `Local`,
62 /// so behavior is unchanged until a placement is set.
63 #[serde(default)]
64 pub placement: Placement,
65 /// Capabilities synced from the orchestrator so a deployed worker matches its
66 /// toolset (MCP servers + user skills). Empty for plain actor children (no
67 /// behavior change); a deployed broker-agent fills these.
68 #[serde(default)]
69 pub capabilities: Capabilities,
70}
71
72/// Orchestrator-synced extras for a worker. Forward-compatible (all optional);
73/// an older spec without these leaves the worker on builtin tools + isolated
74/// skills exactly as before.
75#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
76pub struct Capabilities {
77 /// Serialized MCP config — opaque to this leaf crate; the worker deserializes
78 /// it into the domain `McpConfig`. Typically the portable (SSE /
79 /// streamable-http) subset; host-bound stdio servers are excluded.
80 #[serde(default, skip_serializing_if = "Option::is_none")]
81 pub mcp: Option<serde_json::Value>,
82 /// Directory of user/project skills the worker should load, instead of an
83 /// empty isolated dir.
84 #[serde(default, skip_serializing_if = "Option::is_none")]
85 pub skills_dir: Option<String>,
86 /// When set, the worker proxies its MCP tool calls to the orchestrator over
87 /// the broker (host-bound servers like nova run only there). Mutually
88 /// exclusive with `mcp` direct-sync — proxy covers all MCP.
89 #[serde(default, skip_serializing_if = "Option::is_none")]
90 pub mcp_proxy: Option<McpProxyConfig>,
91 /// When `true`, the worker builds its tool executor WITH a permission
92 /// checker, so gated tools hit `ConfirmationRequired` and delegate the
93 /// decision to the host via the per-run `ApprovalProxy` (Phase 2:
94 /// child → parent approval). Default `false` preserves the legacy behavior
95 /// (the worker runs all tools unchecked). Only meaningful when the run has a
96 /// host bridge to proxy to — real actor runs always do.
97 #[serde(default)]
98 pub enforce_permissions: bool,
99 /// When `true`, the worker builds its OWN external-child runner + scheduler
100 /// + adapter and runs the REAL `SubAgent` tool directly, so a nested worker
101 /// can spawn grandchildren in-process (Phase 6: direct nested execution).
102 /// Default `false` — the worker has no `SubAgent` tool (a leaf sub-agent).
103 #[serde(default)]
104 pub nested_spawn: bool,
105 /// Max nesting depth a self-orchestrating worker may spawn to (Phase 6:
106 /// direct nested execution). A worker (or the root) refuses to spawn a child
107 /// when its own `spawn_depth >= max_spawn_depth`. `None` ⇒ the default cap
108 /// (4) applies. Carried down so every level enforces the same bound.
109 #[serde(default, skip_serializing_if = "Option::is_none")]
110 pub max_spawn_depth: Option<u32>,
111 /// Whether this actor runs in "bypass permissions" mode (propagated from the
112 /// parent at spawn). Phase 6: when true, a self-orchestrating worker installs
113 /// an OFF-LOOP model-reviewer so its CHILDREN's forced-ask (dangerous) gated
114 /// actions — which still fire `ConfirmationRequired` even under bypass — get
115 /// an LLM reasonableness check instead of a blind pass.
116 #[serde(default)]
117 pub bypass: bool,
118 /// Whether this run has NO interactive human approver (headless `-p`,
119 /// scheduled jobs, deployed broker-agents — propagated from the unattended
120 /// root). #73: when true, the worker's per-run `ApprovalProxy` decides a
121 /// gated action with the OFF-LOOP model-reviewer LOCALLY instead of
122 /// escalating to a human who will never answer (which would 300s-deny). When
123 /// false (an interactive session) the approval escalates to the human as
124 /// usual. Independent of `bypass` (an interactive bypass run still has a
125 /// human; a headless default-mode run does not).
126 #[serde(default)]
127 pub no_human_approver: bool,
128 /// Whether this worker is a READ-ONLY Guardian reviewer. #71: a guardian
129 /// reviewer keeps `Bash` (its mutating tools are stripped by
130 /// `guardian_read_only_disabled_tools`) so it can fetch the diff and run
131 /// tests — but an unrestricted `Bash` would let it `rm -rf`, `git push`, or
132 /// `curl | sh`, making the read-only guarantee nominal. When `true`, the
133 /// worker installs a `GuardianReadOnlyChecker` that DENIES any `Bash`/
134 /// `execute_command` whose command is not on the read-only allowlist
135 /// (`is_read_only_command`) and runs read-only commands without gating.
136 /// Default `false` preserves the unrestricted-Bash behavior for ordinary
137 /// sub-agents. Set by the host's `build_spec` from the reviewer's session
138 /// marker. Mirrors `no_human_approver` above.
139 #[serde(default)]
140 pub guardian_read_only: bool,
141}
142
143/// How a worker reaches the orchestrator's MCP proxy over the broker.
144#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
145pub struct McpProxyConfig {
146 /// The orchestrator's broker mailbox id (proxy requests go here).
147 pub orchestrator: String,
148 /// Broker WebSocket endpoint.
149 pub endpoint: String,
150 /// Bearer token for the broker.
151 pub token: String,
152}
153
154/// Where an actor physically runs — a configurable "temperature", not a baked-in
155/// property (see `docs/remote-actor-plan.md` §3.4). Default `Local` keeps today's
156/// behavior; the launcher picks the matching `WorkerLauncher` per variant.
157#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
158#[serde(tag = "kind", rename_all = "snake_case")]
159pub enum Placement {
160 /// Parent spawns a local subprocess (current behavior).
161 #[default]
162 Local,
163 /// Connect to a resident worker already serving at `endpoint` (e.g.
164 /// `wss://gpu-host:8443`).
165 Remote { endpoint: String },
166 /// Ask a control plane to assign an endpoint from a named pool.
167 Schedulable { pool: String },
168}
169
170#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
171pub struct ChildIdentity {
172 pub child_id: String,
173 #[serde(default, skip_serializing_if = "Option::is_none")]
174 pub parent_id: Option<String>,
175 #[serde(default, skip_serializing_if = "Option::is_none")]
176 pub project_key: Option<String>,
177 /// Role/profile id, e.g. "researcher". Also published in the discovery record.
178 #[serde(default)]
179 pub role: String,
180 /// Nesting depth of THIS actor in the spawn tree (root orchestrator = 0, its
181 /// direct worker = 1, …). The worker stamps this onto its run session's
182 /// `spawn_depth` so in-process children accumulate depth correctly ACROSS the
183 /// actor process boundary (each worker otherwise starts at a fresh root).
184 /// Used to enforce the max-depth cap (Phase 6: direct nested execution).
185 #[serde(default)]
186 pub depth: u32,
187}
188
189/// Which engine runs the task. The worker's factory maps each variant to a `ChildExecutor`;
190/// adding an engine = one new variant + one factory arm, nothing else changes.
191#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
192#[serde(tag = "kind", rename_all = "snake_case")]
193pub enum ExecutorSpec {
194 /// Dependency-free echo stand-in (testing / smoke runs through the full chain).
195 Echo,
196 /// The real bamboo agent loop.
197 BambooRuntime,
198 /// Wrap an external CLI agent as the engine.
199 CliAdapter { command: String, args: Vec<String> },
200}
201
202/// Provider+model pair, parent-resolved. (Local mirror of `ProviderModelRef`;
203/// this crate stays a leaf and does not depend on `bamboo-domain`.)
204#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
205pub struct ModelRefSpec {
206 pub provider: String,
207 pub model: String,
208}
209
210#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
211pub struct Limits {
212 #[serde(default, skip_serializing_if = "Option::is_none")]
213 pub run_timeout_secs: Option<u64>,
214 #[serde(default, skip_serializing_if = "Option::is_none")]
215 pub idle_timeout_secs: Option<u64>,
216 #[serde(default, skip_serializing_if = "Option::is_none")]
217 pub max_rounds: Option<u32>,
218}
219
220/// Credentials scoped to exactly what this child needs — never the whole config.
221/// Held in memory only; the worker must not persist it.
222#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
223pub struct SecretsEnvelope {
224 #[serde(default)]
225 pub provider_credentials: Vec<ScopedCredential>,
226}
227
228#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
229pub struct ScopedCredential {
230 /// Routing key as the parent knows it: a legacy provider name
231 /// ("anthropic") or a provider-instance id (uuid).
232 pub provider: String,
233 pub api_key: String,
234 #[serde(default, skip_serializing_if = "Option::is_none")]
235 pub base_url: Option<String>,
236 /// Concrete provider protocol to construct ("anthropic", "openai", …).
237 /// Needed when `provider` is an instance id; defaults to `provider`.
238 #[serde(default, skip_serializing_if = "Option::is_none")]
239 pub provider_type: Option<String>,
240}
241
242impl ProvisionSpec {
243 pub fn new(identity: ChildIdentity, executor: ExecutorSpec, fabric_dir: String) -> Self {
244 Self {
245 version: PROVISION_VERSION,
246 identity,
247 executor,
248 fabric_dir,
249 storage_dir: None,
250 workspace: None,
251 model: None,
252 disabled_tools: None,
253 limits: Limits::default(),
254 secrets: SecretsEnvelope::default(),
255 reusable: false,
256 placement: Placement::default(),
257 capabilities: Capabilities::default(),
258 }
259 }
260
261 pub fn to_json(&self) -> Result<String> {
262 serde_json::to_string(self)
263 .map_err(|e| StoreError::decode(std::path::Path::new("<provision>"), e))
264 }
265
266 pub fn from_json(s: &str) -> Result<Self> {
267 serde_json::from_str(s)
268 .map_err(|e| StoreError::decode(std::path::Path::new("<provision>"), e))
269 }
270
271 /// Read a spec from the process's stdin (the parent writes one JSON document and
272 /// closes the pipe). Used by worker `main`.
273 ///
274 /// Defense in depth: the read is capped at [`MAX_SPEC_BYTES`] — the pipe is
275 /// trusted (our own parent), but a runaway writer must not OOM the worker.
276 pub async fn read_from_stdin() -> Result<Self> {
277 use tokio::io::AsyncReadExt;
278 let mut buf = Vec::new();
279 tokio::io::stdin()
280 .take(MAX_SPEC_BYTES)
281 .read_to_end(&mut buf)
282 .await
283 .map_err(|e| StoreError::io("<stdin>", e))?;
284 let text = String::from_utf8_lossy(&buf);
285 Self::from_json(text.trim())
286 }
287}
288
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: an echo-executor spec with a pinned model and one credential.
    fn sample_spec() -> ProvisionSpec {
        let identity = ChildIdentity {
            child_id: "c1".into(),
            parent_id: Some("p1".into()),
            project_key: Some("proj".into()),
            role: "researcher".into(),
            depth: 0,
        };
        let mut fixture = ProvisionSpec::new(identity, ExecutorSpec::Echo, "/tmp/fabric".into());
        fixture.model = Some(ModelRefSpec {
            provider: "anthropic".into(),
            model: "claude-sonnet-4-6".into(),
        });
        fixture.secrets.provider_credentials.push(ScopedCredential {
            provider: "anthropic".into(),
            api_key: "sk-test".into(),
            base_url: None,
            provider_type: None,
        });
        fixture
    }

    /// The fixture serialized and re-read as an untyped JSON tree.
    fn sample_json() -> serde_json::Value {
        serde_json::from_str(&sample_spec().to_json().unwrap()).unwrap()
    }

    /// Serialize `original` and parse the result back into a spec.
    fn reparse(original: &ProvisionSpec) -> ProvisionSpec {
        ProvisionSpec::from_json(&original.to_json().unwrap()).unwrap()
    }

    /// A minimal "older" spec carrying only the required fields.
    fn parse_minimal(child_id: &str) -> ProvisionSpec {
        let doc = serde_json::json!({
            "version": 1,
            "identity": { "child_id": child_id },
            "executor": { "kind": "echo" },
            "fabric_dir": "/tmp/f",
        });
        ProvisionSpec::from_json(&doc.to_string()).unwrap()
    }

    #[test]
    fn round_trips() {
        let original = sample_spec();
        assert_eq!(reparse(&original), original);
    }

    #[test]
    fn unknown_fields_are_ignored_forward_compat() {
        // Simulate a "newer" spec by injecting fields this version has never heard of.
        let mut doc = sample_json();
        doc["future_field"] = serde_json::json!({"x": 1});
        doc["identity"]["future_sub"] = serde_json::json!(true);
        let decoded = ProvisionSpec::from_json(&doc.to_string()).unwrap();
        assert_eq!(decoded.identity.child_id, "c1");
    }

    #[test]
    fn missing_optional_fields_default_backward_compat() {
        let decoded = parse_minimal("c9");
        assert_eq!(decoded.identity.child_id, "c9");
        assert_eq!(decoded.executor, ExecutorSpec::Echo);
        assert!(decoded.model.is_none());
        assert!(decoded.secrets.provider_credentials.is_empty());
        assert_eq!(decoded.limits, Limits::default());
        // A spec that predates the placement field must come back as Local.
        assert_eq!(decoded.placement, Placement::Local);
    }

    #[test]
    fn placement_defaults_local_and_remote_round_trips() {
        // The default placement is Local and serializes with kind="local".
        assert_eq!(sample_json()["placement"]["kind"], "local");

        // A Remote placement keeps its endpoint across a round trip.
        let remote = Placement::Remote {
            endpoint: "wss://gpu-host:8443".into(),
        };
        let mut original = sample_spec();
        original.placement = remote.clone();
        assert_eq!(reparse(&original).placement, remote);
    }

    #[test]
    fn capabilities_default_empty_and_round_trip() {
        // A default spec carries no synced capabilities (actor children unaffected).
        assert_eq!(sample_spec().capabilities, Capabilities::default());

        // Populated capabilities survive a round trip.
        let mut original = sample_spec();
        original.capabilities = Capabilities {
            mcp: Some(serde_json::json!({ "version": 1, "servers": [] })),
            skills_dir: Some("/home/u/.bamboo/skills".into()),
            ..Capabilities::default()
        };
        let decoded = reparse(&original);
        assert_eq!(
            decoded.capabilities.skills_dir.as_deref(),
            Some("/home/u/.bamboo/skills")
        );
        assert!(decoded.capabilities.mcp.is_some());

        // Backward compat: with no `capabilities` key the field defaults to empty.
        assert_eq!(parse_minimal("c").capabilities, Capabilities::default());
    }

    #[test]
    fn enforce_permissions_defaults_false_and_round_trips() {
        // Absent in JSON ⇒ false (backward compatible with older orchestrators).
        assert!(!Capabilities::default().enforce_permissions);

        // Opting in survives a round trip.
        let mut original = sample_spec();
        original.capabilities.enforce_permissions = true;
        assert!(reparse(&original).capabilities.enforce_permissions);
    }

    #[test]
    fn executor_tags_are_stable() {
        assert_eq!(sample_json()["executor"]["kind"], "echo");

        let cli = ExecutorSpec::CliAdapter {
            command: "claude".into(),
            args: vec!["-p".into()],
        };
        let cli_json = serde_json::to_value(&cli).unwrap();
        assert_eq!(cli_json["kind"], "cli_adapter");

        let runtime_json = serde_json::to_value(ExecutorSpec::BambooRuntime).unwrap();
        assert_eq!(runtime_json["kind"], "bamboo_runtime");
    }
}