1use std::path::{Path, PathBuf};
2use std::process;
3
4use anyhow::Context;
5
6use crate::config::Config;
7use crate::subprocess::Tool;
8
/// Everything one worker run needs, resolved once from arguments, project config,
/// and `EDICT_*` environment variables.
pub struct WorkerLoop {
    /// Project root; the config is located from here and workspace paths are built
    /// as `<project_root>/ws/<workspace>`.
    project_root: PathBuf,
    /// Agent name substituted into the prompt (`--agent {agent}`); may be empty when
    /// no agent was supplied.
    agent: String,
    /// Value of `config.channel()`; used as the bus channel / project name in the prompt.
    project: String,
    /// Models tried in order when running the agent.
    model_pool: Vec<String>,
    /// Passed to `edict run agent -t`; defaults to 900 when the config has no worker
    /// section (presumably seconds — confirm against the `edict run agent` CLI).
    timeout: u64,
    /// Mirrors `config.review.enabled`; selects the review variant of prompt step 6.
    review_enabled: bool,
    /// `project.critical_approvers` from the config (empty when unset); listed in the
    /// risk:critical approval request.
    critical_approvers: Vec<String>,
    /// Validated bone id from `EDICT_BONE` (falling back to `EDICT_BEAD`), if any.
    dispatched_bone: Option<String>,
    /// Validated workspace name from `EDICT_WORKSPACE`, if any.
    dispatched_workspace: Option<String>,
    /// Validated mission id from `EDICT_MISSION`, if any.
    dispatched_mission: Option<String>,
    /// Text from `EDICT_SIBLINGS`, capped at 4096 bytes.
    dispatched_siblings: Option<String>,
    /// Text from `EDICT_MISSION_OUTCOME`, capped at 2048 bytes.
    dispatched_mission_outcome: Option<String>,
    /// Text from `EDICT_FILE_HINTS`, capped at 4096 bytes.
    dispatched_file_hints: Option<String>,
}
25
26impl WorkerLoop {
27 pub fn new(
29 project_root: Option<PathBuf>,
30 agent: Option<String>,
31 model: Option<String>,
32 ) -> anyhow::Result<Self> {
33 let project_root = project_root
34 .or_else(|| std::env::current_dir().ok())
35 .context("determining project root")?;
36
37 let config = load_config(&project_root)?;
39
40 let agent = agent.unwrap_or_default();
42
43 if !agent.is_empty() {
46 unsafe {
47 std::env::set_var("AGENT", &agent);
48 std::env::set_var("BOTBUS_AGENT", &agent);
49 }
50 }
51
52 let resolved_env = config.resolved_env();
54 for (k, v) in &resolved_env {
55 unsafe {
57 std::env::set_var(k, v);
58 }
59 }
60
61 emit_build_env_diagnostic(&resolved_env);
66
67 let project = config.channel();
69
70 let worker_config = config.agents.worker.as_ref();
72 let model_raw = model
73 .or_else(|| worker_config.map(|w| w.model.clone()))
74 .unwrap_or_default();
75 let model_pool = config.resolve_model_pool(&model_raw);
76
77 let timeout = worker_config.map(|w| w.timeout).unwrap_or(900);
78 let review_enabled = config.review.enabled;
79 let critical_approvers = config
80 .project
81 .critical_approvers
82 .clone()
83 .unwrap_or_default();
84
85 let dispatched_bone = std::env::var("EDICT_BONE")
88 .or_else(|_| std::env::var("EDICT_BEAD"))
89 .ok()
90 .filter(|v| {
91 !v.is_empty()
92 && v.len() <= 20
93 && v.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-')
94 });
95 let dispatched_workspace = std::env::var("EDICT_WORKSPACE").ok().filter(|v| {
97 !v.is_empty()
98 && v.len() <= 64
99 && v.bytes()
100 .all(|b| b.is_ascii_lowercase() || b.is_ascii_digit() || b == b'-')
101 && !v.starts_with('-')
102 && !v.contains("..")
103 });
104 let dispatched_mission = std::env::var("EDICT_MISSION").ok().filter(|v| {
106 !v.is_empty()
107 && v.len() <= 20
108 && v.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-')
109 });
110 let dispatched_siblings = std::env::var("EDICT_SIBLINGS").ok().map(|v| {
112 if v.len() > 4096 {
113 v[..v.floor_char_boundary(4096)].to_string()
114 } else {
115 v
116 }
117 });
118 let dispatched_mission_outcome = std::env::var("EDICT_MISSION_OUTCOME").ok().map(|v| {
119 if v.len() > 2048 {
120 v[..v.floor_char_boundary(2048)].to_string()
121 } else {
122 v
123 }
124 });
125 let dispatched_file_hints = std::env::var("EDICT_FILE_HINTS").ok().map(|v| {
126 if v.len() > 4096 {
127 v[..v.floor_char_boundary(4096)].to_string()
128 } else {
129 v
130 }
131 });
132
133 Ok(Self {
134 project_root,
135 agent,
136 project,
137 model_pool,
138 timeout,
139 review_enabled,
140 critical_approvers,
141 dispatched_bone,
142 dispatched_workspace,
143 dispatched_mission,
144 dispatched_siblings,
145 dispatched_mission_outcome,
146 dispatched_file_hints,
147 })
148 }
149
150 pub fn run_once(&self) -> anyhow::Result<LoopStatus> {
152 register_cleanup_handlers(&self.agent, &self.project);
154
155 let prompt = self.build_prompt();
157
158 let start = crate::telemetry::metrics::time_start();
160 let output = run_agent_with_fallback(&prompt, &self.model_pool, self.timeout)?;
161 crate::telemetry::metrics::time_record(
162 "edict.worker.agent_run_duration_seconds",
163 start,
164 &[("agent", &self.agent), ("project", &self.project)],
165 );
166
167 let status = parse_completion_signal(&output);
169 crate::telemetry::metrics::counter(
170 "edict.worker.runs_total",
171 1,
172 &[("agent", &self.agent), ("project", &self.project)],
173 );
174
175 Ok(status)
176 }
177
178 fn build_prompt(&self) -> String {
180 let dispatched_section = if let (Some(bone), Some(ws)) =
181 (&self.dispatched_bone, &self.dispatched_workspace)
182 {
183 let mission_section = if let Some(ref mission) = self.dispatched_mission {
184 let outcome = if let Some(ref outcome) = self.dispatched_mission_outcome {
185 format!("Mission outcome: {outcome}")
186 } else {
187 format!("Read mission context: maw exec default -- bn show {mission}")
188 };
189
190 let siblings = if let Some(ref sibs) = self.dispatched_siblings {
191 format!("\nSibling bones (other workers in this mission):\n{sibs}")
192 } else {
193 String::new()
194 };
195
196 let file_hints = if let Some(ref hints) = self.dispatched_file_hints {
197 format!(
198 "\nAdvisory file ownership (avoid editing files owned by siblings):\n{hints}"
199 )
200 } else {
201 String::new()
202 };
203
204 format!("Mission: {mission}\n{outcome}{siblings}{file_hints}")
205 } else {
206 String::new()
207 };
208
209 let ws_path = self.project_root.join("ws").join(ws);
210
211 format!(
212 r#"## DISPATCHED WORKER — FAST PATH
213
214You were dispatched by a lead dev agent with a pre-assigned bone and workspace.
215Skip steps 0 (RESUME CHECK), 1 (INBOX), and 2 (TRIAGE) entirely.
216
217Pre-assigned bone: {bone}
218Pre-assigned workspace: {ws}
219Workspace path: {ws_path}
220{mission_section}
221
222Go directly to:
2231. Verify your bone: maw exec default -- bn show {bone}
2242. Verify your workspace: maw ws list (confirm {ws} exists)
2253. Your bone is already doing and claimed. Proceed to step 4 (WORK).
226 Use absolute workspace path: {ws_path}
227 For commands in workspace: maw exec {ws} -- <command>
228
229"#,
230 bone = bone,
231 ws = ws,
232 ws_path = ws_path.display(),
233 mission_section = mission_section,
234 )
235 } else {
236 String::new()
237 };
238
239 let dispatched_intro = if self.dispatched_bone.is_some() {
240 "You are a dispatched worker — follow the FAST PATH section below."
241 } else {
242 r#"Execute exactly ONE cycle of the worker loop. Complete one task (or determine there is no work),
243then STOP. Do not start a second task — the outer loop handles iteration."#
244 };
245
246 let review_step_6 = if self.review_enabled {
247 format!(
248 r#"6. REVIEW REQUEST (risk-aware):
249 First, check the bone's risk label: maw exec default -- bn show <id> — look for risk:low, risk:high, or risk:critical labels.
250 No risk label = risk:medium (standard review, current default).
251
252 RISK:LOW PATH (evals, docs, tests, config) — Self-review and merge directly:
253 No security review needed regardless of REVIEW setting.
254 Add self-review comment: maw exec default -- bn bone comment add <id> "Self-review (risk:low): <what you verified>"
255 Proceed directly to step 7 (FINISH).
256
257 RISK:MEDIUM PATH — Standard review (current default):
258 Try protocol command: edict protocol review <bone-id> --agent {agent}
259 Read the output carefully. If status is Ready, run the suggested commands.
260 If it fails (exit 1 = command unavailable), fall back to manual review request:
261 CHECK for existing review first:
262 - Run: maw exec default -- bn comments <id> | grep "Review created:"
263 - If found, extract <review-id> and skip to requesting review (don't create duplicate)
264 Create review with reviewer assignment (only if none exists):
265 - maw exec $WS -- crit reviews create --agent {agent} --title "<id>: <title>" --description "<summary>" --reviewers {project}-security
266 - IMMEDIATELY record: maw exec default -- bn bone comment add <id> "Review created: <review-id> in workspace $WS"
267 bus statuses set --agent {agent} "Review: <review-id>".
268 Spawn reviewer via @mention: bus send --agent {agent} {project} "Review requested: <review-id> for <id> @{project}-security" -L review-request
269 Do NOT close the bone. Do NOT merge. Do NOT release claims.
270 Output: <promise>COMPLETE</promise>
271 STOP this iteration.
272
273 RISK:HIGH PATH — Security review + failure-mode checklist:
274 Same as risk:medium, but when creating the review, add to description: "risk:high — failure-mode checklist required."
275 The security reviewer will include the 5 failure-mode questions in their review:
276 1. What could fail in production? 2. How would we detect it quickly?
277 3. What is the fastest safe rollback? 4. What dependency could invalidate this plan?
278 5. What assumption is least certain?
279 MUST request security reviewer. Do not skip.
280 STOP this iteration.
281
282 RISK:CRITICAL PATH — Security review + human approval required:
283 Same as risk:high, but ALSO:
284 - Add to review description: "risk:critical — REQUIRES HUMAN APPROVAL before merge."
285 - Post to bus requesting human approval:
286 bus send --agent {agent} {project} "risk:critical review for <id>: requires human approval before merge. {critical_approvers}" -L review-request
287 STOP this iteration."#,
288 agent = self.agent,
289 project = self.project,
290 critical_approvers = if self.critical_approvers.is_empty() {
291 "Check project.critical_approvers in .edict.toml".to_string()
292 } else {
293 format!("Approvers: {}", self.critical_approvers.join(", "))
294 }
295 )
296 } else {
297 r#" REVIEW is disabled. Skip code review.
298 Proceed directly to step 7 (FINISH)."#
299 .to_string()
300 };
301
302 let finish_step_7 = if self.dispatched_bone.is_some() {
303 format!(
304 r#"7. FINISH (dispatched worker — lead handles merge):
305 Try protocol command: edict protocol finish <bone-id> --agent {agent} --no-merge
306 Read the output carefully. If status is Ready, run the suggested commands.
307 If it fails (exit 1 = command unavailable), fall back to manual finish:
308 Close bone: maw exec default -- bn done <id> --reason "Completed"
309 Announce: bus send --agent {agent} {project} "Completed <id>: <title>" -L task-done
310 Release bone claim: bus claims release --agent {agent} "bone://{project}/<id>"
311 Do NOT merge the workspace — the lead dev will handle merging via the merge protocol.
312 Do NOT run the release check — the lead handles releases.
313 Output: <promise>COMPLETE</promise>"#,
314 agent = self.agent,
315 project = self.project,
316 )
317 } else {
318 format!(
319 r#"7. FINISH (only reached after LGTM from step 0, or after step 6 when REVIEW is false):
320 Try protocol command: edict protocol finish <bone-id> --agent {agent}
321 Read the output carefully. If status is Ready, run the suggested commands.
322 If it fails (exit 1 = command unavailable), fall back to manual finish:
323 If a review was conducted:
324 maw exec default -- crit reviews mark-merged <review-id> --agent {agent}.
325 RISK:CRITICAL CHECK — Before merging a risk:critical bone:
326 Verify human approval exists: bus history {project} -n 50 -L review-request | look for approval message referencing this bone/review from an authorized approver.
327 If no approval found, do NOT merge. Post: bus send --agent {agent} {project} "Waiting for human approval on risk:critical <id>" -L review-request. STOP.
328 If approval found, record it: maw exec default -- bn bone comment add <id> "Human approval: <approver> via bus message <msg-id>"
329 maw exec default -- bn bone comment add <id> "Completed by {agent}".
330 maw exec default -- bn done <id> --reason "Completed" --suggest-next.
331 bus send --agent {agent} {project} "Completed <id>: <title>" -L task-done.
332 bus claims release --agent {agent} "bone://{project}/<id>".
333 Keep workspace claim — the lead will merge it.
334 STOP — do not proceed to RELEASE CHECK (only leads check for releases after merging)."#,
335 agent = self.agent,
336 project = self.project,
337 )
338 };
339
340 let review_status_str = if self.review_enabled { "true" } else { "false" };
341 let review_note = if self.review_enabled {
342 "risk:low (evals, docs, tests, config) skips security review — self-review and merge directly. risk:medium gets standard review. risk:high requires failure-mode checklist. risk:critical requires human approval."
343 } else {
344 "Review is disabled. Skip review and proceed to FINISH after describing commit."
345 };
346
347 format!(
348 r#"You are worker agent "{agent}" for project "{project}".
349
350IMPORTANT: Use --agent {agent} on ALL bus and crit commands. bn resolves agent identity from $AGENT/$BOTBUS_AGENT env automatically. Set EDICT_PROJECT={project}.
351
352CRITICAL - HUMAN MESSAGE PRIORITY: If you see a system reminder with "STOP:" showing unread bus messages, these are from humans or other agents trying to reach you. IMMEDIATELY check inbox and respond before continuing your current task. Human questions, clarifications, and redirects take priority over heads-down work.
353
354COMMAND PATTERN — maw exec: All bn commands run in the default workspace. All crit commands run in their workspace.
355 bn: maw exec default -- bn <args>
356 crit: maw exec $WS -- crit <args>
357 other: maw exec $WS -- <command> (cargo test, etc.)
358
359VERSION CONTROL: This project uses Git + maw. Do NOT run jj commands.
360 Workers commit with: maw exec $WS -- git add -A && maw exec $WS -- git commit -m "<message>"
361 The lead handles merging workspaces into main.
362
363{dispatched}{dispatched_intro}
364
365At the end of your work, output exactly one of these completion signals:
366- <promise>COMPLETE</promise> if you completed a task or determined there is no work
367- <promise>BLOCKED</promise> if you are stuck and cannot proceed
368
3690. RESUME CHECK (do this FIRST):
370 Try protocol command: edict protocol resume --agent {agent}
371 If it fails (exit 1 = command unavailable), fall back to manual resume check:
372 Run: bus claims list --agent {agent} --mine
373 If you hold a bone:// claim, you have an in-progress bone from a previous iteration.
374 - Run: maw exec default -- bn comments <bone-id> to understand what was done before and what remains.
375 - Look for workspace info in comments (workspace name and path).
376 - If a "Review created: <review-id>" comment exists:
377 * Find the review: maw exec $WS -- crit review <review-id>
378 * Check review status: maw exec $WS -- crit review <review-id>
379 * If LGTM (approved): proceed to FINISH (step 7) — merge the review and close the bone.
380 * If BLOCKED (changes requested): fix the issues, then re-request review:
381 1. Read threads: maw exec $WS -- crit review <review-id> (threads show inline with comments)
382 2. For each unresolved thread with reviewer feedback:
383 - Fix the code in the workspace (use absolute WS_PATH for file edits)
384 - Reply: maw exec $WS -- crit reply <thread-id> --agent {agent} "Fixed: <what you did>"
385 - Resolve: maw exec $WS -- crit threads resolve <thread-id> --agent {agent}
386 3. Re-request: maw exec $WS -- crit reviews request <review-id> --reviewers {project}-security --agent {agent}
387 5. Announce: bus send --agent {agent} {project} "Review updated: <review-id> — addressed feedback @{project}-security" -L review-response
388 STOP this iteration — wait for re-review.
389 * If PENDING (no votes yet): STOP this iteration. Wait for the reviewer.
390 * If review not found: DO NOT merge or create a new review. The reviewer may still be starting up (hooks have latency). STOP this iteration and wait. Only create a new review if the workspace was destroyed AND 3+ iterations have passed since the review comment.
391 - If no review comment (work was in progress when session ended):
392 * Read the workspace code to see what's already done.
393 * Complete the remaining work in the EXISTING workspace — do NOT create a new one.
394 * After completing: maw exec default -- bn bone comment add <id> "Resumed and completed: <what you finished>".
395 * Then proceed to step 6 (REVIEW REQUEST) or step 7 (FINISH).
396 If no active claims: proceed to step 1 (INBOX).
397
3981. INBOX (do this before triaging):
399 Run: bus inbox --agent {agent} --channels {project} --mark-read
400 For each message:
401 - Task request (-L task-request or asks for work): create a bone with maw exec default -- bn create.
402 - Status check or question: reply on bus, do NOT create a bone.
403 - Feedback (-L feedback): if it contains a bug report, feature request, or actionable work — create a bone. Evaluate critically: is this a real issue? Is it well-scoped? Set priority accordingly. Then acknowledge on bus.
404 - Announcements from other agents ("Working on...", "Completed...", "online"): ignore, no action.
405 - Duplicate of existing bone: do NOT create another bone, note it covers the request.
406
4072. TRIAGE: Check maw exec default -- bn next. If no ready bones and inbox created none, say "NO_WORK_AVAILABLE" and stop.
408 GROOM each ready bone (maw exec default -- bn show <id>): ensure clear title, description with acceptance criteria
409 and testing strategy, appropriate priority, and risk label. Fix anything missing, comment what you changed.
410 RISK LABELS: Assess each bone for risk using these dimensions: blast radius, data sensitivity, reversibility, dependency uncertainty.
411 - risk:low — typo fixes, doc updates, config tweaks (add label: bn bone tag <id> risk:low)
412 - risk:medium — standard features/bugs (default, no label needed)
413 - risk:high — security-sensitive, data integrity, user-visible behavior changes (add label)
414 - risk:critical — irreversible actions, migrations, regulated changes (add label)
415 Any agent can escalate risk upward. Downgrades require lead approval with justification comment.
416 Use maw exec default -- bn --robot-next to pick exactly one small task. If the task is large, break it down with
417 maw exec default -- bn create + bn triage dep add, then bn next again. If a bone is claimed
418 (bus claims check --agent {agent} "bone://{project}/<id>"), skip it.
419
420 MISSION CONTEXT: After picking a bone, check if it has a mission:bd-xxx label (visible in bn show output).
421 If it does, read the mission bone for shared context:
422 maw exec default -- bn show <mission-id>
423 Note the mission's Outcome, Constraints, and Stop criteria. Check siblings:
424 maw exec default -- bn list -l "mission:<mission-id>"
425 Use this context to understand how your work fits into the larger effort.
426
427 SIBLING COORDINATION (missions only):
428 When working on a mission bone, you share the codebase with sibling workers. Coordinate through bus:
429
430 READ siblings: Before editing a file listed in EDICT_FILE_HINTS as owned by a sibling, and periodically
431 during work (~every 5 minutes), check for sibling messages:
432 bus history {project} -n 10 -L "mission:<mission-id>" --since "5 minutes ago"
433 Look for coord:interface messages — these tell you about API/schema/config changes siblings made.
434 If a sibling changed something you depend on, adapt your implementation to match.
435
436 POST discoveries: When you change an API, schema, config format, shared type, or exported interface
437 that siblings might depend on, announce it immediately:
438 bus send --agent {agent} {project} "<file>: <what changed and why>" -L coord:interface -L "mission:<mission-id>"
439
440 COORDINATION LABELS on bus messages:
441 - coord:interface — API/schema/config changes that affect siblings
442 - coord:blocker — You need something from a sibling: bus send --agent {agent} {project} "Blocked by <sibling-bone>: <reason>" -L coord:blocker -L "mission:<mission-id>"
443 - task-done — Signal completion: bus send --agent {agent} {project} "Completed <id>" -L task-done -L "mission:<mission-id>"
444
4453. START: Try protocol command: edict protocol start <bone-id> --agent {agent}
446 Read the output carefully. If status is Ready, run the suggested commands.
447 If it fails (exit 1 = command unavailable), fall back to manual start:
448 maw exec default -- bn do <id>.
449 bus claims stake --agent {agent} "bone://{project}/<id>" -m "<id>".
450 Create workspace: run maw ws create --random. Note the workspace name AND absolute path
451 from the output (e.g., name "frost-castle", path "/abs/path/ws/frost-castle").
452 Store the name as WS and the absolute path as WS_PATH.
453 IMPORTANT: All file operations (Read, Write, Edit) must use the absolute WS_PATH.
454 For commands in the workspace: maw exec $WS -- <command>.
455 Do NOT cd into the workspace and stay there — the workspace is destroyed during finish.
456 bus claims stake --agent {agent} "workspace://{project}/$WS" -m "<id>".
457 maw exec default -- bn bone comment add <id> "Started in workspace $WS ($WS_PATH)".
458 bus statuses set --agent {agent} "Working: <id>" --ttl 30m.
459 Announce: bus send --agent {agent} {project} "Working on <id>: <title>" -L task-claim.
460
4614. WORK: maw exec default -- bn show <id>, then implement the task in the workspace.
462 If this bone is part of a mission, check bus for sibling updates BEFORE starting implementation:
463 bus history {project} -n 10 -L "mission:<mission-id>" -L coord:interface
464 Adapt your approach if siblings have already defined interfaces you need to consume or conform to.
465 Add at least one progress comment: maw exec default -- bn bone comment add <id> "Progress: ...".
466
4675. STUCK CHECK: If same approach tried twice, info missing, or tool fails repeatedly — you are
468 stuck. maw exec default -- bn bone comment add <id> "Blocked: <details>".
469 bus statuses set --agent {agent} "Blocked: <short reason>".
470 bus send --agent {agent} {project} "Stuck on <id>: <reason>" -L task-blocked.
471 maw exec default -- bn bone tag <id> blocked.
472 Release: bus claims release --agent {agent} "bone://{project}/<id>".
473 Output: <promise>BLOCKED</promise>
474 Stop this cycle.
475
476{review_step}
477
478{finish_step}
479
4808. CLEANUP (always run before stopping, even on error or BLOCKED):
481 Try protocol command: edict protocol cleanup --agent {agent}
482 If it fails (exit 1 = command unavailable), fall back to manual cleanup:
483 bus statuses clear --agent {agent}
484 (bn is event-sourced — no sync needed)
485
486Key rules:
487- Exactly one small task per cycle.
488- Always finish or release before stopping.
489- If claim denied, pick something else.
490- All bus and crit commands use --agent {agent}.
491- All file operations use the absolute workspace path from maw ws create output. Do NOT cd into the workspace and stay there.
492- All bn commands: maw exec default -- bn ...
493- All crit/git commands in a workspace: maw exec $WS -- crit/git ...
494- If a tool behaves unexpectedly, report it: bus send --agent {agent} {project} "Tool issue: <details>" -L tool-issue.
495- STOP after completing one task or determining no work. Do not loop.
496- Always output <promise>COMPLETE</promise> or <promise>BLOCKED</promise> at the end.
497- RISK LABELS: Check bone risk labels before review. REVIEW={review_status}. {review_note}"#,
498 agent = self.agent,
499 project = self.project,
500 dispatched = dispatched_section,
501 dispatched_intro = dispatched_intro,
502 review_step = review_step_6,
503 finish_step = finish_step_7,
504 review_status = review_status_str,
505 review_note = review_note,
506 )
507 }
508}
509
/// Outcome of one agent run, derived from the completion signal found in the
/// agent's output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoopStatus {
    /// The output carried the `<promise>COMPLETE</promise>` signal.
    Complete,
    /// The output carried the `<promise>BLOCKED</promise>` signal.
    Blocked,
    /// Neither completion signal was found in the output.
    Unknown,
}
517
/// Prints to stderr, for each build-related variable, its effective value and
/// whether that value agrees with `config_env` (the env entries taken from the
/// project config).
///
/// A variable that is unset or empty in the process environment is reported as
/// `<unset>`; if any variable is in that state a closing hint is printed.
fn emit_build_env_diagnostic(config_env: &std::collections::HashMap<String, String>) {
    const BUILD_VARS: &[(&str, &str)] = &[
        ("CARGO_BUILD_JOBS", "limits parallel rustc processes"),
        ("RUSTC_WRAPPER", "enables sccache for build caching"),
        ("SCCACHE_DIR", "sccache cache directory"),
    ];

    eprintln!("--- build env diagnostic ---");

    let mut unset_seen = false;
    for (var, purpose) in BUILD_VARS.iter().copied() {
        // An empty value is treated the same as an unset one.
        let live = std::env::var(var).ok().filter(|v| !v.is_empty());
        let configured = config_env.get(var);

        match (live, configured) {
            (Some(val), Some(cfg_val)) => {
                if &val == cfg_val {
                    eprintln!(" {var}={val} (from config)");
                } else {
                    eprintln!(" {var}={val} (effective; config has {cfg_val})");
                }
            }
            (Some(val), None) => {
                eprintln!(" {var}={val} (from environment, not in config)");
            }
            (None, Some(cfg_val)) => {
                unset_seen = true;
                eprintln!(" {var}=<unset> (config has {cfg_val}, failed to apply?)");
            }
            (None, None) => {
                unset_seen = true;
                eprintln!(" {var}=<unset> — warning: {purpose}");
            }
        }
    }

    if unset_seen {
        eprintln!(
            " ⚠ Some build env vars are unset. Consider adding them to .edict.toml [env]"
        );
        eprintln!(
            " to prevent OOM from unthrottled parallel builds in multi-agent setups."
        );
    }

    eprintln!("--- end build env diagnostic ---");
}
579
580fn load_config(root: &Path) -> anyhow::Result<Config> {
583 let (config_path, _config_dir) = crate::config::find_config_in_project(root)?;
584 Config::load(&config_path)
585}
586
587fn run_agent_with_fallback(
593 prompt: &str,
594 model_pool: &[String],
595 timeout: u64,
596) -> anyhow::Result<String> {
597 for (i, model) in model_pool.iter().enumerate() {
598 if model_pool.len() > 1 {
599 eprintln!("Trying model {}/{}: {}", i + 1, model_pool.len(), model);
600 }
601 match try_run_agent(prompt, model, timeout) {
602 Ok(output) => {
603 if is_rate_limit_output(&output) {
604 eprintln!(
605 "Rate limited on {} (detected in output), trying next model...",
606 model
607 );
608 crate::telemetry::metrics::counter(
609 "edict.worker.rate_limit_retries_total",
610 1,
611 &[("model", model)],
612 );
613 continue;
614 }
615 if output.trim().is_empty() && i + 1 < model_pool.len() {
619 eprintln!(
620 "Empty output from {} (process likely hung), trying next model...",
621 model
622 );
623 continue;
624 }
625 return Ok(output);
626 }
627 Err(e) => {
628 let err_str = format!("{e:#}");
629 if (is_rate_limit_error(&err_str) || err_str.contains("exited with code"))
630 && i + 1 < model_pool.len()
631 {
632 eprintln!(
633 "Failed on {} ({}), trying next model...",
634 model,
635 err_str.lines().next().unwrap_or("error")
636 );
637 continue;
638 }
639 return Err(e);
640 }
641 }
642 }
643 anyhow::bail!(
644 "All {} models in pool exhausted (rate limited)",
645 model_pool.len()
646 )
647}
648
/// Reports whether agent output looks like a rate-limit response.
///
/// The check is case-insensitive and requires BOTH the text `429` and at least
/// one rate-limit phrase.
fn is_rate_limit_output(output: &str) -> bool {
    const LIMIT_PHRASES: [&str; 5] = [
        "rate limit",
        "rate_limit",
        "quota",
        "exhausted your capacity",
        "resource_exhausted",
    ];

    let haystack = output.to_lowercase();
    if !haystack.contains("429") {
        return false;
    }
    LIMIT_PHRASES.iter().any(|&phrase| haystack.contains(phrase))
}
659
/// Reports whether an error message looks rate-limit related.
///
/// Looser than the output check: case-insensitively, ANY one of the markers
/// (including a bare `429`) is enough.
fn is_rate_limit_error(err: &str) -> bool {
    const MARKERS: [&str; 5] = [
        "429",
        "rate limit",
        "rate_limit",
        "quota",
        "resource_exhausted",
    ];

    let message = err.to_lowercase();
    MARKERS.iter().any(|&marker| message.contains(marker))
}
669
670fn try_run_agent(prompt: &str, model: &str, timeout: u64) -> anyhow::Result<String> {
675 use std::io::{BufRead, BufReader};
676 use std::process::{Command, Stdio};
677
678 let timeout_string = timeout.to_string();
679 let mut args = vec!["run", "agent", prompt, "-t", &timeout_string];
680
681 if !model.is_empty() {
683 args.push("-m");
684 args.push(model);
685 }
686
687 let mut child = Command::new("edict")
688 .args(&args)
689 .stdin(Stdio::null())
690 .stdout(Stdio::piped())
691 .stderr(Stdio::inherit())
692 .spawn()
693 .context("spawning edict run agent")?;
694
695 let stdout = child.stdout.take().context("capturing stdout")?;
696 let reader = BufReader::new(stdout);
697
698 let mut output = String::new();
699 for line in reader.lines() {
700 let line = line.context("reading line from edict run agent")?;
701 eprintln!("{}", line);
703 output.push_str(&line);
704 output.push('\n');
705 }
706
707 let status = child.wait().context("waiting for edict run agent")?;
708 if status.success() {
709 Ok(output)
710 } else {
711 let code = status.code().unwrap_or(-1);
712 anyhow::bail!("edict run agent exited with code {code}")
713 }
714}
715
716fn parse_completion_signal(output: &str) -> LoopStatus {
718 if output.contains("<promise>COMPLETE</promise>") {
719 LoopStatus::Complete
720 } else if output.contains("<promise>BLOCKED</promise>") {
721 LoopStatus::Blocked
722 } else {
723 LoopStatus::Unknown
724 }
725}
726
727fn register_cleanup_handlers(agent: &str, project: &str) {
729 let agent = agent.to_string();
730 let project = project.to_string();
731
732 ctrlc::set_handler(move || {
733 eprintln!("Received interrupt signal, cleaning up...");
734 let _ = cleanup(&agent, &project);
735 process::exit(0);
736 })
737 .expect("Error setting Ctrl-C handler");
738}
739
740fn cleanup(agent: &str, project: &str) -> anyhow::Result<()> {
742 eprintln!("Cleaning up...");
743
744 let result = Tool::new("bn")
751 .args(&["list", "--state", "doing", "--assignee", agent, "--json"])
752 .in_workspace("default")?
753 .new_process_group()
754 .run();
755
756 if let Ok(output) = result
757 && let Ok(bones) = output.parse_json::<Vec<serde_json::Value>>()
758 {
759 for bone in bones {
760 if let Some(id) = bone.get("id").and_then(|v| v.as_str()) {
761 let _ = Tool::new("bn")
763 .args(&[
764 "bone",
765 "comment",
766 "add",
767 id,
768 &format!("Worker {agent} exited without completing. Needs reassignment."),
769 ])
770 .in_workspace("default")?
771 .new_process_group()
772 .run();
773 eprintln!("Noted orphaned bone {id}");
774 }
775 }
776 }
777
778 let _ = Tool::new("bus")
780 .args(&[
781 "send",
782 "--agent",
783 agent,
784 project,
785 &format!("Agent {agent} signing off."),
786 "-L",
787 "agent-idle",
788 ])
789 .new_process_group()
790 .run();
791
792 let _ = Tool::new("bus")
794 .args(&["statuses", "clear", "--agent", agent])
795 .new_process_group()
796 .run();
797
798 let _ = Tool::new("bus")
800 .args(&[
801 "claims",
802 "release",
803 "--agent",
804 agent,
805 &format!("agent://{agent}"),
806 ])
807 .new_process_group()
808 .run();
809
810 let _ = Tool::new("bus")
812 .args(&["claims", "release", "--agent", agent, "--all"])
813 .new_process_group()
814 .run();
815
816 eprintln!("Cleanup complete for {agent}.");
819 Ok(())
820}
821
822pub fn run_worker_loop(
824 project_root: Option<PathBuf>,
825 agent: Option<String>,
826 model: Option<String>,
827) -> anyhow::Result<()> {
828 let worker = WorkerLoop::new(project_root, agent, model)?;
829
830 let bone_info = worker
832 .dispatched_bone
833 .as_deref()
834 .unwrap_or("(triage)");
835 let ws_info = worker
836 .dispatched_workspace
837 .as_deref()
838 .unwrap_or("(none)");
839 let _ = Tool::new("bus")
840 .args(&[
841 "send",
842 "--agent",
843 &worker.agent,
844 &worker.project,
845 &format!("Worker started: {bone_info} in ws/{ws_info}"),
846 "-L",
847 "worker-lifecycle",
848 ])
849 .run();
850
851 let status = worker.run_once();
852
853 let exit_msg = match &status {
855 Ok(LoopStatus::Complete) => format!("Worker exited OK: {bone_info} COMPLETE"),
856 Ok(LoopStatus::Blocked) => format!("Worker exited OK: {bone_info} BLOCKED"),
857 Ok(LoopStatus::Unknown) => format!("Worker exited: {bone_info} (no completion signal)"),
858 Err(e) => format!("Worker exited ERROR: {bone_info} — {e}"),
859 };
860 let _ = Tool::new("bus")
861 .args(&[
862 "send",
863 "--agent",
864 &worker.agent,
865 &worker.project,
866 &exit_msg,
867 "-L",
868 "worker-lifecycle",
869 ])
870 .run();
871
872 match status? {
873 LoopStatus::Complete => {
874 eprintln!("Worker loop completed successfully");
875 Ok(())
876 }
877 LoopStatus::Blocked => {
878 eprintln!("Worker loop blocked");
879 Ok(())
880 }
881 LoopStatus::Unknown => {
882 eprintln!("Warning: completion signal not found in output");
883 Ok(())
884 }
885 }
886}
887
#[cfg(test)]
mod tests {
    use super::*;

    /// Overwrites `EDICT_BONE` and `EDICT_WORKSPACE` for the current process.
    fn set_dispatch_env(bone: &str, workspace: &str) {
        // NOTE(review): `set_var` is only sound while no other thread reads or
        // writes the environment, and the default test harness runs tests on
        // several threads — confirm this is acceptable for these tests.
        unsafe {
            std::env::set_var("EDICT_BONE", bone);
            std::env::set_var("EDICT_WORKSPACE", workspace);
        }
    }

    /// Builds a `WorkerLoop` with the fixed test identity used by every prompt
    /// test; only the review flag and the dispatched bone/workspace vary.
    fn worker_fixture(
        review_enabled: bool,
        bone: Option<&str>,
        workspace: Option<&str>,
    ) -> WorkerLoop {
        WorkerLoop {
            project_root: PathBuf::from("/test"),
            agent: "test-worker".to_string(),
            project: "testproject".to_string(),
            model_pool: vec!["haiku".to_string()],
            timeout: 900,
            review_enabled,
            critical_approvers: vec![],
            dispatched_bone: bone.map(String::from),
            dispatched_workspace: workspace.map(String::from),
            dispatched_mission: None,
            dispatched_siblings: None,
            dispatched_mission_outcome: None,
            dispatched_file_hints: None,
        }
    }

    // --- completion signal parsing ---

    #[test]
    fn parse_completion_signal_complete() {
        let status = parse_completion_signal("some text\n<promise>COMPLETE</promise>\nmore text");
        assert_eq!(status, LoopStatus::Complete);
    }

    #[test]
    fn parse_completion_signal_blocked() {
        let status = parse_completion_signal("error occurred\n<promise>BLOCKED</promise>");
        assert_eq!(status, LoopStatus::Blocked);
    }

    #[test]
    fn parse_completion_signal_missing() {
        let status = parse_completion_signal("no signal here");
        assert_eq!(status, LoopStatus::Unknown);
    }

    // --- prompt construction ---

    #[test]
    fn build_prompt_contains_agent_identity() {
        set_dispatch_env("", "");

        let prompt = worker_fixture(true, None, None).build_prompt();

        assert!(prompt.contains("test-worker"));
        assert!(prompt.contains("testproject"));
        assert!(prompt.contains("RESUME CHECK"));
        assert!(prompt.contains("INBOX"));
        assert!(prompt.contains("TRIAGE"));
    }

    #[test]
    fn build_prompt_dispatched_fast_path() {
        set_dispatch_env("bd-test", "test-ws");

        let prompt = worker_fixture(true, Some("bd-test"), Some("test-ws")).build_prompt();

        assert!(prompt.contains("DISPATCHED WORKER — FAST PATH"));
        assert!(prompt.contains("Pre-assigned bone: bd-test"));
        assert!(prompt.contains("Pre-assigned workspace: test-ws"));
        assert!(prompt.contains("Skip steps 0 (RESUME CHECK), 1 (INBOX), and 2 (TRIAGE)"));
    }

    #[test]
    fn build_prompt_contains_all_protocol_commands() {
        set_dispatch_env("", "");

        let prompt = worker_fixture(true, None, None).build_prompt();

        // Each protocol command must be mentioned by its full invocation.
        for command in [
            "edict protocol resume",
            "edict protocol start",
            "edict protocol review",
            "edict protocol finish",
            "edict protocol cleanup",
        ] {
            assert!(
                prompt.contains(command),
                "worker prompt must reference '{command}'"
            );
        }
    }

    #[test]
    fn build_prompt_review_disabled() {
        set_dispatch_env("", "");

        let prompt = worker_fixture(false, None, None).build_prompt();

        assert!(prompt.contains("REVIEW is disabled"));
        assert!(prompt.contains("Skip code review"));
        assert!(prompt.contains("REVIEW=false"));
    }

    #[test]
    fn build_prompt_contains_protocol_fallback_wording() {
        set_dispatch_env("", "");

        let prompt = worker_fixture(true, None, None).build_prompt();

        assert!(
            prompt.contains("If it fails (exit 1 = command unavailable), fall back"),
            "worker prompt must contain protocol fallback wording for unavailable commands"
        );

        assert!(
            prompt.contains("Try protocol command:"),
            "worker prompt must guide agents to try protocol commands first"
        );

        // (protocol command fragment, step name used in the failure message)
        let expected_steps = [
            ("protocol resume", "resume check"),
            ("protocol start", "start"),
            ("protocol review", "review request"),
            ("protocol finish", "finish"),
            ("protocol cleanup", "cleanup"),
        ];

        for (protocol_cmd, step_name) in expected_steps {
            assert!(
                prompt.contains(protocol_cmd),
                "worker prompt must reference '{protocol_cmd}' in {step_name} step"
            );
        }
    }

    // --- rate limit detection ---

    #[test]
    fn rate_limit_detection_output() {
        // Flagged as rate limiting.
        assert!(is_rate_limit_output("Error 429: rate limit exceeded"));
        assert!(is_rate_limit_output("HTTP 429 - quota exceeded"));
        assert!(is_rate_limit_output("429 resource_exhausted"));
        assert!(is_rate_limit_output(
            "Got 429: You have exhausted your capacity"
        ));

        // Not flagged, including a bare "429" with no accompanying text.
        assert!(!is_rate_limit_output("Everything is fine"));
        assert!(!is_rate_limit_output("Error 500: server error"));
        assert!(!is_rate_limit_output("429"));
    }

    #[test]
    fn rate_limit_detection_error() {
        // Flagged as rate limiting.
        assert!(is_rate_limit_error("429 Too Many Requests"));
        assert!(is_rate_limit_error("rate limit exceeded"));
        assert!(is_rate_limit_error("quota exhausted"));
        assert!(is_rate_limit_error("resource_exhausted"));

        // Ordinary failures are not flagged.
        assert!(!is_rate_limit_error("normal error"));
        assert!(!is_rate_limit_error("exit code 1"));
    }

    // --- build environment diagnostic ---

    #[test]
    fn build_env_diagnostic_does_not_panic() {
        // No assertions: the test passes as long as the call returns.
        let config_env = std::collections::HashMap::from([(
            "CARGO_BUILD_JOBS".to_string(),
            "2".to_string(),
        )]);
        emit_build_env_diagnostic(&config_env);
    }

    #[test]
    fn build_env_diagnostic_with_empty_config() {
        // No assertions: the test passes as long as the call returns.
        let config_env = std::collections::HashMap::new();
        emit_build_env_diagnostic(&config_env);
    }
}