1use crate::budget::{BudgetLimit, BudgetState};
20use crate::level::{GateDecision, HumanGate, LoopLevel, ProposedAction};
21use crate::spec::LoopSpec;
22use async_trait::async_trait;
23use harness_core::{Memory, MemoryEntry, Model, SubagentStatus, Task, Tool, ToolRisk};
24use harness_loop::{Subagent, SubagentReport, SubagentSpec};
25use harness_sandbox::{NullSandbox, Sandbox};
26use std::path::PathBuf;
27use std::sync::Arc;
28
/// Receipt describing an action that an [`ActionExecutor`] carried out.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ActionReceipt {
    /// Kind of action that was executed (for example `"commit"`).
    pub kind: String,
    /// Human-readable description of what the executor did.
    pub summary: String,
}
37
38impl ActionReceipt {
39 pub fn new(kind: impl Into<String>, summary: impl Into<String>) -> Self {
40 Self {
41 kind: kind.into(),
42 summary: summary.into(),
43 }
44 }
45}
46
47#[derive(Debug, thiserror::Error)]
48#[non_exhaustive]
49pub enum ActionError {
50 #[error("action executor: {0}")]
51 Exec(String),
52}
53
54#[async_trait]
62pub trait ActionExecutor: Send + Sync {
63 async fn execute(
64 &self,
65 spec: &LoopSpec,
66 action: &ProposedAction,
67 world: &mut harness_core::World,
68 ) -> Result<ActionReceipt, ActionError>;
69}
70
71pub struct ApprovalOnlyExecutor;
75
76#[async_trait]
77impl ActionExecutor for ApprovalOnlyExecutor {
78 async fn execute(
79 &self,
80 spec: &LoopSpec,
81 action: &ProposedAction,
82 _world: &mut harness_core::World,
83 ) -> Result<ActionReceipt, ActionError> {
84 Ok(ActionReceipt::new(
85 action.kind.clone(),
86 format!(
87 "loop `{}` auto-approved `{}`; no external action executor configured",
88 spec.name, action.kind
89 ),
90 ))
91 }
92}
93
94pub struct CallbackActionExecutor<F>(pub F)
97where
98 F: Fn(
99 &LoopSpec,
100 &ProposedAction,
101 &mut harness_core::World,
102 ) -> Result<ActionReceipt, ActionError>
103 + Send
104 + Sync;
105
106#[async_trait]
107impl<F> ActionExecutor for CallbackActionExecutor<F>
108where
109 F: Fn(
110 &LoopSpec,
111 &ProposedAction,
112 &mut harness_core::World,
113 ) -> Result<ActionReceipt, ActionError>
114 + Send
115 + Sync,
116{
117 async fn execute(
118 &self,
119 spec: &LoopSpec,
120 action: &ProposedAction,
121 world: &mut harness_core::World,
122 ) -> Result<ActionReceipt, ActionError> {
123 (self.0)(spec, action, world)
124 }
125}
126
127#[derive(Debug, Clone, PartialEq, Eq)]
129pub enum RoundOutcome {
130 Reported,
133 Proceeded,
136 Escalated { reason: String },
138 BudgetExhausted { limit: BudgetLimit },
140 Failed { error: String },
143}
144
145#[derive(Debug, Clone)]
149pub struct RoundReport {
150 pub loop_name: String,
151 pub intent: String,
152 pub level: LoopLevel,
153 pub maker: Option<SubagentReport>,
154 pub checker: Option<SubagentReport>,
155 pub decision: Option<GateDecision>,
156 pub action: Option<ActionReceipt>,
157 pub input_tokens: u64,
158 pub output_tokens: u64,
159 pub outcome: RoundOutcome,
160}
161
162impl RoundReport {
163 pub fn total_tokens(&self) -> u64 {
164 self.input_tokens + self.output_tokens
165 }
166
167 pub fn should_deliver(&self) -> bool {
170 !matches!(self.outcome, RoundOutcome::Proceeded)
171 }
172
173 pub fn render(&self) -> String {
175 let mut s = format!(
176 "[{}] loop `{}` ({})\nintent: {}\n",
177 self.level.label(),
178 self.loop_name,
179 outcome_label(&self.outcome),
180 self.intent
181 );
182 if let Some(m) = &self.maker {
183 s.push_str(&format!("maker: {:?} in {} iters\n", m.status, m.iters));
184 if let Some(t) = &m.text {
185 s.push_str(&format!("{}\n", t.trim()));
186 }
187 }
188 if let Some(c) = &self.checker {
189 s.push_str(&format!("checker: {:?} in {} iters\n", c.status, c.iters));
190 }
191 if let RoundOutcome::Escalated { reason } = &self.outcome {
192 s.push_str(&format!("escalation: {reason}\n"));
193 }
194 if let Some(a) = &self.action {
195 s.push_str(&format!("action: {} — {}\n", a.kind, a.summary));
196 }
197 s.push_str(&format!(
198 "tokens: {} in / {} out\n",
199 self.input_tokens, self.output_tokens
200 ));
201 s
202 }
203}
204
205fn outcome_label(o: &RoundOutcome) -> &'static str {
206 match o {
207 RoundOutcome::Reported => "reported",
208 RoundOutcome::Proceeded => "proceeded",
209 RoundOutcome::Escalated { .. } => "escalated",
210 RoundOutcome::BudgetExhausted { .. } => "budget-exhausted",
211 RoundOutcome::Failed { .. } => "failed",
212 }
213}
214
215pub struct LoopEngine {
219 spec: LoopSpec,
220 model: Arc<dyn Model>,
221 maker_tools: Vec<Arc<dyn Tool>>,
222 checker_tools: Vec<Arc<dyn Tool>>,
223 sandbox: Arc<dyn Sandbox>,
224 gate: Arc<dyn HumanGate>,
225 action_executor: Arc<dyn ActionExecutor>,
226 memory: Option<Arc<dyn Memory>>,
227}
228
229impl LoopEngine {
230 pub fn new(spec: LoopSpec, model: Arc<dyn Model>) -> Self {
235 let gate = crate::level::default_gate_for(spec.level);
236 Self {
237 spec,
238 model,
239 maker_tools: Vec::new(),
240 checker_tools: Vec::new(),
241 sandbox: Arc::new(NullSandbox::new(PathBuf::from("."))),
242 gate,
243 action_executor: Arc::new(ApprovalOnlyExecutor),
244 memory: None,
245 }
246 }
247
248 pub fn with_maker_tool(mut self, t: Arc<dyn Tool>) -> Self {
249 self.maker_tools.push(t);
250 self
251 }
252 pub fn with_checker_tool(mut self, t: Arc<dyn Tool>) -> Self {
253 self.checker_tools.push(t);
254 self
255 }
256 pub fn with_sandbox(mut self, s: Arc<dyn Sandbox>) -> Self {
257 self.sandbox = s;
258 self
259 }
260 pub fn with_gate(mut self, g: Arc<dyn HumanGate>) -> Self {
261 self.gate = g;
262 self
263 }
264 pub fn with_action_executor(mut self, e: Arc<dyn ActionExecutor>) -> Self {
265 self.action_executor = e;
266 self
267 }
268 pub fn with_memory(mut self, m: Arc<dyn Memory>) -> Self {
269 self.memory = Some(m);
270 self
271 }
272
273 pub fn spec(&self) -> &LoopSpec {
274 &self.spec
275 }
276
277 pub async fn run_once(&self) -> RoundReport {
282 let report = self.run_round().await;
283 self.record(&report).await;
284 report
285 }
286
287 async fn run_round(&self) -> RoundReport {
288 let mut budget = BudgetState::new(self.spec.budget);
289 let level = self.spec.level;
290
291 let prior = self.recall_state().await;
293
294 let mut handle = match self.sandbox.spawn().await {
296 Ok(h) => h,
297 Err(e) => {
298 return self.failed(format!("sandbox spawn failed: {e}"), &budget, None, None);
299 }
300 };
301
302 let maker_desc = self.maker_task_description(&prior);
304 let maker = SubagentSpec::new(
305 format!("{}:maker", self.spec.name),
306 Task {
307 description: maker_desc,
308 source: None,
309 deadline: None,
310 },
311 )
312 .with_max_iters(budget.max_iters());
313 let maker = with_tools_for_level(maker, &self.maker_tools, level);
314 let maker_report = match Subagent::new(dyn_model(&self.model), maker)
315 .run(&mut handle.world)
316 .await
317 {
318 Ok(r) => r,
319 Err(e) => {
320 return self.failed(format!("maker failed: {e}"), &budget, None, None);
321 }
322 };
323 budget.add(&maker_report.usage);
324 if let Some(limit) = budget.exceeded() {
325 return self.budget_exhausted(limit, &budget, Some(maker_report), None);
326 }
327
328 let checker_desc = self.checker_task_description(&maker_report);
330 let checker = SubagentSpec::new(
331 format!("{}:checker", self.spec.name),
332 Task {
333 description: checker_desc,
334 source: None,
335 deadline: None,
336 },
337 )
338 .with_max_iters(budget.max_iters());
339 let checker = with_tools_for_level(checker, &self.checker_tools, level);
340 let checker_report = match Subagent::new(dyn_model(&self.model), checker)
341 .run(&mut handle.world)
342 .await
343 {
344 Ok(r) => r,
345 Err(e) => {
346 return self.failed(
347 format!("checker failed: {e}"),
348 &budget,
349 Some(maker_report),
350 None,
351 );
352 }
353 };
354 budget.add(&checker_report.usage);
355 if let Some(limit) = budget.exceeded() {
356 return self.budget_exhausted(limit, &budget, Some(maker_report), Some(checker_report));
357 }
358
359 let verified = checker_report.status == SubagentStatus::Done;
361 let summary = checker_report
362 .text
363 .clone()
364 .or_else(|| maker_report.text.clone())
365 .unwrap_or_else(|| self.spec.intent.clone());
366 let proposed = ProposedAction::new(self.spec.action_kind.clone(), summary, verified);
367 let decision = self.gate.decide(level, &proposed);
368
369 let (outcome, action_receipt) = match (&decision, level) {
370 (_, LoopLevel::L1Report) => (RoundOutcome::Reported, None),
372 (GateDecision::AutoProceed, _) => {
373 match self
374 .action_executor
375 .execute(&self.spec, &proposed, &mut handle.world)
376 .await
377 {
378 Ok(receipt) => (RoundOutcome::Proceeded, Some(receipt)),
379 Err(e) => {
380 return self.failed(
381 format!("action executor failed: {e}"),
382 &budget,
383 Some(maker_report),
384 Some(checker_report),
385 );
386 }
387 }
388 }
389 (GateDecision::Escalate { reason }, _) => (
390 RoundOutcome::Escalated {
391 reason: reason.clone(),
392 },
393 None,
394 ),
395 };
396
397 RoundReport {
398 loop_name: self.spec.name.clone(),
399 intent: self.spec.intent.clone(),
400 level,
401 maker: Some(maker_report),
402 checker: Some(checker_report),
403 decision: Some(decision),
404 action: action_receipt,
405 input_tokens: budget.input_tokens,
406 output_tokens: budget.output_tokens,
407 outcome,
408 }
409 }
410
411 fn maker_task_description(&self, prior: &Option<String>) -> String {
412 let write_note = if self.spec.level.maker_may_write() {
413 "You MAY modify files in this workspace to accomplish the task."
414 } else {
415 "READ-ONLY: do NOT modify any files. Investigate and report findings only."
416 };
417 let mut d = format!(
418 "Loop intent: {}\nMaturity level: {}\n{}\n\nTask:\n{}",
419 self.spec.intent,
420 self.spec.level.label(),
421 write_note,
422 self.spec.maker_prompt,
423 );
424 if let Some(p) = prior {
425 d.push_str(&format!("\n\nState from previous rounds:\n{p}"));
426 }
427 d
428 }
429
430 fn checker_task_description(&self, maker: &SubagentReport) -> String {
431 format!(
432 "You are the checker (verifier) for loop `{}`.\nLoop intent: {}\n\n\
433 Verify the work below. Run any available tests and gates, look for \
434 regressions, and decide whether it is safe. Report DoneWithConcerns \
435 if anything is questionable.\n\nMaker's report:\n{}\n\n\
436 Verification task:\n{}",
437 self.spec.name,
438 self.spec.intent,
439 maker.text.as_deref().unwrap_or("(maker produced no text)"),
440 self.spec.checker_prompt,
441 )
442 }
443
444 async fn recall_state(&self) -> Option<String> {
445 let mem = self.memory.as_ref()?;
446 match mem.recall(&self.spec.name, 5).await {
447 Ok(hits) if !hits.is_empty() => Some(
448 hits.iter()
449 .map(|e| format!("- {}", e.content))
450 .collect::<Vec<_>>()
451 .join("\n"),
452 ),
453 Ok(_) => None,
454 Err(e) => {
455 tracing::warn!(loop = %self.spec.name, error = %e, "loop-engine: recall failed");
456 None
457 }
458 }
459 }
460
461 async fn record(&self, report: &RoundReport) {
462 let Some(mem) = self.memory.as_ref() else {
463 return;
464 };
465 let entry = MemoryEntry::new(format!(
466 "{} — {}",
467 outcome_label(&report.outcome),
468 report
469 .checker
470 .as_ref()
471 .and_then(|c| c.text.clone())
472 .or_else(|| report.maker.as_ref().and_then(|m| m.text.clone()))
473 .unwrap_or_else(|| report.intent.clone())
474 ))
475 .with_tags([self.spec.name.clone(), "loop-state".into()])
476 .with_source(format!("loop:{}", self.spec.name));
477 if let Err(e) = mem.write(entry).await {
478 tracing::warn!(loop = %self.spec.name, error = %e, "loop-engine: state write failed");
479 }
480 }
481
482 fn failed(
483 &self,
484 error: String,
485 budget: &BudgetState,
486 maker: Option<SubagentReport>,
487 checker: Option<SubagentReport>,
488 ) -> RoundReport {
489 tracing::warn!(loop = %self.spec.name, %error, "loop-engine: round failed");
490 RoundReport {
491 loop_name: self.spec.name.clone(),
492 intent: self.spec.intent.clone(),
493 level: self.spec.level,
494 maker,
495 checker,
496 decision: None,
497 action: None,
498 input_tokens: budget.input_tokens,
499 output_tokens: budget.output_tokens,
500 outcome: RoundOutcome::Failed { error },
501 }
502 }
503
504 fn budget_exhausted(
505 &self,
506 limit: BudgetLimit,
507 budget: &BudgetState,
508 maker: Option<SubagentReport>,
509 checker: Option<SubagentReport>,
510 ) -> RoundReport {
511 tracing::info!(loop = %self.spec.name, limit = limit.label(), "loop-engine: budget exhausted");
512 RoundReport {
513 loop_name: self.spec.name.clone(),
514 intent: self.spec.intent.clone(),
515 level: self.spec.level,
516 maker,
517 checker,
518 decision: None,
519 action: None,
520 input_tokens: budget.input_tokens,
521 output_tokens: budget.output_tokens,
522 outcome: RoundOutcome::BudgetExhausted { limit },
523 }
524 }
525}
526
527fn dyn_model(m: &Arc<dyn Model>) -> harness_core::DynModel {
528 harness_core::DynModel(m.clone())
529}
530
531fn with_tools_for_level(
532 mut spec: SubagentSpec,
533 tools: &[Arc<dyn Tool>],
534 level: LoopLevel,
535) -> SubagentSpec {
536 for t in tools {
537 if level == LoopLevel::L1Report && !l1_tool_allowed(t.risk()) {
538 tracing::info!(
539 subagent = %spec.name,
540 tool = %t.name(),
541 risk = ?t.risk(),
542 "loop-engine: skipping mutating tool for L1 loop"
543 );
544 continue;
545 }
546 spec = spec.with_tool(t.clone());
547 }
548 spec
549}
550
551fn l1_tool_allowed(risk: ToolRisk) -> bool {
552 matches!(risk, ToolRisk::ReadOnly | ToolRisk::Network)
553}
554
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{AllowlistGate, TokenBudget};
    use async_trait::async_trait;
    use harness_core::{MemoryError, Model, ToolError, ToolResult, ToolSchema, World};
    use harness_models::{MockModel, MockResponse};
    use serde_json::{Value, json};
    use std::sync::{
        Arc as StdArc, Mutex,
        atomic::{AtomicUsize, Ordering},
    };

    /// Baseline spec shared by all tests: fixed name, prompts, and a `commit` action.
    fn test_spec(level: LoopLevel) -> LoopSpec {
        LoopSpec::new("test-loop", "keep the test loop honest", level)
            .with_maker_prompt("make a report")
            .with_checker_prompt("verify the report")
            .with_action_kind("commit")
    }

    /// Mock model that replays `responses` in order.
    fn scripted(responses: impl IntoIterator<Item = MockResponse>) -> Arc<MockModel> {
        let mock = MockModel::new().script_many(responses);
        Arc::new(mock)
    }

    /// Tool stub with a configurable name and risk; invoking it always succeeds.
    #[derive(Clone)]
    struct TestTool {
        schema: ToolSchema,
        risk: ToolRisk,
    }

    impl TestTool {
        fn new(name: &str, risk: ToolRisk) -> Arc<Self> {
            let schema = ToolSchema {
                name: name.into(),
                description: "test tool".into(),
                input: json!({"type": "object"}),
            };
            Arc::new(Self { schema, risk })
        }
    }

    #[async_trait]
    impl Tool for TestTool {
        fn name(&self) -> &str {
            &self.schema.name
        }

        fn schema(&self) -> &ToolSchema {
            &self.schema
        }

        fn risk(&self) -> ToolRisk {
            self.risk
        }

        async fn invoke(&self, _args: Value, _world: &mut World) -> Result<ToolResult, ToolError> {
            let result = ToolResult {
                ok: true,
                content: json!({"ok": true}),
                trace: None,
            };
            Ok(result)
        }
    }

    /// In-memory `Memory` that ignores the query and returns entries in insertion order.
    #[derive(Default)]
    struct TestMemory {
        entries: Mutex<Vec<MemoryEntry>>,
    }

    impl TestMemory {
        fn with_entry(entry: MemoryEntry) -> Self {
            let entries = Mutex::new(vec![entry]);
            Self { entries }
        }

        fn entries(&self) -> Vec<MemoryEntry> {
            let stored = self.entries.lock().unwrap();
            stored.to_vec()
        }
    }

    #[async_trait]
    impl Memory for TestMemory {
        async fn recall(&self, _query: &str, k: usize) -> Result<Vec<MemoryEntry>, MemoryError> {
            let stored = self.entries.lock().unwrap();
            Ok(stored.iter().take(k).cloned().collect())
        }

        async fn write(&self, entry: MemoryEntry) -> Result<(), MemoryError> {
            let mut stored = self.entries.lock().unwrap();
            stored.push(entry);
            Ok(())
        }
    }

    /// L1 loops must only expose read-only/network tools to the subagents.
    #[tokio::test]
    async fn l1_filters_mutating_tools_before_subagent_context() {
        let mock = scripted([
            MockResponse::text("maker report"),
            MockResponse::text("checker report"),
        ]);
        let engine = LoopEngine::new(
            test_spec(LoopLevel::L1Report),
            mock.clone() as Arc<dyn Model>,
        )
        .with_maker_tool(TestTool::new("read", ToolRisk::ReadOnly))
        .with_maker_tool(TestTool::new("write", ToolRisk::Destructive))
        .with_checker_tool(TestTool::new("web", ToolRisk::Network))
        .with_checker_tool(TestTool::new("format", ToolRisk::Idempotent));

        let report = engine.run_once().await;

        assert_eq!(report.outcome, RoundOutcome::Reported);
        let seen = mock.calls();
        assert_eq!(seen.len(), 2);
        assert_eq!(seen[0].tools_available, vec!["read"]);
        assert_eq!(seen[1].tools_available, vec!["web"]);
        assert!(seen[0].task_description.contains("READ-ONLY"));
    }

    /// An allowlisted, verified L3 round proceeds with the default executor and
    /// is not flagged for delivery.
    #[tokio::test]
    async fn l3_allowlisted_verified_round_proceeds_quietly() {
        let mock = scripted([
            MockResponse::text("maker produced patch").with_usage(10, 5),
            MockResponse::text("verified clean").with_usage(7, 3),
        ]);
        let gate = Arc::new(AllowlistGate::new(["commit"]));
        let engine =
            LoopEngine::new(test_spec(LoopLevel::L3Unattended), mock as Arc<dyn Model>)
                .with_gate(gate);

        let report = engine.run_once().await;

        assert_eq!(report.outcome, RoundOutcome::Proceeded);
        assert!(matches!(report.decision, Some(GateDecision::AutoProceed)));
        let receipt = report.action.as_ref().expect("action receipt");
        assert_eq!(receipt.kind, "commit");
        assert!(
            receipt
                .summary
                .contains("no external action executor configured")
        );
        assert!(!report.should_deliver());
        assert_eq!(report.input_tokens, 17);
        assert_eq!(report.output_tokens, 8);
    }

    /// A custom executor is invoked exactly once and its receipt ends up in the report.
    #[tokio::test]
    async fn auto_proceed_invokes_custom_action_executor() {
        let mock = scripted([
            MockResponse::text("maker produced patch"),
            MockResponse::text("verified clean"),
        ]);
        let invocations = StdArc::new(AtomicUsize::new(0));
        let counter = invocations.clone();
        let executor = CallbackActionExecutor(move |spec, action, world| {
            counter.fetch_add(1, Ordering::SeqCst);
            assert_eq!(spec.name, "test-loop");
            assert_eq!(action.kind, "commit");
            assert_eq!(world.repo.root, PathBuf::from("."));
            Ok(ActionReceipt::new("commit", "created commit abc123"))
        });
        let engine =
            LoopEngine::new(test_spec(LoopLevel::L3Unattended), mock as Arc<dyn Model>)
                .with_gate(Arc::new(AllowlistGate::new(["commit"])))
                .with_action_executor(Arc::new(executor));

        let report = engine.run_once().await;

        assert_eq!(invocations.load(Ordering::SeqCst), 1);
        assert_eq!(report.outcome, RoundOutcome::Proceeded);
        let expected = ActionReceipt::new("commit", "created commit abc123");
        assert_eq!(report.action, Some(expected));
        assert!(report.render().contains("action: commit"));
    }

    /// An executor error turns the round into a delivered failure without a receipt.
    #[tokio::test]
    async fn action_executor_failure_fails_round() {
        let mock = scripted([
            MockResponse::text("maker produced patch"),
            MockResponse::text("verified clean"),
        ]);
        let executor = CallbackActionExecutor(|_, _, _| Err(ActionError::Exec("boom".into())));
        let engine =
            LoopEngine::new(test_spec(LoopLevel::L3Unattended), mock as Arc<dyn Model>)
                .with_gate(Arc::new(AllowlistGate::new(["commit"])))
                .with_action_executor(Arc::new(executor));

        let report = engine.run_once().await;

        let RoundOutcome::Failed { error } = &report.outcome else {
            panic!("expected action failure, got {:?}", report.outcome);
        };
        assert!(error.contains("action executor failed"));
        assert!(error.contains("boom"));
        assert!(report.action.is_none());
        assert!(report.should_deliver());
    }

    /// When the maker alone exceeds the total-token budget the checker never runs.
    #[tokio::test]
    async fn budget_exhaustion_after_maker_skips_checker() {
        let mock = scripted([
            MockResponse::text("maker spent too much").with_usage(4, 3),
            MockResponse::text("checker should not run"),
        ]);
        let tight = TokenBudget::iters(4).with_max_total_tokens(5);
        let engine = LoopEngine::new(
            test_spec(LoopLevel::L2Assisted).with_budget(tight),
            mock.clone() as Arc<dyn Model>,
        );

        let report = engine.run_once().await;

        let expected = RoundOutcome::BudgetExhausted {
            limit: BudgetLimit::Total,
        };
        assert_eq!(report.outcome, expected);
        assert!(report.maker.is_some());
        assert!(report.checker.is_none());
        assert_eq!(mock.call_count(), 1);
    }

    /// Prior state is injected into the maker prompt and the round is written back.
    #[tokio::test]
    async fn memory_state_is_recalled_and_round_is_recorded() {
        let mock = scripted([
            MockResponse::text("maker used prior state"),
            MockResponse::text("checker verified"),
        ]);
        let seed = MemoryEntry::new("prior loop state").with_tags(["test-loop", "loop-state"]);
        let memory = Arc::new(TestMemory::with_entry(seed));
        let engine = LoopEngine::new(
            test_spec(LoopLevel::L1Report),
            mock.clone() as Arc<dyn Model>,
        )
        .with_memory(memory.clone());

        let report = engine.run_once().await;

        assert_eq!(report.outcome, RoundOutcome::Reported);
        assert!(
            mock.calls()[0]
                .task_description
                .contains("State from previous rounds:\n- prior loop state")
        );
        let stored = memory.entries();
        assert_eq!(stored.len(), 2);
        let latest = stored.last().unwrap();
        assert!(latest.content.starts_with("reported"));
        assert_eq!(latest.source.as_deref(), Some("loop:test-loop"));
        assert!(latest.tags.iter().any(|t| t == "loop-state"));
    }
}