Skip to main content

harness_loop_engine/
engine.rs

//! `LoopEngine` — the runner that turns a [`LoopSpec`] into recursive,
//! verified, budgeted, gated agent work.
//!
//! One `run_once` is one trip around the anatomical loop:
//!
//! ```text
//! recall state (memory)  ->  isolated sandbox  ->  maker sub-agent
//!   ->  checker sub-agent (tests + gates)  ->  human gate  ->  record state
//! ```
//!
//! The maker/checker split is the verification discipline made structural:
//! one sub-agent proposes, a second, independent one tries to confirm the
//! work is clean. The gate then decides — within the loop's maturity level —
//! whether to proceed automatically or escalate to a human. Every round is
//! bounded by a [`TokenBudget`]; every round's outcome is written back to
//! memory as the durable spine that lets the next round pick up where this
//! one left off.
18
19use crate::budget::{BudgetLimit, BudgetState};
20use crate::level::{GateDecision, HumanGate, LoopLevel, ProposedAction};
21use crate::spec::LoopSpec;
22use async_trait::async_trait;
23use harness_core::{Memory, MemoryEntry, Model, SubagentStatus, Task, Tool, ToolRisk};
24use harness_loop::{Subagent, SubagentReport, SubagentSpec};
25use harness_sandbox::{NullSandbox, Sandbox};
26use std::path::PathBuf;
27use std::sync::Arc;
28
/// Record that an auto-approved action was handed to its executor.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ActionReceipt {
    /// Kind of action that was carried out, e.g. `"commit"` or `"open-pr"`.
    pub kind: String,
    /// Human-readable description of what the executor did.
    pub summary: String,
}

impl ActionReceipt {
    /// Build a receipt from any values convertible into owned strings.
    pub fn new(kind: impl Into<String>, summary: impl Into<String>) -> Self {
        let kind = kind.into();
        let summary = summary.into();
        Self { kind, summary }
    }
}
46
47#[derive(Debug, thiserror::Error)]
48#[non_exhaustive]
49pub enum ActionError {
50    #[error("action executor: {0}")]
51    Exec(String),
52}
53
54/// Carries out a verified, auto-approved action.
55///
56/// Gates decide whether a proposal is allowed. Executors do the side effect:
57/// create a PR, commit a worktree, post a comment, apply a patch, or hand the
58/// proposal to any project-specific system. The engine ships conservative
59/// defaults; production loops should install an executor that matches their
60/// action kinds.
61#[async_trait]
62pub trait ActionExecutor: Send + Sync {
63    async fn execute(
64        &self,
65        spec: &LoopSpec,
66        action: &ProposedAction,
67        world: &mut harness_core::World,
68    ) -> Result<ActionReceipt, ActionError>;
69}
70
71/// Default executor: records that a gate approved the action but performs no
72/// external side effect. This keeps `LoopEngine::new` safe while making the
73/// missing production handoff visible in the round report.
74pub struct ApprovalOnlyExecutor;
75
76#[async_trait]
77impl ActionExecutor for ApprovalOnlyExecutor {
78    async fn execute(
79        &self,
80        spec: &LoopSpec,
81        action: &ProposedAction,
82        _world: &mut harness_core::World,
83    ) -> Result<ActionReceipt, ActionError> {
84        Ok(ActionReceipt::new(
85            action.kind.clone(),
86            format!(
87                "loop `{}` auto-approved `{}`; no external action executor configured",
88                spec.name, action.kind
89            ),
90        ))
91    }
92}
93
94/// Wrap a synchronous callback as an [`ActionExecutor`]. Use this for
95/// application-specific handoffs without defining a bespoke type.
96pub struct CallbackActionExecutor<F>(pub F)
97where
98    F: Fn(
99            &LoopSpec,
100            &ProposedAction,
101            &mut harness_core::World,
102        ) -> Result<ActionReceipt, ActionError>
103        + Send
104        + Sync;
105
106#[async_trait]
107impl<F> ActionExecutor for CallbackActionExecutor<F>
108where
109    F: Fn(
110            &LoopSpec,
111            &ProposedAction,
112            &mut harness_core::World,
113        ) -> Result<ActionReceipt, ActionError>
114        + Send
115        + Sync,
116{
117    async fn execute(
118        &self,
119        spec: &LoopSpec,
120        action: &ProposedAction,
121        world: &mut harness_core::World,
122    ) -> Result<ActionReceipt, ActionError> {
123        (self.0)(spec, action, world)
124    }
125}
126
127/// What one round of a loop did.
128#[derive(Debug, Clone, PartialEq, Eq)]
129pub enum RoundOutcome {
130    /// L1 (or any escalate-by-design round): the loop investigated and the
131    /// finding is delivered as a report. No change was applied.
132    Reported,
133    /// The gate auto-approved a verified proposal — the loop is cleared to
134    /// carry out its action (commit / PR / comment / …).
135    Proceeded,
136    /// Handed to a human with context. The loop will recurse next tick.
137    Escalated { reason: String },
138    /// A spend ceiling was crossed mid-round; the loop stopped early.
139    BudgetExhausted { limit: BudgetLimit },
140    /// The sandbox, maker, or checker errored. Best-effort: the scheduler
141    /// keeps ticking; this round simply produced nothing actionable.
142    Failed { error: String },
143}
144
145/// The full record of a round — the maker/checker reports, token spend, the
146/// gate decision, and the outcome. Suitable for delivery to a channel and
147/// for writing to memory.
148#[derive(Debug, Clone)]
149pub struct RoundReport {
150    pub loop_name: String,
151    pub intent: String,
152    pub level: LoopLevel,
153    pub maker: Option<SubagentReport>,
154    pub checker: Option<SubagentReport>,
155    pub decision: Option<GateDecision>,
156    pub action: Option<ActionReceipt>,
157    pub input_tokens: u64,
158    pub output_tokens: u64,
159    pub outcome: RoundOutcome,
160}
161
162impl RoundReport {
163    pub fn total_tokens(&self) -> u64 {
164        self.input_tokens + self.output_tokens
165    }
166
167    /// Whether this round produced something worth delivering to a human.
168    /// A clean auto-proceed at L3 is intentionally quiet.
169    pub fn should_deliver(&self) -> bool {
170        !matches!(self.outcome, RoundOutcome::Proceeded)
171    }
172
173    /// A compact, human-readable summary for channels and memory.
174    pub fn render(&self) -> String {
175        let mut s = format!(
176            "[{}] loop `{}` ({})\nintent: {}\n",
177            self.level.label(),
178            self.loop_name,
179            outcome_label(&self.outcome),
180            self.intent
181        );
182        if let Some(m) = &self.maker {
183            s.push_str(&format!("maker: {:?} in {} iters\n", m.status, m.iters));
184            if let Some(t) = &m.text {
185                s.push_str(&format!("{}\n", t.trim()));
186            }
187        }
188        if let Some(c) = &self.checker {
189            s.push_str(&format!("checker: {:?} in {} iters\n", c.status, c.iters));
190        }
191        if let RoundOutcome::Escalated { reason } = &self.outcome {
192            s.push_str(&format!("escalation: {reason}\n"));
193        }
194        if let Some(a) = &self.action {
195            s.push_str(&format!("action: {} — {}\n", a.kind, a.summary));
196        }
197        s.push_str(&format!(
198            "tokens: {} in / {} out\n",
199            self.input_tokens, self.output_tokens
200        ));
201        s
202    }
203}
204
205fn outcome_label(o: &RoundOutcome) -> &'static str {
206    match o {
207        RoundOutcome::Reported => "reported",
208        RoundOutcome::Proceeded => "proceeded",
209        RoundOutcome::Escalated { .. } => "escalated",
210        RoundOutcome::BudgetExhausted { .. } => "budget-exhausted",
211        RoundOutcome::Failed { .. } => "failed",
212    }
213}
214
215/// Binds a [`LoopSpec`] to the live pieces it needs to run: a model, the
216/// maker/checker tool sets, an isolation sandbox, a gate, and (optionally)
217/// memory for the state spine.
218pub struct LoopEngine {
219    spec: LoopSpec,
220    model: Arc<dyn Model>,
221    maker_tools: Vec<Arc<dyn Tool>>,
222    checker_tools: Vec<Arc<dyn Tool>>,
223    sandbox: Arc<dyn Sandbox>,
224    gate: Arc<dyn HumanGate>,
225    action_executor: Arc<dyn ActionExecutor>,
226    memory: Option<Arc<dyn Memory>>,
227}
228
229impl LoopEngine {
230    /// Construct an engine. By default the maker and checker run with no
231    /// tools, in a [`NullSandbox`] rooted at the current directory, with the
232    /// gate the spec's level implies (`AlwaysEscalate` for L1/L2). Override
233    /// any of these with the builder methods.
234    pub fn new(spec: LoopSpec, model: Arc<dyn Model>) -> Self {
235        let gate = crate::level::default_gate_for(spec.level);
236        Self {
237            spec,
238            model,
239            maker_tools: Vec::new(),
240            checker_tools: Vec::new(),
241            sandbox: Arc::new(NullSandbox::new(PathBuf::from("."))),
242            gate,
243            action_executor: Arc::new(ApprovalOnlyExecutor),
244            memory: None,
245        }
246    }
247
248    pub fn with_maker_tool(mut self, t: Arc<dyn Tool>) -> Self {
249        self.maker_tools.push(t);
250        self
251    }
252    pub fn with_checker_tool(mut self, t: Arc<dyn Tool>) -> Self {
253        self.checker_tools.push(t);
254        self
255    }
256    pub fn with_sandbox(mut self, s: Arc<dyn Sandbox>) -> Self {
257        self.sandbox = s;
258        self
259    }
260    pub fn with_gate(mut self, g: Arc<dyn HumanGate>) -> Self {
261        self.gate = g;
262        self
263    }
264    pub fn with_action_executor(mut self, e: Arc<dyn ActionExecutor>) -> Self {
265        self.action_executor = e;
266        self
267    }
268    pub fn with_memory(mut self, m: Arc<dyn Memory>) -> Self {
269        self.memory = Some(m);
270        self
271    }
272
273    pub fn spec(&self) -> &LoopSpec {
274        &self.spec
275    }
276
277    /// Run exactly one round of the loop. Never panics and never returns an
278    /// `Err`: sandbox/maker/checker failures are folded into
279    /// [`RoundOutcome::Failed`] so a scheduler can keep ticking. The result
280    /// is also recorded to memory when memory is configured.
281    pub async fn run_once(&self) -> RoundReport {
282        let report = self.run_round().await;
283        self.record(&report).await;
284        report
285    }
286
287    async fn run_round(&self) -> RoundReport {
288        let mut budget = BudgetState::new(self.spec.budget);
289        let level = self.spec.level;
290
291        // --- Triage: recall prior state from memory. ---
292        let prior = self.recall_state().await;
293
294        // --- Isolated sandbox for this round. ---
295        let mut handle = match self.sandbox.spawn().await {
296            Ok(h) => h,
297            Err(e) => {
298                return self.failed(format!("sandbox spawn failed: {e}"), &budget, None, None);
299            }
300        };
301
302        // --- Maker sub-agent. ---
303        let maker_desc = self.maker_task_description(&prior);
304        let maker = SubagentSpec::new(
305            format!("{}:maker", self.spec.name),
306            Task {
307                description: maker_desc,
308                source: None,
309                deadline: None,
310            },
311        )
312        .with_max_iters(budget.max_iters());
313        let maker = with_tools_for_level(maker, &self.maker_tools, level);
314        let maker_report = match Subagent::new(dyn_model(&self.model), maker)
315            .run(&mut handle.world)
316            .await
317        {
318            Ok(r) => r,
319            Err(e) => {
320                return self.failed(format!("maker failed: {e}"), &budget, None, None);
321            }
322        };
323        budget.add(&maker_report.usage);
324        if let Some(limit) = budget.exceeded() {
325            return self.budget_exhausted(limit, &budget, Some(maker_report), None);
326        }
327
328        // --- Checker sub-agent (verification). ---
329        let checker_desc = self.checker_task_description(&maker_report);
330        let checker = SubagentSpec::new(
331            format!("{}:checker", self.spec.name),
332            Task {
333                description: checker_desc,
334                source: None,
335                deadline: None,
336            },
337        )
338        .with_max_iters(budget.max_iters());
339        let checker = with_tools_for_level(checker, &self.checker_tools, level);
340        let checker_report = match Subagent::new(dyn_model(&self.model), checker)
341            .run(&mut handle.world)
342            .await
343        {
344            Ok(r) => r,
345            Err(e) => {
346                return self.failed(
347                    format!("checker failed: {e}"),
348                    &budget,
349                    Some(maker_report),
350                    None,
351                );
352            }
353        };
354        budget.add(&checker_report.usage);
355        if let Some(limit) = budget.exceeded() {
356            return self.budget_exhausted(limit, &budget, Some(maker_report), Some(checker_report));
357        }
358
359        // --- Gate: proceed or escalate. ---
360        let verified = checker_report.status == SubagentStatus::Done;
361        let summary = checker_report
362            .text
363            .clone()
364            .or_else(|| maker_report.text.clone())
365            .unwrap_or_else(|| self.spec.intent.clone());
366        let proposed = ProposedAction::new(self.spec.action_kind.clone(), summary, verified);
367        let decision = self.gate.decide(level, &proposed);
368
369        let (outcome, action_receipt) = match (&decision, level) {
370            // L1 never acts — it reports, regardless of the gate verdict.
371            (_, LoopLevel::L1Report) => (RoundOutcome::Reported, None),
372            (GateDecision::AutoProceed, _) => {
373                match self
374                    .action_executor
375                    .execute(&self.spec, &proposed, &mut handle.world)
376                    .await
377                {
378                    Ok(receipt) => (RoundOutcome::Proceeded, Some(receipt)),
379                    Err(e) => {
380                        return self.failed(
381                            format!("action executor failed: {e}"),
382                            &budget,
383                            Some(maker_report),
384                            Some(checker_report),
385                        );
386                    }
387                }
388            }
389            (GateDecision::Escalate { reason }, _) => (
390                RoundOutcome::Escalated {
391                    reason: reason.clone(),
392                },
393                None,
394            ),
395        };
396
397        RoundReport {
398            loop_name: self.spec.name.clone(),
399            intent: self.spec.intent.clone(),
400            level,
401            maker: Some(maker_report),
402            checker: Some(checker_report),
403            decision: Some(decision),
404            action: action_receipt,
405            input_tokens: budget.input_tokens,
406            output_tokens: budget.output_tokens,
407            outcome,
408        }
409    }
410
411    fn maker_task_description(&self, prior: &Option<String>) -> String {
412        let write_note = if self.spec.level.maker_may_write() {
413            "You MAY modify files in this workspace to accomplish the task."
414        } else {
415            "READ-ONLY: do NOT modify any files. Investigate and report findings only."
416        };
417        let mut d = format!(
418            "Loop intent: {}\nMaturity level: {}\n{}\n\nTask:\n{}",
419            self.spec.intent,
420            self.spec.level.label(),
421            write_note,
422            self.spec.maker_prompt,
423        );
424        if let Some(p) = prior {
425            d.push_str(&format!("\n\nState from previous rounds:\n{p}"));
426        }
427        d
428    }
429
430    fn checker_task_description(&self, maker: &SubagentReport) -> String {
431        format!(
432            "You are the checker (verifier) for loop `{}`.\nLoop intent: {}\n\n\
433             Verify the work below. Run any available tests and gates, look for \
434             regressions, and decide whether it is safe. Report DoneWithConcerns \
435             if anything is questionable.\n\nMaker's report:\n{}\n\n\
436             Verification task:\n{}",
437            self.spec.name,
438            self.spec.intent,
439            maker.text.as_deref().unwrap_or("(maker produced no text)"),
440            self.spec.checker_prompt,
441        )
442    }
443
444    async fn recall_state(&self) -> Option<String> {
445        let mem = self.memory.as_ref()?;
446        match mem.recall(&self.spec.name, 5).await {
447            Ok(hits) if !hits.is_empty() => Some(
448                hits.iter()
449                    .map(|e| format!("- {}", e.content))
450                    .collect::<Vec<_>>()
451                    .join("\n"),
452            ),
453            Ok(_) => None,
454            Err(e) => {
455                tracing::warn!(loop = %self.spec.name, error = %e, "loop-engine: recall failed");
456                None
457            }
458        }
459    }
460
461    async fn record(&self, report: &RoundReport) {
462        let Some(mem) = self.memory.as_ref() else {
463            return;
464        };
465        let entry = MemoryEntry::new(format!(
466            "{} — {}",
467            outcome_label(&report.outcome),
468            report
469                .checker
470                .as_ref()
471                .and_then(|c| c.text.clone())
472                .or_else(|| report.maker.as_ref().and_then(|m| m.text.clone()))
473                .unwrap_or_else(|| report.intent.clone())
474        ))
475        .with_tags([self.spec.name.clone(), "loop-state".into()])
476        .with_source(format!("loop:{}", self.spec.name));
477        if let Err(e) = mem.write(entry).await {
478            tracing::warn!(loop = %self.spec.name, error = %e, "loop-engine: state write failed");
479        }
480    }
481
482    fn failed(
483        &self,
484        error: String,
485        budget: &BudgetState,
486        maker: Option<SubagentReport>,
487        checker: Option<SubagentReport>,
488    ) -> RoundReport {
489        tracing::warn!(loop = %self.spec.name, %error, "loop-engine: round failed");
490        RoundReport {
491            loop_name: self.spec.name.clone(),
492            intent: self.spec.intent.clone(),
493            level: self.spec.level,
494            maker,
495            checker,
496            decision: None,
497            action: None,
498            input_tokens: budget.input_tokens,
499            output_tokens: budget.output_tokens,
500            outcome: RoundOutcome::Failed { error },
501        }
502    }
503
504    fn budget_exhausted(
505        &self,
506        limit: BudgetLimit,
507        budget: &BudgetState,
508        maker: Option<SubagentReport>,
509        checker: Option<SubagentReport>,
510    ) -> RoundReport {
511        tracing::info!(loop = %self.spec.name, limit = limit.label(), "loop-engine: budget exhausted");
512        RoundReport {
513            loop_name: self.spec.name.clone(),
514            intent: self.spec.intent.clone(),
515            level: self.spec.level,
516            maker,
517            checker,
518            decision: None,
519            action: None,
520            input_tokens: budget.input_tokens,
521            output_tokens: budget.output_tokens,
522            outcome: RoundOutcome::BudgetExhausted { limit },
523        }
524    }
525}
526
527fn dyn_model(m: &Arc<dyn Model>) -> harness_core::DynModel {
528    harness_core::DynModel(m.clone())
529}
530
531fn with_tools_for_level(
532    mut spec: SubagentSpec,
533    tools: &[Arc<dyn Tool>],
534    level: LoopLevel,
535) -> SubagentSpec {
536    for t in tools {
537        if level == LoopLevel::L1Report && !l1_tool_allowed(t.risk()) {
538            tracing::info!(
539                subagent = %spec.name,
540                tool = %t.name(),
541                risk = ?t.risk(),
542                "loop-engine: skipping mutating tool for L1 loop"
543            );
544            continue;
545        }
546        spec = spec.with_tool(t.clone());
547    }
548    spec
549}
550
551fn l1_tool_allowed(risk: ToolRisk) -> bool {
552    matches!(risk, ToolRisk::ReadOnly | ToolRisk::Network)
553}
554
555#[cfg(test)]
556mod tests {
557    use super::*;
558    use crate::{AllowlistGate, TokenBudget};
559    use async_trait::async_trait;
560    use harness_core::{MemoryError, Model, ToolError, ToolResult, ToolSchema, World};
561    use harness_models::{MockModel, MockResponse};
562    use serde_json::{Value, json};
563    use std::sync::{
564        Arc as StdArc, Mutex,
565        atomic::{AtomicUsize, Ordering},
566    };
567
568    fn spec(level: LoopLevel) -> LoopSpec {
569        LoopSpec::new("test-loop", "keep the test loop honest", level)
570            .with_maker_prompt("make a report")
571            .with_checker_prompt("verify the report")
572            .with_action_kind("commit")
573    }
574
575    fn model(resps: impl IntoIterator<Item = MockResponse>) -> Arc<MockModel> {
576        Arc::new(MockModel::new().script_many(resps))
577    }
578
579    #[derive(Clone)]
580    struct TestTool {
581        schema: ToolSchema,
582        risk: ToolRisk,
583    }
584
585    impl TestTool {
586        fn new(name: &str, risk: ToolRisk) -> Arc<Self> {
587            Arc::new(Self {
588                schema: ToolSchema {
589                    name: name.into(),
590                    description: "test tool".into(),
591                    input: json!({"type": "object"}),
592                },
593                risk,
594            })
595        }
596    }
597
598    #[async_trait]
599    impl Tool for TestTool {
600        fn name(&self) -> &str {
601            &self.schema.name
602        }
603
604        fn schema(&self) -> &ToolSchema {
605            &self.schema
606        }
607
608        fn risk(&self) -> ToolRisk {
609            self.risk
610        }
611
612        async fn invoke(&self, _args: Value, _world: &mut World) -> Result<ToolResult, ToolError> {
613            Ok(ToolResult {
614                ok: true,
615                content: json!({"ok": true}),
616                trace: None,
617            })
618        }
619    }
620
621    #[derive(Default)]
622    struct TestMemory {
623        entries: Mutex<Vec<MemoryEntry>>,
624    }
625
626    impl TestMemory {
627        fn with_entry(entry: MemoryEntry) -> Self {
628            Self {
629                entries: Mutex::new(vec![entry]),
630            }
631        }
632
633        fn entries(&self) -> Vec<MemoryEntry> {
634            self.entries.lock().unwrap().clone()
635        }
636    }
637
638    #[async_trait]
639    impl Memory for TestMemory {
640        async fn recall(&self, _query: &str, k: usize) -> Result<Vec<MemoryEntry>, MemoryError> {
641            Ok(self
642                .entries
643                .lock()
644                .unwrap()
645                .iter()
646                .take(k)
647                .cloned()
648                .collect())
649        }
650
651        async fn write(&self, entry: MemoryEntry) -> Result<(), MemoryError> {
652            self.entries.lock().unwrap().push(entry);
653            Ok(())
654        }
655    }
656
657    #[tokio::test]
658    async fn l1_filters_mutating_tools_before_subagent_context() {
659        let model = model([
660            MockResponse::text("maker report"),
661            MockResponse::text("checker report"),
662        ]);
663        let engine = LoopEngine::new(spec(LoopLevel::L1Report), model.clone() as Arc<dyn Model>)
664            .with_maker_tool(TestTool::new("read", ToolRisk::ReadOnly))
665            .with_maker_tool(TestTool::new("write", ToolRisk::Destructive))
666            .with_checker_tool(TestTool::new("web", ToolRisk::Network))
667            .with_checker_tool(TestTool::new("format", ToolRisk::Idempotent));
668
669        let report = engine.run_once().await;
670
671        assert_eq!(report.outcome, RoundOutcome::Reported);
672        let calls = model.calls();
673        assert_eq!(calls.len(), 2);
674        assert_eq!(calls[0].tools_available, vec!["read"]);
675        assert_eq!(calls[1].tools_available, vec!["web"]);
676        assert!(calls[0].task_description.contains("READ-ONLY"));
677    }
678
679    #[tokio::test]
680    async fn l3_allowlisted_verified_round_proceeds_quietly() {
681        let model = model([
682            MockResponse::text("maker produced patch").with_usage(10, 5),
683            MockResponse::text("verified clean").with_usage(7, 3),
684        ]);
685        let engine = LoopEngine::new(spec(LoopLevel::L3Unattended), model as Arc<dyn Model>)
686            .with_gate(Arc::new(AllowlistGate::new(["commit"])));
687
688        let report = engine.run_once().await;
689
690        assert_eq!(report.outcome, RoundOutcome::Proceeded);
691        assert!(matches!(report.decision, Some(GateDecision::AutoProceed)));
692        let action = report.action.as_ref().expect("action receipt");
693        assert_eq!(action.kind, "commit");
694        assert!(
695            action
696                .summary
697                .contains("no external action executor configured")
698        );
699        assert!(!report.should_deliver());
700        assert_eq!(report.input_tokens, 17);
701        assert_eq!(report.output_tokens, 8);
702    }
703
704    #[tokio::test]
705    async fn auto_proceed_invokes_custom_action_executor() {
706        let model = model([
707            MockResponse::text("maker produced patch"),
708            MockResponse::text("verified clean"),
709        ]);
710        let calls = StdArc::new(AtomicUsize::new(0));
711        let seen = calls.clone();
712        let executor = CallbackActionExecutor(move |spec, action, world| {
713            seen.fetch_add(1, Ordering::SeqCst);
714            assert_eq!(spec.name, "test-loop");
715            assert_eq!(action.kind, "commit");
716            assert_eq!(world.repo.root, PathBuf::from("."));
717            Ok(ActionReceipt::new("commit", "created commit abc123"))
718        });
719        let engine = LoopEngine::new(spec(LoopLevel::L3Unattended), model as Arc<dyn Model>)
720            .with_gate(Arc::new(AllowlistGate::new(["commit"])))
721            .with_action_executor(Arc::new(executor));
722
723        let report = engine.run_once().await;
724
725        assert_eq!(calls.load(Ordering::SeqCst), 1);
726        assert_eq!(report.outcome, RoundOutcome::Proceeded);
727        assert_eq!(
728            report.action,
729            Some(ActionReceipt::new("commit", "created commit abc123"))
730        );
731        assert!(report.render().contains("action: commit"));
732    }
733
734    #[tokio::test]
735    async fn action_executor_failure_fails_round() {
736        let model = model([
737            MockResponse::text("maker produced patch"),
738            MockResponse::text("verified clean"),
739        ]);
740        let executor = CallbackActionExecutor(|_, _, _| Err(ActionError::Exec("boom".into())));
741        let engine = LoopEngine::new(spec(LoopLevel::L3Unattended), model as Arc<dyn Model>)
742            .with_gate(Arc::new(AllowlistGate::new(["commit"])))
743            .with_action_executor(Arc::new(executor));
744
745        let report = engine.run_once().await;
746
747        match &report.outcome {
748            RoundOutcome::Failed { error } => {
749                assert!(error.contains("action executor failed"));
750                assert!(error.contains("boom"));
751            }
752            other => panic!("expected action failure, got {other:?}"),
753        }
754        assert!(report.action.is_none());
755        assert!(report.should_deliver());
756    }
757
758    #[tokio::test]
759    async fn budget_exhaustion_after_maker_skips_checker() {
760        let model = model([
761            MockResponse::text("maker spent too much").with_usage(4, 3),
762            MockResponse::text("checker should not run"),
763        ]);
764        let low_budget = TokenBudget::iters(4).with_max_total_tokens(5);
765        let spec = spec(LoopLevel::L2Assisted).with_budget(low_budget);
766        let engine = LoopEngine::new(spec, model.clone() as Arc<dyn Model>);
767
768        let report = engine.run_once().await;
769
770        assert_eq!(
771            report.outcome,
772            RoundOutcome::BudgetExhausted {
773                limit: BudgetLimit::Total
774            }
775        );
776        assert!(report.maker.is_some());
777        assert!(report.checker.is_none());
778        assert_eq!(model.call_count(), 1);
779    }
780
781    #[tokio::test]
782    async fn memory_state_is_recalled_and_round_is_recorded() {
783        let model = model([
784            MockResponse::text("maker used prior state"),
785            MockResponse::text("checker verified"),
786        ]);
787        let memory = Arc::new(TestMemory::with_entry(
788            MemoryEntry::new("prior loop state").with_tags(["test-loop", "loop-state"]),
789        ));
790        let engine = LoopEngine::new(spec(LoopLevel::L1Report), model.clone() as Arc<dyn Model>)
791            .with_memory(memory.clone());
792
793        let report = engine.run_once().await;
794
795        assert_eq!(report.outcome, RoundOutcome::Reported);
796        assert!(
797            model.calls()[0]
798                .task_description
799                .contains("State from previous rounds:\n- prior loop state")
800        );
801        let entries = memory.entries();
802        assert_eq!(entries.len(), 2);
803        let recorded = entries.last().unwrap();
804        assert!(recorded.content.starts_with("reported"));
805        assert_eq!(recorded.source.as_deref(), Some("loop:test-loop"));
806        assert!(recorded.tags.iter().any(|t| t == "loop-state"));
807    }
808}