Skip to main content

harness_loop_engine/
engine.rs

1//! `LoopEngine` — the runner that turns a [`LoopSpec`] into recursive,
2//! verified, budgeted, gated agent work.
3//!
4//! One `run_once` is one trip around the anatomical loop:
5//!
6//! ```text
7//! recall state (memory)  ->  isolated sandbox  ->  maker sub-agent
8//!   ->  checker sub-agent (tests + gates)  ->  human gate  ->  record state
9//! ```
10//!
11//! The maker/checker split is the verification discipline made structural:
12//! one sub-agent proposes, a second, independent one tries to confirm the
13//! work is clean. The gate then decides — within the loop's maturity level —
14//! whether to proceed automatically or escalate to a human. Every round is
15//! bounded by a [`TokenBudget`]; every round's outcome is written back to
16//! memory as the durable spine that lets the next round pick up where this
17//! one left off.
18
19use crate::budget::{BudgetLimit, BudgetState};
20use crate::level::{GateDecision, HumanGate, LoopLevel, ProposedAction};
21use crate::spec::LoopSpec;
22use harness_core::{Memory, MemoryEntry, Model, SubagentStatus, Task, Tool};
23use harness_loop::{Subagent, SubagentReport, SubagentSpec};
24use harness_sandbox::{NullSandbox, Sandbox};
25use std::path::PathBuf;
26use std::sync::Arc;
27
28/// What one round of a loop did.
29#[derive(Debug, Clone, PartialEq, Eq)]
30pub enum RoundOutcome {
31    /// L1 (or any escalate-by-design round): the loop investigated and the
32    /// finding is delivered as a report. No change was applied.
33    Reported,
34    /// The gate auto-approved a verified proposal — the loop is cleared to
35    /// carry out its action (commit / PR / comment / …).
36    Proceeded,
37    /// Handed to a human with context. The loop will recurse next tick.
38    Escalated { reason: String },
39    /// A spend ceiling was crossed mid-round; the loop stopped early.
40    BudgetExhausted { limit: BudgetLimit },
41    /// The sandbox, maker, or checker errored. Best-effort: the scheduler
42    /// keeps ticking; this round simply produced nothing actionable.
43    Failed { error: String },
44}
45
46/// The full record of a round — the maker/checker reports, token spend, the
47/// gate decision, and the outcome. Suitable for delivery to a channel and
48/// for writing to memory.
49#[derive(Debug, Clone)]
50pub struct RoundReport {
51    pub loop_name: String,
52    pub intent: String,
53    pub level: LoopLevel,
54    pub maker: Option<SubagentReport>,
55    pub checker: Option<SubagentReport>,
56    pub decision: Option<GateDecision>,
57    pub input_tokens: u64,
58    pub output_tokens: u64,
59    pub outcome: RoundOutcome,
60}
61
62impl RoundReport {
63    pub fn total_tokens(&self) -> u64 {
64        self.input_tokens + self.output_tokens
65    }
66
67    /// Whether this round produced something worth delivering to a human.
68    /// A clean auto-proceed at L3 is intentionally quiet.
69    pub fn should_deliver(&self) -> bool {
70        !matches!(self.outcome, RoundOutcome::Proceeded)
71    }
72
73    /// A compact, human-readable summary for channels and memory.
74    pub fn render(&self) -> String {
75        let mut s = format!(
76            "[{}] loop `{}` ({})\nintent: {}\n",
77            self.level.label(),
78            self.loop_name,
79            outcome_label(&self.outcome),
80            self.intent
81        );
82        if let Some(m) = &self.maker {
83            s.push_str(&format!("maker: {:?} in {} iters\n", m.status, m.iters));
84            if let Some(t) = &m.text {
85                s.push_str(&format!("{}\n", t.trim()));
86            }
87        }
88        if let Some(c) = &self.checker {
89            s.push_str(&format!("checker: {:?} in {} iters\n", c.status, c.iters));
90        }
91        if let RoundOutcome::Escalated { reason } = &self.outcome {
92            s.push_str(&format!("escalation: {reason}\n"));
93        }
94        s.push_str(&format!(
95            "tokens: {} in / {} out\n",
96            self.input_tokens, self.output_tokens
97        ));
98        s
99    }
100}
101
102fn outcome_label(o: &RoundOutcome) -> &'static str {
103    match o {
104        RoundOutcome::Reported => "reported",
105        RoundOutcome::Proceeded => "proceeded",
106        RoundOutcome::Escalated { .. } => "escalated",
107        RoundOutcome::BudgetExhausted { .. } => "budget-exhausted",
108        RoundOutcome::Failed { .. } => "failed",
109    }
110}
111
112/// Binds a [`LoopSpec`] to the live pieces it needs to run: a model, the
113/// maker/checker tool sets, an isolation sandbox, a gate, and (optionally)
114/// memory for the state spine.
115pub struct LoopEngine {
116    spec: LoopSpec,
117    model: Arc<dyn Model>,
118    maker_tools: Vec<Arc<dyn Tool>>,
119    checker_tools: Vec<Arc<dyn Tool>>,
120    sandbox: Arc<dyn Sandbox>,
121    gate: Arc<dyn HumanGate>,
122    memory: Option<Arc<dyn Memory>>,
123}
124
125impl LoopEngine {
126    /// Construct an engine. By default the maker and checker run with no
127    /// tools, in a [`NullSandbox`] rooted at the current directory, with the
128    /// gate the spec's level implies (`AlwaysEscalate` for L1/L2). Override
129    /// any of these with the builder methods.
130    pub fn new(spec: LoopSpec, model: Arc<dyn Model>) -> Self {
131        let gate = crate::level::default_gate_for(spec.level);
132        Self {
133            spec,
134            model,
135            maker_tools: Vec::new(),
136            checker_tools: Vec::new(),
137            sandbox: Arc::new(NullSandbox::new(PathBuf::from("."))),
138            gate,
139            memory: None,
140        }
141    }
142
143    pub fn with_maker_tool(mut self, t: Arc<dyn Tool>) -> Self {
144        self.maker_tools.push(t);
145        self
146    }
147    pub fn with_checker_tool(mut self, t: Arc<dyn Tool>) -> Self {
148        self.checker_tools.push(t);
149        self
150    }
151    pub fn with_sandbox(mut self, s: Arc<dyn Sandbox>) -> Self {
152        self.sandbox = s;
153        self
154    }
155    pub fn with_gate(mut self, g: Arc<dyn HumanGate>) -> Self {
156        self.gate = g;
157        self
158    }
159    pub fn with_memory(mut self, m: Arc<dyn Memory>) -> Self {
160        self.memory = Some(m);
161        self
162    }
163
164    pub fn spec(&self) -> &LoopSpec {
165        &self.spec
166    }
167
168    /// Run exactly one round of the loop. Never panics and never returns an
169    /// `Err`: sandbox/maker/checker failures are folded into
170    /// [`RoundOutcome::Failed`] so a scheduler can keep ticking. The result
171    /// is also recorded to memory when memory is configured.
172    pub async fn run_once(&self) -> RoundReport {
173        let report = self.run_round().await;
174        self.record(&report).await;
175        report
176    }
177
178    async fn run_round(&self) -> RoundReport {
179        let mut budget = BudgetState::new(self.spec.budget);
180        let level = self.spec.level;
181
182        // --- Triage: recall prior state from memory. ---
183        let prior = self.recall_state().await;
184
185        // --- Isolated sandbox for this round. ---
186        let mut handle = match self.sandbox.spawn().await {
187            Ok(h) => h,
188            Err(e) => {
189                return self.failed(format!("sandbox spawn failed: {e}"), &budget, None, None);
190            }
191        };
192
193        // --- Maker sub-agent. ---
194        let maker_desc = self.maker_task_description(&prior);
195        let maker = SubagentSpec::new(
196            format!("{}:maker", self.spec.name),
197            Task {
198                description: maker_desc,
199                source: None,
200                deadline: None,
201            },
202        )
203        .with_max_iters(budget.max_iters());
204        let maker = with_tools(maker, &self.maker_tools);
205        let maker_report = match Subagent::new(dyn_model(&self.model), maker)
206            .run(&mut handle.world)
207            .await
208        {
209            Ok(r) => r,
210            Err(e) => {
211                return self.failed(format!("maker failed: {e}"), &budget, None, None);
212            }
213        };
214        budget.add(&maker_report.usage);
215        if let Some(limit) = budget.exceeded() {
216            return self.budget_exhausted(limit, &budget, Some(maker_report), None);
217        }
218
219        // --- Checker sub-agent (verification). ---
220        let checker_desc = self.checker_task_description(&maker_report);
221        let checker = SubagentSpec::new(
222            format!("{}:checker", self.spec.name),
223            Task {
224                description: checker_desc,
225                source: None,
226                deadline: None,
227            },
228        )
229        .with_max_iters(budget.max_iters());
230        let checker = with_tools(checker, &self.checker_tools);
231        let checker_report = match Subagent::new(dyn_model(&self.model), checker)
232            .run(&mut handle.world)
233            .await
234        {
235            Ok(r) => r,
236            Err(e) => {
237                return self.failed(
238                    format!("checker failed: {e}"),
239                    &budget,
240                    Some(maker_report),
241                    None,
242                );
243            }
244        };
245        budget.add(&checker_report.usage);
246        if let Some(limit) = budget.exceeded() {
247            return self.budget_exhausted(limit, &budget, Some(maker_report), Some(checker_report));
248        }
249
250        // --- Gate: proceed or escalate. ---
251        let verified = checker_report.status == SubagentStatus::Done;
252        let summary = checker_report
253            .text
254            .clone()
255            .or_else(|| maker_report.text.clone())
256            .unwrap_or_else(|| self.spec.intent.clone());
257        let proposed = ProposedAction::new(self.spec.action_kind.clone(), summary, verified);
258        let decision = self.gate.decide(level, &proposed);
259
260        let outcome = match (&decision, level) {
261            // L1 never acts — it reports, regardless of the gate verdict.
262            (_, LoopLevel::L1Report) => RoundOutcome::Reported,
263            (GateDecision::AutoProceed, _) => RoundOutcome::Proceeded,
264            (GateDecision::Escalate { reason }, _) => RoundOutcome::Escalated {
265                reason: reason.clone(),
266            },
267        };
268
269        RoundReport {
270            loop_name: self.spec.name.clone(),
271            intent: self.spec.intent.clone(),
272            level,
273            maker: Some(maker_report),
274            checker: Some(checker_report),
275            decision: Some(decision),
276            input_tokens: budget.input_tokens,
277            output_tokens: budget.output_tokens,
278            outcome,
279        }
280    }
281
282    fn maker_task_description(&self, prior: &Option<String>) -> String {
283        let write_note = if self.spec.level.maker_may_write() {
284            "You MAY modify files in this workspace to accomplish the task."
285        } else {
286            "READ-ONLY: do NOT modify any files. Investigate and report findings only."
287        };
288        let mut d = format!(
289            "Loop intent: {}\nMaturity level: {}\n{}\n\nTask:\n{}",
290            self.spec.intent,
291            self.spec.level.label(),
292            write_note,
293            self.spec.maker_prompt,
294        );
295        if let Some(p) = prior {
296            d.push_str(&format!("\n\nState from previous rounds:\n{p}"));
297        }
298        d
299    }
300
301    fn checker_task_description(&self, maker: &SubagentReport) -> String {
302        format!(
303            "You are the checker (verifier) for loop `{}`.\nLoop intent: {}\n\n\
304             Verify the work below. Run any available tests and gates, look for \
305             regressions, and decide whether it is safe. Report DoneWithConcerns \
306             if anything is questionable.\n\nMaker's report:\n{}\n\n\
307             Verification task:\n{}",
308            self.spec.name,
309            self.spec.intent,
310            maker.text.as_deref().unwrap_or("(maker produced no text)"),
311            self.spec.checker_prompt,
312        )
313    }
314
315    async fn recall_state(&self) -> Option<String> {
316        let mem = self.memory.as_ref()?;
317        match mem.recall(&self.spec.name, 5).await {
318            Ok(hits) if !hits.is_empty() => Some(
319                hits.iter()
320                    .map(|e| format!("- {}", e.content))
321                    .collect::<Vec<_>>()
322                    .join("\n"),
323            ),
324            Ok(_) => None,
325            Err(e) => {
326                tracing::warn!(loop = %self.spec.name, error = %e, "loop-engine: recall failed");
327                None
328            }
329        }
330    }
331
332    async fn record(&self, report: &RoundReport) {
333        let Some(mem) = self.memory.as_ref() else {
334            return;
335        };
336        let entry = MemoryEntry::new(format!(
337            "{} — {}",
338            outcome_label(&report.outcome),
339            report
340                .checker
341                .as_ref()
342                .and_then(|c| c.text.clone())
343                .or_else(|| report.maker.as_ref().and_then(|m| m.text.clone()))
344                .unwrap_or_else(|| report.intent.clone())
345        ))
346        .with_tags([self.spec.name.clone(), "loop-state".into()])
347        .with_source(format!("loop:{}", self.spec.name));
348        if let Err(e) = mem.write(entry).await {
349            tracing::warn!(loop = %self.spec.name, error = %e, "loop-engine: state write failed");
350        }
351    }
352
353    fn failed(
354        &self,
355        error: String,
356        budget: &BudgetState,
357        maker: Option<SubagentReport>,
358        checker: Option<SubagentReport>,
359    ) -> RoundReport {
360        tracing::warn!(loop = %self.spec.name, %error, "loop-engine: round failed");
361        RoundReport {
362            loop_name: self.spec.name.clone(),
363            intent: self.spec.intent.clone(),
364            level: self.spec.level,
365            maker,
366            checker,
367            decision: None,
368            input_tokens: budget.input_tokens,
369            output_tokens: budget.output_tokens,
370            outcome: RoundOutcome::Failed { error },
371        }
372    }
373
374    fn budget_exhausted(
375        &self,
376        limit: BudgetLimit,
377        budget: &BudgetState,
378        maker: Option<SubagentReport>,
379        checker: Option<SubagentReport>,
380    ) -> RoundReport {
381        tracing::info!(loop = %self.spec.name, limit = limit.label(), "loop-engine: budget exhausted");
382        RoundReport {
383            loop_name: self.spec.name.clone(),
384            intent: self.spec.intent.clone(),
385            level: self.spec.level,
386            maker,
387            checker,
388            decision: None,
389            input_tokens: budget.input_tokens,
390            output_tokens: budget.output_tokens,
391            outcome: RoundOutcome::BudgetExhausted { limit },
392        }
393    }
394}
395
396fn dyn_model(m: &Arc<dyn Model>) -> harness_core::DynModel {
397    harness_core::DynModel(m.clone())
398}
399
400fn with_tools(mut spec: SubagentSpec, tools: &[Arc<dyn Tool>]) -> SubagentSpec {
401    for t in tools {
402        spec = spec.with_tool(t.clone());
403    }
404    spec
405}