Skip to main content

oharness_loop/
agent.rs

1//! `Agent` assembly + `AgentBuilder` (§12.5).
2
3use crate::config::AgentConfig;
4use crate::loop_trait::{Loop, LoopContext};
5use oharness_core::{
6    AgentError, ApprovalChannel, BudgetHandle, Cancellation, EventSink, NullApprovalChannel,
7    NullBudget, NullSink, RunId, RunOutcome, ScopedEmitter, SharedSink, Task, TrajectoryHandle,
8};
9use oharness_critic::{CompositeCritic, CriticTrigger, ReflectionInjector};
10use oharness_llm::Llm;
11use oharness_memory::{MemoryPolicy, Passthrough};
12use oharness_tools::context::Workspace;
13use oharness_tools::ToolSet;
14use oharness_trace::{InMemorySink, RequestTracer, ToolTracer};
15use std::sync::atomic::AtomicU64;
16use std::sync::Arc;
17
18pub struct Agent {
19    llm: Arc<dyn Llm>,
20    tools: Arc<dyn ToolSet>,
21    memory: Arc<dyn MemoryPolicy>,
22    loop_impl: Box<dyn Loop>,
23    events: Arc<dyn EventSink>,
24    budget: Arc<dyn BudgetHandle>,
25    approval: Arc<dyn ApprovalChannel>,
26    critics: Option<Arc<CompositeCritic>>,
27    critic_trigger: CriticTrigger,
28    /// Optional scratch [`Workspace`]. Propagated into
29    /// `LoopContext.workspace` and from there into every
30    /// `ToolContext` the loop constructs, so the shipped `fs` /
31    /// `bash` tools scope to the agent's workspace rather than cwd.
32    /// Benchmark adapters (e.g. `oharness-bench-swe`) populate this
33    /// from their `LoadedTask.workspace` in the agent factory.
34    workspace: Option<Arc<Workspace>>,
35    /// Handle stashed for `run_reflexion` — `None` if the builder wasn't
36    /// given a [`ReflectionInjector`]. The injector itself (if present)
37    /// is also wired into the Llm middleware stack before the agent's
38    /// run begins; this field is just the accessor to let
39    /// [`run_reflexion`](crate::reflexion) swap its reflection list
40    /// between episodes.
41    reflection_injector: Option<Arc<ReflectionInjector>>,
42    config: AgentConfig,
43}
44
45impl Agent {
46    pub fn builder() -> AgentBuilder {
47        AgentBuilder::default()
48    }
49
50    pub fn llm(&self) -> &Arc<dyn Llm> {
51        &self.llm
52    }
53
54    pub fn tools(&self) -> &Arc<dyn ToolSet> {
55        &self.tools
56    }
57
58    pub fn sink(&self) -> &Arc<dyn EventSink> {
59        &self.events
60    }
61
62    /// Returns the agent's [`ReflectionInjector`], or `None` if the
63    /// builder wasn't given one. `run_reflexion` uses this accessor to
64    /// reconfigure injected reflections between episodes; building an
65    /// agent without an injector and then passing it to `run_reflexion`
66    /// is a configuration error caught before any episode runs.
67    pub fn injector(&self) -> Option<&Arc<ReflectionInjector>> {
68        self.reflection_injector.as_ref()
69    }
70
71    pub fn critics(&self) -> Option<&Arc<CompositeCritic>> {
72        self.critics.as_ref()
73    }
74
75    pub fn critic_trigger(&self) -> CriticTrigger {
76        self.critic_trigger
77    }
78
79    pub fn workspace(&self) -> Option<&Arc<Workspace>> {
80        self.workspace.as_ref()
81    }
82
83    pub async fn run(&self, task: Task) -> Result<RunOutcome, AgentError> {
84        let run_id = RunId::new();
85        let seq = Arc::new(AtomicU64::new(0));
86
87        // Always fan out into an in-memory capture so we can populate the returned
88        // TrajectoryHandle. The user's configured sink still sees every event too.
89        let capture = InMemorySink::new();
90        let fan: Arc<dyn EventSink> = Arc::new(FanOut {
91            a: self.events.clone(),
92            b: Arc::new(capture.clone()),
93        });
94        let emitter = ScopedEmitter::new(fan, run_id, seq);
95
96        // M1b-δ: wrap the user's Llm + ToolSet in the tracing middleware so
97        // llm.* and tool.* events are emitted at the provider / tool boundary
98        // instead of from inside the loop. See docs/remaining-work.md §2.4.
99        let traced_llm: Arc<dyn Llm> =
100            Arc::new(RequestTracer::new(self.llm.clone(), emitter.clone()));
101        let traced_tools: Arc<dyn ToolSet> =
102            Arc::new(ToolTracer::new(self.tools.clone(), emitter.clone()));
103
104        let loop_ctx = LoopContext {
105            llm: traced_llm,
106            tools: traced_tools,
107            memory: self.memory.clone(),
108            critics: self.critics.clone(),
109            critic_trigger: self.critic_trigger,
110            events: emitter,
111            budget: self.budget.clone(),
112            cancellation: Cancellation::new(),
113            approval: self.approval.clone(),
114            workspace: self.workspace.clone(),
115            revision_depth_cap: self.config.revision_depth_cap,
116            max_turns: self.config.max_turns,
117        };
118
119        let mut outcome = self.loop_impl.run(task, &loop_ctx).await?;
120        outcome.run_id = run_id;
121        outcome.trajectory = TrajectoryHandle::in_memory(capture.events());
122        Ok(outcome)
123    }
124}
125
126#[derive(Default)]
127pub struct AgentBuilder {
128    llm: Option<Arc<dyn Llm>>,
129    tools: Option<Arc<dyn ToolSet>>,
130    memory: Option<Arc<dyn MemoryPolicy>>,
131    loop_impl: Option<Box<dyn Loop>>,
132    events: Option<SharedSink>,
133    budget: Option<Arc<dyn BudgetHandle>>,
134    approval: Option<Arc<dyn ApprovalChannel>>,
135    critics: Option<Arc<CompositeCritic>>,
136    critic_trigger: Option<CriticTrigger>,
137    reflection_injector: Option<Arc<ReflectionInjector>>,
138    workspace: Option<Arc<Workspace>>,
139    config: AgentConfig,
140}
141
142impl AgentBuilder {
143    pub fn with_llm(mut self, llm: Arc<dyn Llm>) -> Self {
144        self.llm = Some(llm);
145        self
146    }
147
148    pub fn with_tools(mut self, tools: Arc<dyn ToolSet>) -> Self {
149        self.tools = Some(tools);
150        self
151    }
152
153    pub fn with_memory(mut self, memory: Arc<dyn MemoryPolicy>) -> Self {
154        self.memory = Some(memory);
155        self
156    }
157
158    pub fn with_loop(mut self, l: Box<dyn Loop>) -> Self {
159        self.loop_impl = Some(l);
160        self
161    }
162
163    pub fn with_event_sink(mut self, sink: SharedSink) -> Self {
164        self.events = Some(sink);
165        self
166    }
167
168    pub fn with_budget(mut self, budget: Arc<dyn BudgetHandle>) -> Self {
169        self.budget = Some(budget);
170        self
171    }
172
173    pub fn with_approval(mut self, approval: Arc<dyn ApprovalChannel>) -> Self {
174        self.approval = Some(approval);
175        self
176    }
177
178    pub fn with_config(mut self, config: AgentConfig) -> Self {
179        self.config = config;
180        self
181    }
182
183    pub fn with_max_turns(mut self, n: u32) -> Self {
184        self.config.max_turns = n;
185        self
186    }
187
188    /// Attach a critic. Typically a [`CompositeCritic`] wrapping one or
189    /// more [`oharness_critic::Critic`] implementations; single-critic
190    /// setups can construct a composite with one child under
191    /// `AggregationPolicy::FirstReject`.
192    pub fn with_critics(mut self, critics: Arc<CompositeCritic>) -> Self {
193        self.critics = Some(critics);
194        self
195    }
196
197    pub fn with_critic_trigger(mut self, trigger: CriticTrigger) -> Self {
198        self.critic_trigger = Some(trigger);
199        self
200    }
201
202    /// Attach a [`ReflectionInjector`] for use with
203    /// [`run_reflexion`](crate::reflexion). The injector is *not* wired
204    /// into the LLM middleware stack automatically — users wire it with
205    /// `LlmExt::with_request_layer(injector.clone())` before passing the
206    /// LLM to `.with_llm(..)`. This stash is so `run_reflexion` can find
207    /// the injector later and swap its reflection list between episodes.
208    pub fn with_reflection_injector(mut self, injector: Arc<ReflectionInjector>) -> Self {
209        self.reflection_injector = Some(injector);
210        self
211    }
212
213    /// Attach a [`Workspace`] that every tool call in this agent's run
214    /// will be scoped to. The shipped `fs` / `bash` tools respect
215    /// `ToolContext::workspace_path()` — without a workspace attached,
216    /// they fall back to cwd (surprise-prone for research runs).
217    /// Benchmark adapters populate this from their `LoadedTask.workspace`
218    /// in the agent factory.
219    pub fn with_workspace(mut self, workspace: Arc<Workspace>) -> Self {
220        self.workspace = Some(workspace);
221        self
222    }
223
224    pub fn build(self) -> Result<Agent, AgentError> {
225        let llm = self
226            .llm
227            .ok_or_else(|| AgentError::Configuration("llm is required".into()))?;
228        let tools = self
229            .tools
230            .ok_or_else(|| AgentError::Configuration("tools is required".into()))?;
231        // Passthrough default keeps surprise-free behavior.
232        let memory = self
233            .memory
234            .unwrap_or_else(|| Arc::new(Passthrough) as Arc<dyn MemoryPolicy>);
235
236        let loop_impl = match self.loop_impl {
237            Some(l) => l,
238            #[cfg(feature = "react")]
239            None => Box::new(crate::react::ReactLoop::default()),
240            #[cfg(not(feature = "react"))]
241            None => {
242                return Err(AgentError::Configuration(
243                    "loop is required (no default without `react` feature)".into(),
244                ));
245            }
246        };
247
248        let events = self.events.unwrap_or_else(|| Arc::new(NullSink));
249        let budget = self.budget.unwrap_or_else(|| Arc::new(NullBudget));
250        let approval = self
251            .approval
252            .unwrap_or_else(|| Arc::new(NullApprovalChannel));
253
254        Ok(Agent {
255            llm,
256            tools,
257            memory,
258            loop_impl,
259            events,
260            budget,
261            approval,
262            critics: self.critics,
263            critic_trigger: self.critic_trigger.unwrap_or_default(),
264            reflection_injector: self.reflection_injector,
265            workspace: self.workspace,
266            config: self.config,
267        })
268    }
269}
270
271struct FanOut {
272    a: Arc<dyn EventSink>,
273    b: Arc<dyn EventSink>,
274}
275
276impl EventSink for FanOut {
277    fn emit(&self, event: oharness_core::Event) {
278        self.a.emit(event.clone());
279        self.b.emit(event);
280    }
281    fn try_emit(&self, event: oharness_core::Event) -> Result<(), oharness_core::Event> {
282        // Best-effort: attempt both; return error if either refuses.
283        let e1 = self.a.try_emit(event.clone());
284        let e2 = self.b.try_emit(event);
285        match (e1, e2) {
286            (Ok(()), Ok(())) => Ok(()),
287            (Err(ev), _) | (_, Err(ev)) => Err(ev),
288        }
289    }
290}