Skip to main content

ailoop_core/
middleware.rs

//! [`ChatMiddleware`] extension point and its decision enums
//! ([`HookAction`], [`ToolDecision`]).
3
4use crate::{
5    ChatRequest, FinishReason, Message, RunId, StepId, StreamChunk, ToolResultContent, Usage,
6};
7use serde_json::Value;
8
9use crate::RunConfig;
10
11/// Extension point invoked by the engine at every lifecycle event of a
12/// run.
13///
14/// Middlewares run in the registration order of
15/// [`crate::RunConfig::middlewares`]. For every hook with a `_mut`
16/// counterpart, the engine fires every middleware's mutating variant
17/// first (in registration order), then every read-only variant — so
18/// transformers always run as a phase ahead of observers, and every
19/// observer sees the same fully-mutated input.
20///
21/// All hooks have default no-op implementations; only override the ones
22/// you need. Implementors must be `Send + Sync` because the engine
23/// holds them behind `Arc<dyn ChatMiddleware>`.
24#[async_trait::async_trait]
25#[allow(unused_variables)]
26pub trait ChatMiddleware: Send + Sync {
27    // chat
28    /// Fired once per run before any provider call. Return
29    /// [`HookAction::Terminate`] to abort early; the engine surfaces
30    /// the reason as [`crate::FinishReason::Aborted`] and still fires
31    /// [`Self::on_run_finished`] so observability is consistent.
32    async fn on_run_started(
33        &self,
34        run_id: &RunId,
35        messages: &[Message],
36        config: &RunConfig,
37    ) -> HookAction {
38        HookAction::Continue
39    }
40    /// Fired once per step, after the engine has assembled the
41    /// per-turn [`ChatRequest`] but before sending it. Mutate `req` to
42    /// inject defaults, switch model parameters per-turn, or strip
43    /// fields. The façade's per-builder defaults are wired through an
44    /// internal middleware that runs ahead of any user-supplied one.
45    async fn on_chat_request(&self, run_id: &RunId, step_id: &StepId, req: &mut ChatRequest) {}
46    /// Fired for every [`StreamChunk`] the engine emits, including
47    /// chunks the engine itself synthesizes
48    /// (`RunStarted`/`StepStarted`/`StepFinished`/`ToolResult`/
49    /// `RunFinished`/`HistoryCompacted`). For mutation, override
50    /// [`Self::on_chunk_mut`] instead.
51    async fn on_chunk(&self, chunk: &StreamChunk) {}
52    /// Mutating counterpart to [`Self::on_chunk`]. Engines invoke every
53    /// middleware's `on_chunk_mut` (in registration order) **before** any
54    /// `on_chunk`, so transformers run as a phase ahead of observers and
55    /// every observer sees the same fully-mutated chunk. The mutated
56    /// chunk is also what the engine itself uses to build assistant
57    /// history and what the stream consumer ultimately receives.
58    async fn on_chunk_mut(&self, chunk: &mut StreamChunk) {}
59    /// Fired once per run after the engine emits its
60    /// [`StreamChunk::RunFinished`]. Always fires — including aborted
61    /// runs and runs terminated by middleware — so observers see a
62    /// consistent close. `new_messages` covers everything the engine
63    /// added to history this run; partial tool results are preserved
64    /// when the run was aborted mid-step.
65    ///
66    /// [`StreamChunk::RunFinished`]: crate::StreamChunk::RunFinished
67    async fn on_run_finished(
68        &self,
69        run_id: &RunId,
70        reason: &FinishReason,
71        usage: &Usage,
72        new_messages: &[Message],
73    ) {
74    }
75    /// Fired when a run terminates with a transport / setup-time
76    /// error from the provider (i.e. an `Err` returned to the caller).
77    /// Aborts via [`HookAction::Terminate`] /
78    /// [`ToolDecision::Terminate`] / `RunConfig.cancellation` /
79    /// `RunConfig.timeout` go through [`Self::on_run_finished`]
80    /// instead — they are not errors.
81    async fn on_run_error(&self, run_id: &RunId, err: &(dyn std::error::Error + Send + Sync)) {}
82
83    // tools
84    /// Fired before the engine invokes a tool. Return
85    /// [`ToolDecision::Skip`] to feed a synthesized error result back
86    /// to the model without running the tool, or
87    /// [`ToolDecision::Terminate`] to abort the run. This is the
88    /// gating hook; for input rewriting, use
89    /// [`Self::on_before_tool_call_mut`].
90    async fn on_before_tool_call(
91        &self,
92        run_id: &RunId,
93        step_id: &StepId,
94        name: &str,
95        args: &Value,
96    ) -> ToolDecision {
97        ToolDecision::Continue
98    }
99    /// Mutating counterpart to [`Self::on_before_tool_call`]. Engines invoke
100    /// every middleware's `on_before_tool_call_mut` (in registration
101    /// order) **before** any `on_before_tool_call`, so input transforms
102    /// (sanitization, redaction, defaulting) run as a phase ahead of
103    /// gating decisions. Gating still belongs in `on_before_tool_call`;
104    /// this hook only rewrites `args`.
105    async fn on_before_tool_call_mut(
106        &self,
107        run_id: &RunId,
108        step_id: &StepId,
109        name: &str,
110        args: &mut Value,
111    ) {
112    }
113    /// Fired after the engine has executed a tool but before the
114    /// result is yielded to the stream consumer or recorded in
115    /// history. Read-only; for output rewriting use
116    /// [`Self::on_after_tool_call_mut`].
117    async fn on_after_tool_call(
118        &self,
119        run_id: &RunId,
120        step_id: &StepId,
121        name: &str,
122        args: &Value,
123        result: &ToolResultContent,
124    ) {
125    }
126    /// Mutating counterpart to [`Self::on_after_tool_call`]. Engines invoke
127    /// every middleware's `on_after_tool_call_mut` (in registration
128    /// order) **before** any `on_after_tool_call`, so output transforms
129    /// (PII scrubbing, truncation-with-marker) run as a phase ahead of
130    /// observers. The mutated `result` is what the model sees on the
131    /// next turn and what the engine emits in `StreamChunk::ToolResult`.
132    async fn on_after_tool_call_mut(
133        &self,
134        run_id: &RunId,
135        step_id: &StepId,
136        name: &str,
137        args: &Value,
138        result: &mut ToolResultContent,
139    ) {
140    }
141}
142
/// Decision returned from
/// [`ChatMiddleware::on_run_started`] to optionally short-circuit a
/// run before any provider call.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so a decision can be
/// logged, duplicated and compared directly in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum HookAction {
    /// Default: let the run proceed.
    Continue,
    /// Abort the run before the first provider call. The engine
    /// surfaces the reason as
    /// [`crate::FinishReason::Aborted`]`(reason)` and still fires
    /// [`ChatMiddleware::on_run_finished`].
    Terminate {
        /// Human-readable reason; threaded through `FinishReason::Aborted`.
        reason: String,
    },
}
159
/// Decision returned from
/// [`ChatMiddleware::on_before_tool_call`] to optionally bypass or
/// abort a tool invocation.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so a decision can be
/// logged, duplicated and compared directly in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum ToolDecision {
    /// Default: execute the tool.
    Continue,
    /// Skip execution. The engine synthesizes an `is_error: true`
    /// [`crate::ToolResultContent`] carrying `reason` and feeds it
    /// back to the model so the loop can continue. Use when a single
    /// tool call should be denied (rate limit, policy violation) but
    /// the run as a whole should keep going.
    Skip {
        /// Human-readable reason; surfaces in the synthesized error
        /// `tool_result` the model receives.
        reason: String,
    },
    /// Abort the run before executing this tool. The engine surfaces
    /// the reason as [`crate::FinishReason::Aborted`]`(reason)` and
    /// fires [`ChatMiddleware::on_run_finished`]; partial tool
    /// results from earlier tool calls in the same step are preserved
    /// in `new_messages`.
    Terminate {
        /// Human-readable reason; threaded through `FinishReason::Aborted`.
        reason: String,
    },
}