ai_memory/hooks/recall.rs
1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// v0.7 Track G — Task G10: pre_recall_expand hot-path hook helper.
5//
6// G10 wires the new [`HookEvent::PreRecallExpand`] (events.rs) into
7// the recall hot path. The fire site is `mcp::handle_recall`; the
8// helper here is the seam between that call site and G5's
9// `HookChain::fire`.
10//
11// # Why a helper module
12//
13// `handle_recall` is a long, sync-heavy function with many call
14// sites and several test paths. Inlining the hook fire would
15// (a) require threading the hook chain + executor registry into
16// the function signature (cascading into every caller) and
17// (b) duplicate the payload-marshalling logic between this and the
18// future G11 / G7+ wiring tasks. Pulling the firing into a single
19// function lets the call site stay a one-liner and keeps the
20// daemon-mode contract testable in isolation.
21//
22// # Daemon mode is mandatory in production
23//
24// `PreRecallExpand` is classified as [`crate::hooks::EventClass::HotPath`]
25// (50ms class deadline). A subprocess fork+exec on Linux costs
26// ~5-10ms cold and ~1-2ms warm; a single misbehaving exec-mode
27// hook would consume the entire budget before the child even
28// processes the payload. Operators MUST configure the hook in
29// `mode = "daemon"` — the chain's per-hook budget enforcement
30// (G6) ensures a misconfigured exec-mode hook still respects the
31// 50ms ceiling, but the operator-visible behaviour will be
32// "every recall trips the budget".
33
34use serde_json::Value;
35
36use super::chain::{ChainResult, HookChain};
37use super::events::{HookEvent, RecallExpandQuery};
38use super::executor::ExecutorRegistry;
39
40// ---------------------------------------------------------------------------
41// Outcome of running the pre_recall_expand chain
42// ---------------------------------------------------------------------------
43
/// Verdict handed back to `handle_recall` once the
/// `pre_recall_expand` chain has finished.
///
/// Two of the three variants — `Allow` and `Modified` — let the
/// recall continue; they differ only in whether a hook rewrote the
/// in-flight `(query, namespace, k)` triple. `Denied` stops the
/// recall: the caller is expected to answer with an empty result
/// and expose `reason` through the recall response's
/// `meta.diagnostic` block (G5's chain-level Deny semantics).
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PreRecallOutcome {
    /// The chain answered `Allow`, or there were no hooks to run.
    /// The recall continues with the caller's original triple.
    Allow,
    /// One or more hooks answered `Modify`. The recall continues
    /// with the triple below; any subset of the three fields may
    /// differ from what the caller supplied.
    Modified {
        /// Query text the recall should run with.
        query: String,
        /// Namespace the recall should run against.
        namespace: String,
        /// Result limit the recall should use.
        k: u32,
    },
    /// A hook answered `Deny`. The recall is short-circuited and
    /// the caller reports an empty result plus a diagnostic.
    Denied {
        /// Explanation carried over from the chain's `Deny` result.
        reason: String,
        /// Numeric code carried over from the chain's `Deny` result.
        code: i32,
    },
}
69
70impl PreRecallOutcome {
71 /// The resolved query string the recall should run with. For
72 /// `Denied` the value is the *original* query — callers should
73 /// only use it for logging, not for the actual recall (the
74 /// recall must be skipped).
75 #[must_use]
76 pub fn query(&self, original: &str) -> String {
77 match self {
78 PreRecallOutcome::Allow | PreRecallOutcome::Denied { .. } => original.to_string(),
79 PreRecallOutcome::Modified { query, .. } => query.clone(),
80 }
81 }
82
83 /// The resolved namespace.
84 #[must_use]
85 pub fn namespace(&self, original: &str) -> String {
86 match self {
87 PreRecallOutcome::Allow | PreRecallOutcome::Denied { .. } => original.to_string(),
88 PreRecallOutcome::Modified { namespace, .. } => namespace.clone(),
89 }
90 }
91
92 /// The resolved limit.
93 #[must_use]
94 pub fn k(&self, original: u32) -> u32 {
95 match self {
96 PreRecallOutcome::Allow | PreRecallOutcome::Denied { .. } => original,
97 PreRecallOutcome::Modified { k, .. } => *k,
98 }
99 }
100
101 /// Whether the recall must be skipped (i.e. the chain Denied).
102 #[must_use]
103 pub fn is_denied(&self) -> bool {
104 matches!(self, PreRecallOutcome::Denied { .. })
105 }
106}
107
108// ---------------------------------------------------------------------------
109// apply_pre_recall_expand — fire the hot-path chain
110// ---------------------------------------------------------------------------
111
112/// Fire the [`HookEvent::PreRecallExpand`] chain on the recall hot
113/// path.
114///
115/// The hot-path budget is enforced by G6's chain runner (the chain's
116/// `fire` method stamps a 50ms wall-clock ceiling at entry; the
117/// caller does not need to add a second `tokio::time::timeout`
118/// around this call).
119///
120/// ## Modify semantics
121///
122/// A hook returns `HookDecision::Modify` with a [`super::events::MemoryDelta`]
123/// — but `MemoryDelta` was designed for the `pre_store` shape
124/// (memory fields). For `pre_recall_expand` we reuse three of its
125/// fields with overloaded meaning:
126///
127/// * `MemoryDelta::content` → rewritten `query` text
128/// * `MemoryDelta::namespace` → rewritten `namespace`
129/// * `MemoryDelta::priority` → rewritten `k` (cast `i32 → u32`,
130/// non-positive values fall back to the original `k`)
131///
132/// This overload is documented here rather than forking
133/// `MemoryDelta` into a per-event family because (a) the chain
134/// runner's delta merge is generic over `MemoryDelta` and forking
135/// would cascade through G5 + G6, and (b) the hot-path payload is
136/// narrow enough that a typed per-hook payload was rejected during
137/// G2 design discussion. Future G* tasks may revisit if more
138/// per-event payload shapes accrue.
139///
140/// ## Return shape
141///
142/// See [`PreRecallOutcome`]. `Allow` and `Modified` both let the
143/// recall proceed; `Denied` halts it and the caller surfaces the
144/// reason in the response's diagnostic block.
145pub async fn apply_pre_recall_expand(
146 query: &str,
147 namespace: &str,
148 k: u32,
149 chain: &HookChain,
150 registry: &mut ExecutorRegistry,
151) -> PreRecallOutcome {
152 // No hooks configured — fast path. The G6 chain runner would
153 // also early-return, but skipping the JSON marshal here keeps
154 // the no-hook recall path zero-overhead.
155 if chain.hooks().is_empty() {
156 return PreRecallOutcome::Allow;
157 }
158
159 let payload_struct = RecallExpandQuery {
160 query: query.to_string(),
161 namespace: namespace.to_string(),
162 k,
163 };
164 let payload = serde_json::to_value(&payload_struct).unwrap_or_else(|_| Value::Null);
165
166 let result = chain
167 .fire(HookEvent::PreRecallExpand, payload, registry)
168 .await;
169
170 match result {
171 ChainResult::Allow => PreRecallOutcome::Allow,
172 ChainResult::ModifiedAllow(delta) => {
173 let new_query = delta.content.unwrap_or_else(|| query.to_string());
174 let new_namespace = delta.namespace.unwrap_or_else(|| namespace.to_string());
175 let new_k = match delta.priority {
176 Some(p) if p > 0 => u32::try_from(p).unwrap_or(k),
177 _ => k,
178 };
179 PreRecallOutcome::Modified {
180 query: new_query,
181 namespace: new_namespace,
182 k: new_k,
183 }
184 }
185 ChainResult::Deny { reason, code } => PreRecallOutcome::Denied { reason, code },
186 ChainResult::AskUser { .. } => {
187 // Hot-path hooks can't pause for an operator prompt
188 // inside a 50ms budget; surface AskUser as Allow with
189 // a tracing warning so the misconfigured hook is
190 // visible without breaking the recall.
191 tracing::warn!(
192 "hooks: pre_recall_expand returned AskUser; degrading to Allow \
193 (operator prompts are incompatible with the recall hot path)"
194 );
195 PreRecallOutcome::Allow
196 }
197 }
198}
199
200// ---------------------------------------------------------------------------
201// Tests
202// ---------------------------------------------------------------------------
203
#[cfg(test)]
mod tests {
    use super::*;

    /// `Allow` echoes back whatever triple the caller passes in.
    #[test]
    fn outcome_allow_uses_original_triple() {
        let outcome = PreRecallOutcome::Allow;
        assert!(!outcome.is_denied());
        assert_eq!(outcome.query("orig"), "orig");
        assert_eq!(outcome.namespace("ns"), "ns");
        assert_eq!(outcome.k(7), 7);
    }

    /// `Modified` ignores the caller's triple in favour of its own.
    #[test]
    fn outcome_modified_returns_rewritten_triple() {
        let outcome = PreRecallOutcome::Modified {
            query: String::from("rewrite"),
            namespace: String::from("team/x"),
            k: 25,
        };
        assert!(!outcome.is_denied());
        assert_eq!(outcome.query("orig"), "rewrite");
        assert_eq!(outcome.namespace("ns"), "team/x");
        assert_eq!(outcome.k(7), 25);
    }

    /// `Denied` reports the original triple (useful for logging
    /// only — the caller must not run the recall) and flags itself
    /// as denied.
    #[test]
    fn outcome_denied_falls_back_to_original_for_logging() {
        let outcome = PreRecallOutcome::Denied {
            reason: String::from("blocked"),
            code: 451,
        };
        assert!(outcome.is_denied());
        assert_eq!(outcome.query("orig"), "orig");
        assert_eq!(outcome.namespace("ns"), "ns");
        assert_eq!(outcome.k(7), 7);
    }

    /// A chain with no hooks takes the fast path and yields `Allow`.
    #[tokio::test]
    async fn empty_chain_is_allow_fast_path() {
        let no_hooks = HookChain::new(Vec::new());
        let mut registry = ExecutorRegistry::new();
        let outcome =
            apply_pre_recall_expand("hello", "default", 10, &no_hooks, &mut registry).await;
        assert_eq!(outcome, PreRecallOutcome::Allow);
    }
}
251}