Skip to main content

ai_memory/hooks/
recall.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// v0.7 Track G — Task G10: pre_recall_expand hot-path hook helper.
5//
6// G10 wires the new [`HookEvent::PreRecallExpand`] (events.rs) into
7// the recall hot path. The fire site is `mcp::handle_recall`; the
8// helper here is the seam between that call site and G5's
9// `HookChain::fire`.
10//
11// # Why a helper module
12//
13// `handle_recall` is a long, sync-heavy function with many call
14// sites and several test paths. Inlining the hook fire would
15// (a) require threading the hook chain + executor registry into
16// the function signature (cascading into every caller) and
17// (b) duplicate the payload-marshalling logic between this and the
18// future G11 / G7+ wiring tasks. Pulling the firing into a single
19// function lets the call site stay a one-liner and keeps the
20// daemon-mode contract testable in isolation.
21//
22// # Daemon mode is mandatory in production
23//
24// `PreRecallExpand` is classified as [`crate::hooks::EventClass::HotPath`]
25// (50ms class deadline). A subprocess fork+exec on Linux costs
26// ~5-10ms cold and ~1-2ms warm; a single misbehaving exec-mode
27// hook would consume the entire budget before the child even
28// processes the payload. Operators MUST configure the hook in
29// `mode = "daemon"` — the chain's per-hook budget enforcement
30// (G6) ensures a misconfigured exec-mode hook still respects the
31// 50ms ceiling, but the operator-visible behaviour will be
32// "every recall trips the budget".
33
34use serde_json::Value;
35
36use super::chain::{ChainResult, HookChain};
37use super::events::{HookEvent, RecallExpandQuery};
38use super::executor::ExecutorRegistry;
39
40// ---------------------------------------------------------------------------
41// Outcome of running the pre_recall_expand chain
42// ---------------------------------------------------------------------------
43
/// Result handed back to `handle_recall` once the `pre_recall_expand`
/// chain has run.
///
/// The recall continues for both `Allow` and `Modified`; the two differ
/// only in whether a hook rewrote the in-flight `(query, namespace, k)`
/// triple. `Denied` stops the recall: the caller is expected to answer
/// with an empty result and surface `reason` through the recall
/// response's `meta.diagnostic` block (G5's chain-level Deny
/// semantics).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PreRecallOutcome {
    /// The chain answered `Allow`, or no hooks were configured. The
    /// original triple is used unchanged.
    Allow,
    /// One or more hooks answered `Modify`. The recall runs with this
    /// triple, in which any of `query`, `namespace`, `k` may differ
    /// from what the caller supplied.
    Modified {
        query: String,
        namespace: String,
        k: u32,
    },
    /// A hook answered `Deny`. The recall is short-circuited and the
    /// caller returns an empty result together with a diagnostic.
    Denied { reason: String, code: i32 },
}

impl PreRecallOutcome {
    /// Query text to run the recall with.
    ///
    /// `Modified` yields the rewritten query; `Allow` yields a copy of
    /// `original`. `Denied` also yields `original`, but that value is
    /// meant for logging only — a denied recall must not be executed.
    #[must_use]
    pub fn query(&self, original: &str) -> String {
        match self {
            Self::Modified { query, .. } => query.clone(),
            Self::Allow | Self::Denied { .. } => original.to_owned(),
        }
    }

    /// Namespace to run the recall in: the rewritten one for
    /// `Modified`, otherwise a copy of `original`.
    #[must_use]
    pub fn namespace(&self, original: &str) -> String {
        match self {
            Self::Modified { namespace, .. } => namespace.clone(),
            Self::Allow | Self::Denied { .. } => original.to_owned(),
        }
    }

    /// Result limit for the recall: the rewritten `k` for `Modified`,
    /// otherwise `original`.
    #[must_use]
    pub fn k(&self, original: u32) -> u32 {
        match self {
            Self::Modified { k, .. } => *k,
            Self::Allow | Self::Denied { .. } => original,
        }
    }

    /// `true` when the chain denied the recall, i.e. the recall must be
    /// skipped.
    #[must_use]
    pub fn is_denied(&self) -> bool {
        matches!(self, Self::Denied { .. })
    }
}
107
108// ---------------------------------------------------------------------------
109// apply_pre_recall_expand — fire the hot-path chain
110// ---------------------------------------------------------------------------
111
112/// Fire the [`HookEvent::PreRecallExpand`] chain on the recall hot
113/// path.
114///
115/// The hot-path budget is enforced by G6's chain runner (the chain's
116/// `fire` method stamps a 50ms wall-clock ceiling at entry; the
117/// caller does not need to add a second `tokio::time::timeout`
118/// around this call).
119///
120/// ## Modify semantics
121///
122/// A hook returns `HookDecision::Modify` with a [`super::events::MemoryDelta`]
123/// — but `MemoryDelta` was designed for the `pre_store` shape
124/// (memory fields). For `pre_recall_expand` we reuse three of its
125/// fields with overloaded meaning:
126///
127///   * `MemoryDelta::content`   → rewritten `query` text
128///   * `MemoryDelta::namespace` → rewritten `namespace`
129///   * `MemoryDelta::priority`  → rewritten `k` (cast `i32 → u32`,
130///     non-positive values fall back to the original `k`)
131///
132/// This overload is documented here rather than forking
133/// `MemoryDelta` into a per-event family because (a) the chain
134/// runner's delta merge is generic over `MemoryDelta` and forking
135/// would cascade through G5 + G6, and (b) the hot-path payload is
136/// narrow enough that a typed per-hook payload was rejected during
137/// G2 design discussion. Future G* tasks may revisit if more
138/// per-event payload shapes accrue.
139///
140/// ## Return shape
141///
142/// See [`PreRecallOutcome`]. `Allow` and `Modified` both let the
143/// recall proceed; `Denied` halts it and the caller surfaces the
144/// reason in the response's diagnostic block.
145pub async fn apply_pre_recall_expand(
146    query: &str,
147    namespace: &str,
148    k: u32,
149    chain: &HookChain,
150    registry: &mut ExecutorRegistry,
151) -> PreRecallOutcome {
152    // No hooks configured — fast path. The G6 chain runner would
153    // also early-return, but skipping the JSON marshal here keeps
154    // the no-hook recall path zero-overhead.
155    if chain.hooks().is_empty() {
156        return PreRecallOutcome::Allow;
157    }
158
159    let payload_struct = RecallExpandQuery {
160        query: query.to_string(),
161        namespace: namespace.to_string(),
162        k,
163    };
164    let payload = serde_json::to_value(&payload_struct).unwrap_or_else(|_| Value::Null);
165
166    let result = chain
167        .fire(HookEvent::PreRecallExpand, payload, registry)
168        .await;
169
170    match result {
171        ChainResult::Allow => PreRecallOutcome::Allow,
172        ChainResult::ModifiedAllow(delta) => {
173            let new_query = delta.content.unwrap_or_else(|| query.to_string());
174            let new_namespace = delta.namespace.unwrap_or_else(|| namespace.to_string());
175            let new_k = match delta.priority {
176                Some(p) if p > 0 => u32::try_from(p).unwrap_or(k),
177                _ => k,
178            };
179            PreRecallOutcome::Modified {
180                query: new_query,
181                namespace: new_namespace,
182                k: new_k,
183            }
184        }
185        ChainResult::Deny { reason, code } => PreRecallOutcome::Denied { reason, code },
186        ChainResult::AskUser { .. } => {
187            // Hot-path hooks can't pause for an operator prompt
188            // inside a 50ms budget; surface AskUser as Allow with
189            // a tracing warning so the misconfigured hook is
190            // visible without breaking the recall.
191            tracing::warn!(
192                "hooks: pre_recall_expand returned AskUser; degrading to Allow \
193                 (operator prompts are incompatible with the recall hot path)"
194            );
195            PreRecallOutcome::Allow
196        }
197    }
198}
199
200// ---------------------------------------------------------------------------
201// Tests
202// ---------------------------------------------------------------------------
203
#[cfg(test)]
mod tests {
    use super::*;

    /// `Allow` hands back whatever original triple the caller passes.
    #[test]
    fn outcome_allow_uses_original_triple() {
        let outcome = PreRecallOutcome::Allow;
        assert!(!outcome.is_denied());
        assert_eq!(outcome.query("orig"), "orig");
        assert_eq!(outcome.namespace("ns"), "ns");
        assert_eq!(outcome.k(7), 7);
    }

    /// `Modified` overrides every element of the triple.
    #[test]
    fn outcome_modified_returns_rewritten_triple() {
        let outcome = PreRecallOutcome::Modified {
            query: String::from("rewrite"),
            namespace: String::from("team/x"),
            k: 25,
        };
        assert!(!outcome.is_denied());
        assert_eq!(outcome.query("orig"), "rewrite");
        assert_eq!(outcome.namespace("ns"), "team/x");
        assert_eq!(outcome.k(7), 25);
    }

    /// `Denied` reports the original triple. The caller must not run
    /// the recall; the values exist so the denial can be logged.
    #[test]
    fn outcome_denied_falls_back_to_original_for_logging() {
        let outcome = PreRecallOutcome::Denied {
            reason: String::from("blocked"),
            code: 451,
        };
        assert!(outcome.is_denied());
        assert_eq!(outcome.query("orig"), "orig");
        assert_eq!(outcome.namespace("ns"), "ns");
        assert_eq!(outcome.k(7), 7);
    }

    /// A chain with no hooks takes the early-return path and allows.
    #[tokio::test]
    async fn empty_chain_is_allow_fast_path() {
        let empty_chain = HookChain::new(Vec::new());
        let mut registry = ExecutorRegistry::new();
        let outcome =
            apply_pre_recall_expand("hello", "default", 10, &empty_chain, &mut registry).await;
        assert_eq!(outcome, PreRecallOutcome::Allow);
    }
}
251}