Skip to main content

mnemo_codemode/
runner.rs

//! Host-side execution path for code-mode recall.
//!
//! The "guest program" is a pre-built sequence of host import calls
//! the LLM-generated wasm would have made. Today the binary in
//! mnemo-cli builds the program from CLI args; tomorrow the wasmtime
//! runner (gated under the `wasm` feature) compiles + executes a
//! real WIT guest. Either way the host-side contract is the same:
//! a [`GuestProgram`] is consumed against the [`MemStore`]-shaped
//! callable, producing a [`RecallBundle`] with the cited memories
//! plus token-cost accounting.
11
12use std::time::Duration;
13
14use serde::{Deserialize, Serialize};
15use thiserror::Error;
16
17/// Resource limits the wasm sandbox is parameterised by. Defaults
18/// chosen so a runaway guest cannot DOS the host: 10M fuel, 64
19/// pages (4 MiB), 50 ms wall.
20#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
21pub struct ResourceBudget {
22    pub fuel: u64,
23    pub mem_pages: u32,
24    pub wall: Duration,
25}
26
27impl Default for ResourceBudget {
28    fn default() -> Self {
29        Self {
30            fuel: 10_000_000,
31            mem_pages: 64,
32            wall: Duration::from_millis(50),
33        }
34    }
35}
36
37/// One step a guest program asks the host to run. Mirrors the WIT
38/// world's `store` interface (`recall`, `score`, `cite`).
39#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
40pub enum RecallStep {
41    Recall { query: String, k: u32 },
42    Score { memory_id: String },
43    Cite { memory_id: String },
44}
45
46/// Bundle of host-import calls a guest program asks for. The host
47/// runs them in order and records what it returned in
48/// [`RecallBundle`].
49#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
50pub struct GuestProgram {
51    pub steps: Vec<RecallStep>,
52}
53
54/// What the guest program produces. The CLI hands `final_answer`
55/// back to the LLM; the bundle's other fields land in the audit
56/// trail so an offline auditor can replay the recall.
57#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
58pub struct RecallBundle {
59    pub recalled: Vec<RecallHit>,
60    pub final_answer: String,
61    /// Estimated token cost the guest paid talking to the host.
62    /// Compare this to [`json_mode_token_estimate`] to show the
63    /// savings.
64    pub guest_token_cost: usize,
65}
66
67#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
68pub struct RecallHit {
69    pub id: String,
70    pub content: String,
71    pub score: f32,
72}
73
74#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
75pub struct CodeModeRecall {
76    pub program: GuestProgram,
77    pub budget: ResourceBudget,
78}
79
80#[derive(Debug, Error, PartialEq)]
81pub enum CodeModeError {
82    #[error("guest fuel exhausted ({budget} units consumed)")]
83    Halted { budget: u64 },
84    #[error("guest exceeded wall-time budget {budget:?}")]
85    WallTimeExceeded { budget: Duration },
86    #[error("guest tried to access {capability} which is stripped from the sandbox")]
87    SandboxViolation { capability: &'static str },
88    #[error("guest emitted no recall steps — refusing an empty bundle")]
89    EmptyProgram,
90}
91
92/// Trait the host exposes to the guest. Mirrors the WIT
93/// `store` interface so swapping in the wasmtime path keeps the
94/// same contract.
95pub trait HostStore: Send + Sync {
96    fn recall(&self, query: &str, k: u32) -> Vec<RecallHit>;
97    fn score(&self, memory_id: &str) -> f32;
98    fn cite(&self, memory_id: &str) -> String;
99}
100
101/// Run a guest program against the host store. The wall-time and
102/// fuel budgets are enforced cooperatively on every step; the wasm
103/// sandbox enforces them preemptively under the `wasm` feature.
104pub fn run_code_mode_host(
105    program: &CodeModeRecall,
106    store: &dyn HostStore,
107) -> Result<RecallBundle, CodeModeError> {
108    if program.program.steps.is_empty() {
109        return Err(CodeModeError::EmptyProgram);
110    }
111    let start = std::time::Instant::now();
112    let mut fuel_used = 0u64;
113    let mut recalled = Vec::new();
114    let mut answer_parts = Vec::new();
115    for step in &program.program.steps {
116        // Each host import costs a fixed fuel quantum. The wasm
117        // path will additionally meter wasm instructions; for the
118        // host-only path this is enough to catch runaway programs.
119        fuel_used = fuel_used.saturating_add(1_000_000);
120        if fuel_used > program.budget.fuel {
121            return Err(CodeModeError::Halted {
122                budget: program.budget.fuel,
123            });
124        }
125        if start.elapsed() > program.budget.wall {
126            return Err(CodeModeError::WallTimeExceeded {
127                budget: program.budget.wall,
128            });
129        }
130        match step {
131            RecallStep::Recall { query, k } => {
132                let hits = store.recall(query, *k);
133                for h in &hits {
134                    answer_parts.push(format!("- {}", h.content));
135                }
136                recalled.extend(hits);
137            }
138            RecallStep::Score { memory_id } => {
139                let _ = store.score(memory_id);
140            }
141            RecallStep::Cite { memory_id } => {
142                let _ = store.cite(memory_id);
143            }
144        }
145    }
146    let final_answer = if answer_parts.is_empty() {
147        "(no relevant memories)".to_string()
148    } else {
149        answer_parts.join("\n")
150    };
151    let guest_token_cost =
152        crate::token::estimate_tokens(&final_answer) + program.program.steps.len() * 4; // ~4 tokens per host call
153    Ok(RecallBundle {
154        recalled,
155        final_answer,
156        guest_token_cost,
157    })
158}
159
#[cfg(test)]
mod tests {
    use super::*;

    /// Deterministic in-memory store: `recall` fabricates at most
    /// three hits, `score` is constant, `cite` echoes the id.
    struct StubStore;
    impl HostStore for StubStore {
        fn recall(&self, q: &str, k: u32) -> Vec<RecallHit> {
            (0..k.min(3))
                .map(|i| RecallHit {
                    id: format!("m{i}"),
                    content: format!("answer to '{q}' #{i}"),
                    score: 1.0 - (i as f32) * 0.1,
                })
                .collect()
        }
        fn score(&self, _: &str) -> f32 {
            0.5
        }
        fn cite(&self, mid: &str) -> String {
            format!("receipt-for-{mid}")
        }
    }

    /// Store whose `recall` blocks for a millisecond before delegating
    /// to [`StubStore`], so measurable wall time has passed once the
    /// first recall step returns.
    struct SlowStore;
    impl HostStore for SlowStore {
        fn recall(&self, q: &str, k: u32) -> Vec<RecallHit> {
            std::thread::sleep(Duration::from_millis(1));
            StubStore.recall(q, k)
        }
        fn score(&self, mid: &str) -> f32 {
            StubStore.score(mid)
        }
        fn cite(&self, mid: &str) -> String {
            StubStore.cite(mid)
        }
    }

    #[test]
    fn empty_program_is_rejected() {
        let req = CodeModeRecall {
            program: GuestProgram { steps: vec![] },
            budget: ResourceBudget::default(),
        };
        let err = run_code_mode_host(&req, &StubStore).unwrap_err();
        assert_eq!(err, CodeModeError::EmptyProgram);
    }

    #[test]
    fn fuel_exhaust_halts() {
        // Default budget = 10M fuel and each step burns 1M, so the
        // 11th of these 12 steps pushes the charge to 11M > 10M.
        let req = CodeModeRecall {
            program: GuestProgram {
                steps: vec![
                    RecallStep::Recall {
                        query: "x".into(),
                        k: 1,
                    };
                    12
                ],
            },
            budget: ResourceBudget::default(),
        };
        let err = run_code_mode_host(&req, &StubStore).unwrap_err();
        assert_eq!(
            err,
            CodeModeError::Halted {
                budget: ResourceBudget::default().fuel,
            }
        );
    }

    #[test]
    fn happy_path_returns_bundle() {
        let req = CodeModeRecall {
            program: GuestProgram {
                steps: vec![RecallStep::Recall {
                    query: "patient fatigue".into(),
                    k: 3,
                }],
            },
            budget: ResourceBudget::default(),
        };
        let bundle = run_code_mode_host(&req, &StubStore).unwrap();
        assert_eq!(bundle.recalled.len(), 3);
        assert!(bundle.final_answer.contains("answer to"));
    }

    #[test]
    fn score_and_cite_only_yields_placeholder_answer() {
        // Score/Cite results are not recorded, so a program without
        // any Recall step falls back to the placeholder answer.
        let req = CodeModeRecall {
            program: GuestProgram {
                steps: vec![
                    RecallStep::Score {
                        memory_id: "m0".into(),
                    },
                    RecallStep::Cite {
                        memory_id: "m0".into(),
                    },
                ],
            },
            budget: ResourceBudget::default(),
        };
        let bundle = run_code_mode_host(&req, &StubStore).unwrap();
        assert!(bundle.recalled.is_empty());
        assert_eq!(bundle.final_answer, "(no relevant memories)");
    }

    #[test]
    fn wall_time_budget_can_be_exceeded() {
        // The runner starts its clock internally, so sleeping before
        // the call cannot influence the measurement. Instead the store
        // itself is slow: with a zero budget the check trips either
        // before step 1 (if the clock has already advanced) or, at the
        // latest, before step 2, once SlowStore's 1 ms sleep has
        // guaranteed a non-zero elapsed time.
        let req = CodeModeRecall {
            program: GuestProgram {
                steps: vec![
                    RecallStep::Recall {
                        query: "x".into(),
                        k: 1,
                    };
                    2
                ],
            },
            budget: ResourceBudget {
                wall: Duration::ZERO,
                ..ResourceBudget::default()
            },
        };
        let err = run_code_mode_host(&req, &SlowStore).unwrap_err();
        assert_eq!(
            err,
            CodeModeError::WallTimeExceeded {
                budget: Duration::ZERO,
            }
        );
    }
}
253}