Skip to main content

shadow_core/replay/
mock.rs

1//! [`MockLlm`] — deterministic backend that replays recorded responses.
2//!
3//! Given a baseline trace, MockLlm indexes every `chat_response` record by
4//! its parent `chat_request` id and serves it back on demand. Because the
5//! request id is `sha256(canonical_json(payload))` (SPEC §6), a request
6//! with the same payload as one in the baseline always hits the mock — no
7//! "fuzzy matching" or fallback is attempted in strict mode.
8//!
9//! Use this in CI, tests, and the offline demo. For running new
10//! configurations against live providers, see future backends in
11//! `python/src/shadow/llm/`.
12
13use std::collections::HashMap;
14
15use async_trait::async_trait;
16use serde_json::Value;
17
18use crate::agentlog::{Kind, Record};
19use crate::replay::backend::{LlmBackend, LlmError};
20
21/// Deterministic backend that replays recorded responses.
22pub struct MockLlm {
23    id: String,
24    /// request_id → response payload.
25    responses: HashMap<String, Value>,
26}
27
28impl MockLlm {
29    /// Build from a baseline trace.
30    pub fn from_trace(trace: &[Record]) -> Self {
31        let mut responses = HashMap::new();
32        for record in trace {
33            if record.kind == Kind::ChatResponse {
34                if let Some(parent_id) = &record.parent {
35                    responses.insert(parent_id.clone(), record.payload.clone());
36                }
37            }
38        }
39        Self {
40            id: "mock".to_string(),
41            responses,
42        }
43    }
44
45    /// Build from multiple traces (trace set).
46    pub fn from_traces<'a, I: IntoIterator<Item = &'a [Record]>>(traces: I) -> Self {
47        let mut responses = HashMap::new();
48        for trace in traces {
49            for record in trace {
50                if record.kind == Kind::ChatResponse {
51                    if let Some(parent_id) = &record.parent {
52                        responses.insert(parent_id.clone(), record.payload.clone());
53                    }
54                }
55            }
56        }
57        Self {
58            id: "mock".to_string(),
59            responses,
60        }
61    }
62
63    /// Override the backend's `id()` string (useful when running multiple
64    /// Mock variants in the same session).
65    pub fn with_id(mut self, id: impl Into<String>) -> Self {
66        self.id = id.into();
67        self
68    }
69
70    /// Number of request→response pairs the mock knows about.
71    pub fn len(&self) -> usize {
72        self.responses.len()
73    }
74
75    /// Whether the mock has no recorded responses at all.
76    pub fn is_empty(&self) -> bool {
77        self.responses.is_empty()
78    }
79}
80
81#[async_trait]
82impl LlmBackend for MockLlm {
83    async fn complete(&self, request: &Value) -> Result<Value, LlmError> {
84        let request_id = crate::agentlog::hash::content_id(request);
85        self.responses
86            .get(&request_id)
87            .cloned()
88            .ok_or(LlmError::MissingResponse(request_id))
89    }
90
91    fn id(&self) -> &str {
92        &self.id
93    }
94}
95
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agentlog::{hash, Kind, Record};
    use serde_json::json;

    /// Response payload in the recorded shape, varying only the reply text.
    fn response_payload(text: &str) -> Value {
        json!({"model": "claude-opus-4-7", "content": [{"text": text, "type": "text"}], "stop_reason": "end_turn", "latency_ms": 1, "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0}})
    }

    /// Minimal trace: metadata → chat request → chat response.
    fn tiny_trace() -> Vec<Record> {
        let meta = Record::new(
            Kind::Metadata,
            json!({"sdk": {"name": "shadow", "version": "0.1.0"}}),
            "2026-04-21T10:00:00Z",
            None,
        );
        let req_payload = json!({"model": "claude-opus-4-7", "messages": [], "params": {}});
        let req = Record::new(
            Kind::ChatRequest,
            req_payload,
            "2026-04-21T10:00:00.100Z",
            Some(meta.id.clone()),
        );
        let resp = Record::new(
            Kind::ChatResponse,
            response_payload("Hi!"),
            "2026-04-21T10:00:00.500Z",
            Some(req.id.clone()),
        );
        vec![meta, req, resp]
    }

    #[tokio::test]
    async fn recorded_request_returns_recorded_response() {
        let trace = tiny_trace();
        let req_payload = trace[1].payload.clone();
        let expected_resp_payload = trace[2].payload.clone();

        let mock = MockLlm::from_trace(&trace);
        assert_eq!(mock.len(), 1);
        assert_eq!(mock.id(), "mock");

        let got = mock.complete(&req_payload).await.unwrap();
        assert_eq!(got, expected_resp_payload);
    }

    #[tokio::test]
    async fn unrecorded_request_returns_missing_error() {
        let trace = tiny_trace();
        let mock = MockLlm::from_trace(&trace);
        let unknown = json!({"model": "gpt-5", "messages": [], "params": {}});
        let unknown_id = hash::content_id(&unknown);
        match mock.complete(&unknown).await {
            Err(LlmError::MissingResponse(id)) => assert_eq!(id, unknown_id),
            other => panic!("expected MissingResponse, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn key_by_content_id_collapses_identical_payloads() {
        // Two request-response pairs with byte-identical request payloads
        // but different timestamps — the mock should deduplicate to one
        // entry by content id.
        let trace = tiny_trace();
        let mut extended = trace.clone();
        let req2 = Record::new(
            Kind::ChatRequest,
            trace[1].payload.clone(),
            "2026-04-21T11:00:00Z",
            Some(trace[0].id.clone()),
        );
        let resp2 = Record::new(
            Kind::ChatResponse,
            trace[2].payload.clone(),
            "2026-04-21T11:00:00.500Z",
            Some(req2.id.clone()),
        );
        extended.push(req2);
        extended.push(resp2);
        let mock = MockLlm::from_trace(&extended);
        assert_eq!(mock.len(), 1); // collapsed

        // The surviving entry must still be served for the shared payload.
        let got = mock.complete(&trace[1].payload).await.unwrap();
        assert_eq!(got, trace[2].payload);
    }

    #[tokio::test]
    async fn from_traces_merges_multiple_sources() {
        let t1 = tiny_trace();
        // Build a second trace with a different request.
        let meta2 = Record::new(
            Kind::Metadata,
            json!({"sdk": {"name": "shadow", "version": "0.1.0"}, "tags": {"env": "other"}}),
            "2026-04-21T12:00:00Z",
            None,
        );
        let req2_payload = json!({"model": "claude-opus-4-7", "messages": [{"role": "user", "content": "hi"}], "params": {}});
        let req2 = Record::new(
            Kind::ChatRequest,
            req2_payload.clone(),
            "2026-04-21T12:00:00.100Z",
            Some(meta2.id.clone()),
        );
        let resp2 = Record::new(
            Kind::ChatResponse,
            response_payload("hello"),
            "2026-04-21T12:00:00.500Z",
            Some(req2.id.clone()),
        );
        let t2 = vec![meta2, req2, resp2];
        let mock = MockLlm::from_traces([t1.as_slice(), t2.as_slice()]);
        assert_eq!(mock.len(), 2);

        // Each source trace's request must resolve to its own response.
        let got1 = mock.complete(&t1[1].payload).await.unwrap();
        assert_eq!(got1, t1[2].payload);
        let got2 = mock.complete(&req2_payload).await.unwrap();
        assert_eq!(got2, t2[2].payload);
    }

    // The remaining tests never await, so they run as plain synchronous
    // tests without spinning up an async runtime.

    #[test]
    fn empty_trace_produces_empty_mock() {
        let mock = MockLlm::from_trace(&[]);
        assert!(mock.is_empty());
        assert_eq!(mock.len(), 0);
    }

    #[test]
    fn with_id_overrides_default() {
        let mock = MockLlm::from_trace(&[]).with_id("my-mock");
        assert_eq!(mock.id(), "my-mock");
    }
}