Skip to main content

harn_vm/llm/
mock.rs

1use crate::value::VmDictExt;
2use std::cell::RefCell;
3use std::collections::{BTreeMap, BTreeSet};
4
5use super::api::{LlmResult, ProviderTelemetry};
6use crate::orchestration::ToolCallRecord;
7use crate::value::{ErrorCategory, VmError, VmValue};
8
/// LLM replay mode.
///
/// Stored per thread by `set_replay_mode` and read back through
/// `get_replay_mode`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlmReplayMode {
    /// Initial value of the thread-local mode.
    Off,
    /// NOTE(review): presumably live responses are saved as fixtures in
    /// this mode — confirm at the call sites of `save_fixture`.
    Record,
    /// NOTE(review): presumably responses are served from saved fixtures
    /// in this mode — confirm at the call sites of `load_fixture`.
    Replay,
}
16
/// State of the CLI-installed mock harness, tracked per thread.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CliLlmMockMode {
    /// Initial value; also restored by `clear_cli_llm_mock_mode` and
    /// `reset_llm_mock_state`.
    Off,
    /// Set by `install_cli_llm_mocks`.
    Replay,
    /// Set by `enable_cli_llm_mock_recording`; while active,
    /// `record_cli_llm_result` appends each result to the recordings.
    Record,
}
23
/// Categorized error injected by a mock. When present, the mock
/// short-circuits the provider call, so `llm_call` throws and
/// `llm_call_safe` populates its `error` envelope.
///
/// `mock_error_to_vm_error` surfaces it as `VmError::Thrown` carrying a
/// dict when any of `status`, `kind`, or `reason` is set, and as
/// `VmError::CategorizedError` otherwise.
#[derive(Clone)]
pub struct MockError {
    /// Error category reported to the caller.
    pub category: ErrorCategory,
    /// Human-readable message; may be empty.
    pub message: String,
    /// Optional HTTP status code (provider-envelope field).
    pub status: Option<u16>,
    /// Optional error kind (provider-envelope field). `build_mock_error`
    /// stores it trimmed and lowercased.
    pub kind: Option<String>,
    /// Optional error reason (provider-envelope field). `build_mock_error`
    /// stores it trimmed.
    pub reason: Option<String>,
    /// Optional retry hint. Provider-envelope mocks put this directly
    /// on the thrown dict; legacy category-only mocks embed it in the
    /// message so the live-provider parser path still exercises the
    /// same extraction code.
    pub retry_after_ms: Option<u64>,
}
41
42impl MockError {
43    fn has_provider_envelope(&self) -> bool {
44        self.status.is_some() || self.kind.is_some() || self.reason.is_some()
45    }
46}
47
48pub(crate) fn build_mock_error(
49    category: Option<String>,
50    message: Option<String>,
51    status: Option<u16>,
52    kind: Option<String>,
53    reason: Option<String>,
54    retry_after_ms: Option<u64>,
55) -> Result<MockError, String> {
56    if retry_after_ms.is_some_and(|ms| ms > i64::MAX as u64) {
57        return Err("error.retry_after_ms must fit in a signed 64-bit integer".to_string());
58    }
59    let kind = match kind {
60        Some(value) if value.trim().is_empty() => None,
61        Some(value) => {
62            let normalized = value.trim().to_ascii_lowercase();
63            if super::api::LlmErrorKind::parse(&normalized).is_none() {
64                return Err(format!("unknown error kind `{value}`"));
65            }
66            Some(normalized)
67        }
68        None => None,
69    };
70    let reason = reason.and_then(|value| {
71        let trimmed = value.trim();
72        if trimmed.is_empty() {
73            None
74        } else {
75            Some(trimmed.to_string())
76        }
77    });
78    let category_was_provided = category.is_some();
79    let category = match category {
80        Some(value) if value.trim().is_empty() => {
81            return Err("error.category must not be empty".to_string());
82        }
83        Some(value) => {
84            let normalized = value.trim().to_ascii_lowercase();
85            let category = ErrorCategory::parse(&normalized);
86            if category.as_str() != normalized {
87                return Err(format!("unknown error category `{value}`"));
88            }
89            category
90        }
91        None => infer_mock_error_category(status, kind.as_deref(), reason.as_deref()),
92    };
93    if !category_was_provided && kind.is_none() && status.is_none() && reason.is_none() {
94        return Err(
95            "error.category is required unless error.status, error.kind, or error.reason is set"
96                .to_string(),
97        );
98    }
99    Ok(MockError {
100        category,
101        message: message.unwrap_or_else(|| {
102            default_mock_error_message(status, kind.as_deref(), reason.as_deref())
103        }),
104        status,
105        kind,
106        reason,
107        retry_after_ms,
108    })
109}
110
111pub(crate) fn validate_mock_error_status(status: i64) -> Result<u16, String> {
112    let status = u16::try_from(status)
113        .map_err(|_| "error.status must be an HTTP status code".to_string())?;
114    reqwest::StatusCode::from_u16(status)
115        .map_err(|_| "error.status must be an HTTP status code".to_string())?;
116    Ok(status)
117}
118
119fn infer_mock_error_category(
120    status: Option<u16>,
121    kind: Option<&str>,
122    reason: Option<&str>,
123) -> ErrorCategory {
124    if let Some(status) = status {
125        match status {
126            401 | 403 => return ErrorCategory::Auth,
127            404 | 410 => return ErrorCategory::NotFound,
128            408 | 504 | 522 | 524 => return ErrorCategory::Timeout,
129            429 => return ErrorCategory::RateLimit,
130            503 | 529 => return ErrorCategory::Overloaded,
131            500 | 502 => return ErrorCategory::ServerError,
132            _ => {}
133        }
134    }
135    if let Some(reason) = reason {
136        match reason {
137            "rate_limit" => return ErrorCategory::RateLimit,
138            "timeout" => return ErrorCategory::Timeout,
139            "network_error" | "transient_network" => return ErrorCategory::TransientNetwork,
140            "server_error" | "provider_error" | "provider_5xx" | "upstream_unavailable" => {
141                return ErrorCategory::ServerError;
142            }
143            "auth_failure" => return ErrorCategory::Auth,
144            "model_unavailable" => return ErrorCategory::NotFound,
145            _ => {}
146        }
147    }
148    if kind == Some("transient") {
149        return ErrorCategory::ServerError;
150    }
151    ErrorCategory::Generic
152}
153
/// Builds the fallback message for a mock error that supplied none.
///
/// The message has the shape `[HTTP <status> ]mock LLM error[ (<detail>)]`,
/// where `<detail>` is `kind/reason` when both are present, or whichever
/// one is present. When nothing is supplied the message is empty.
///
/// A lone `kind` is included whether or not `status` is set, so the
/// same inputs are described the same way on both paths.
fn default_mock_error_message(
    status: Option<u16>,
    kind: Option<&str>,
    reason: Option<&str>,
) -> String {
    let detail = match (kind, reason) {
        (Some(kind), Some(reason)) => Some(format!("{kind}/{reason}")),
        (Some(only), None) | (None, Some(only)) => Some(only.to_string()),
        (None, None) => None,
    };
    match (status, detail) {
        (Some(status), Some(detail)) => format!("HTTP {status} mock LLM error ({detail})"),
        (Some(status), None) => format!("HTTP {status} mock LLM error"),
        (None, Some(detail)) => format!("mock LLM error ({detail})"),
        (None, None) => String::new(),
    }
}
171
/// One scripted LLM response (or error) held in a mock queue.
#[derive(Clone)]
pub struct LlmMock {
    /// Response text. When `blocks` is `None` and the text is non-empty
    /// it is also emitted as an `output_text` block.
    pub text: String,
    /// Tool calls to return. When `blocks` is `None`, each entry's
    /// `name` and `arguments` are read and ids `mock_call_1`,
    /// `mock_call_2`, … are assigned; when `blocks` is `Some`, the
    /// entries are returned unchanged.
    pub tool_calls: Vec<serde_json::Value>,
    /// `None` = FIFO (consumed on use); `Some` = glob pattern matched
    /// against the prompt text (reusable unless `consume_on_match`).
    pub match_pattern: Option<String>,
    /// For pattern mocks: remove the mock from the queue after its
    /// first match.
    pub consume_on_match: bool,
    /// Reported input tokens; defaults to the byte length of the text
    /// the mock was matched against.
    pub input_tokens: Option<i64>,
    /// Reported output tokens; defaults to 30.
    pub output_tokens: Option<i64>,
    /// Reported cache-read tokens; defaults to 0.
    pub cache_read_tokens: Option<i64>,
    /// Reported cache-write tokens; defaults to 0.
    pub cache_write_tokens: Option<i64>,
    /// Copied to the result's `thinking` field.
    pub thinking: Option<String>,
    /// Copied to the result's `thinking_summary` field.
    pub thinking_summary: Option<String>,
    /// Copied to the result's `stop_reason` field.
    pub stop_reason: Option<String>,
    /// Copied to the result's `model` field.
    pub model: String,
    /// Provider name for the result; defaults to `"mock"`.
    pub provider: Option<String>,
    /// Explicit content blocks. When `Some`, they are used verbatim
    /// instead of being synthesized from `text` and `tool_calls`.
    pub blocks: Option<Vec<serde_json::Value>>,
    /// Copied to the result's `logprobs` field.
    pub logprobs: Vec<serde_json::Value>,
    /// When `Some`, this mock synthesizes an error instead of an
    /// `LlmResult`. `text`/`tool_calls` are ignored for error mocks.
    pub error: Option<MockError>,
}
193
/// Snapshot of one LLM request, captured by `record_llm_mock_call` and
/// exposed through `get_llm_mock_calls`.
#[derive(Clone)]
pub(crate) struct LlmMockCall {
    /// String form of the request's API mode.
    pub api_mode: String,
    /// Request messages.
    pub messages: Vec<serde_json::Value>,
    /// Request system prompt.
    pub system: Option<String>,
    /// Copied from the request's `native_tools`.
    pub tools: Option<Vec<serde_json::Value>>,
    /// Provider tools; `None` when the request's list is empty.
    pub provider_tools: Option<Vec<serde_json::Value>>,
    /// Request tool choice.
    pub tool_choice: Option<serde_json::Value>,
    /// JSON form of the request's output format; falls back to
    /// `{"kind": "text"}` if serialization fails.
    pub output_format: serde_json::Value,
    /// JSON form of the request's thinking config; falls back to
    /// `{"mode": "disabled"}` if serialization fails.
    pub thinking: serde_json::Value,
    // The remaining fields are copied from the request as-is.
    pub previous_response_id: Option<String>,
    pub store: Option<bool>,
    pub background: Option<bool>,
    pub truncation: Option<String>,
    pub compact: Option<bool>,
    pub include: Option<Vec<String>>,
    pub max_tool_calls: Option<i64>,
}
212
// One saved scope: (builtin mock queue, recorded calls, prompt cache),
// in the order `push_llm_mock_scope` stores them.
type LlmMockScope = (Vec<LlmMock>, Vec<LlmMockCall>, BTreeSet<String>);

// All mock state is per thread.
thread_local! {
    // Current replay mode and fixture directory (see `set_replay_mode`).
    static LLM_REPLAY_MODE: RefCell<LlmReplayMode> = const { RefCell::new(LlmReplayMode::Off) };
    static LLM_FIXTURE_DIR: RefCell<String> = const { RefCell::new(String::new()) };
    // NOTE(review): not referenced in the visible part of this file — confirm usage.
    static TOOL_RECORDINGS: RefCell<Vec<ToolCallRecord>> = const { RefCell::new(Vec::new()) };
    // Builtin mock queue (see `push_llm_mock`).
    static LLM_MOCKS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    // CLI harness: mode, installed mocks, and captured recordings.
    static CLI_LLM_MOCK_MODE: RefCell<CliLlmMockMode> = const { RefCell::new(CliLlmMockMode::Off) };
    static CLI_LLM_MOCKS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    static CLI_LLM_RECORDINGS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    // Requests captured by `record_llm_mock_call`.
    static LLM_MOCK_CALLS: RefCell<Vec<LlmMockCall>> = const { RefCell::new(Vec::new()) };
    // Cache keys already seen by `apply_mock_prompt_cache`.
    static LLM_PROMPT_CACHE: RefCell<BTreeSet<String>> = const { RefCell::new(BTreeSet::new()) };
    // Stack of scopes saved by `push_llm_mock_scope`.
    static LLM_MOCK_SCOPES: RefCell<Vec<LlmMockScope>> = const { RefCell::new(Vec::new()) };
}
227
228pub(crate) fn push_llm_mock(mock: LlmMock) {
229    LLM_MOCKS.with(|v| v.borrow_mut().push(mock));
230}
231
232pub(crate) fn get_llm_mock_calls() -> Vec<LlmMockCall> {
233    LLM_MOCK_CALLS.with(|v| v.borrow().clone())
234}
235
236pub(crate) fn builtin_llm_mock_active() -> bool {
237    LLM_MOCKS.with(|v| !v.borrow().is_empty())
238}
239
240pub(crate) fn reset_llm_mock_state() {
241    LLM_MOCKS.with(|v| v.borrow_mut().clear());
242    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Off);
243    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
244    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
245    LLM_MOCK_CALLS.with(|v| v.borrow_mut().clear());
246    LLM_PROMPT_CACHE.with(|v| v.borrow_mut().clear());
247    LLM_MOCK_SCOPES.with(|v| v.borrow_mut().clear());
248}
249
250/// Save the current builtin LLM mock queue and recorded-calls list, then
251/// start a fresh empty scope. Paired with `pop_llm_mock_scope`. Backs
252/// the `with_llm_mocks` helper in `std/testing` so tests reliably
253/// roll back to the prior state, including when the body throws.
254pub(crate) fn push_llm_mock_scope() {
255    let mocks = LLM_MOCKS.with(|v| std::mem::take(&mut *v.borrow_mut()));
256    let calls = LLM_MOCK_CALLS.with(|v| std::mem::take(&mut *v.borrow_mut()));
257    let cache = LLM_PROMPT_CACHE.with(|v| std::mem::take(&mut *v.borrow_mut()));
258    LLM_MOCK_SCOPES.with(|v| v.borrow_mut().push((mocks, calls, cache)));
259}
260
261/// Restore the most recently pushed builtin LLM mock scope. Returns
262/// `false` when there is nothing to pop, so the builtin can surface a
263/// clear "imbalanced scope" error rather than silently corrupting
264/// state. CLI-installed mocks are intentionally untouched: they are an
265/// outer harness and should not flicker on each per-test scope swap.
266pub(crate) fn pop_llm_mock_scope() -> bool {
267    let entry = LLM_MOCK_SCOPES.with(|v| v.borrow_mut().pop());
268    match entry {
269        Some((mocks, calls, cache)) => {
270            LLM_MOCKS.with(|v| *v.borrow_mut() = mocks);
271            LLM_MOCK_CALLS.with(|v| *v.borrow_mut() = calls);
272            LLM_PROMPT_CACHE.with(|v| *v.borrow_mut() = cache);
273            true
274        }
275        None => false,
276    }
277}
278
279pub fn clear_cli_llm_mock_mode() {
280    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Off);
281    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
282    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
283}
284
285pub fn install_cli_llm_mocks(mocks: Vec<LlmMock>) {
286    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Replay);
287    CLI_LLM_MOCKS.with(|v| *v.borrow_mut() = mocks);
288    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
289}
290
291pub fn enable_cli_llm_mock_recording() {
292    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Record);
293    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
294    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
295}
296
297pub fn take_cli_llm_recordings() -> Vec<LlmMock> {
298    CLI_LLM_RECORDINGS.with(|v| std::mem::take(&mut *v.borrow_mut()))
299}
300
301pub(crate) fn cli_llm_mock_replay_active() -> bool {
302    CLI_LLM_MOCK_MODE.with(|v| *v.borrow() == CliLlmMockMode::Replay)
303}
304
305fn record_llm_mock_call(request: &super::api::LlmRequestPayload) {
306    LLM_MOCK_CALLS.with(|v| {
307        v.borrow_mut().push(LlmMockCall {
308            api_mode: request.api_mode.as_str().to_string(),
309            messages: request.messages.clone(),
310            system: request.system.clone(),
311            tools: request.native_tools.clone(),
312            provider_tools: if request.provider_tools.is_empty() {
313                None
314            } else {
315                Some(request.provider_tools.clone())
316            },
317            tool_choice: request.tool_choice.clone(),
318            output_format: serde_json::to_value(&request.output_format).unwrap_or_else(|_| {
319                serde_json::json!({
320                    "kind": "text"
321                })
322            }),
323            thinking: serde_json::to_value(&request.thinking).unwrap_or_else(|_| {
324                serde_json::json!({
325                    "mode": "disabled"
326                })
327            }),
328            previous_response_id: request.previous_response_id.clone(),
329            store: request.store,
330            background: request.background,
331            truncation: request.truncation.clone(),
332            compact: request.compact,
333            include: request.include.clone(),
334            max_tool_calls: request.max_tool_calls,
335        });
336    });
337}
338
339/// Build an LlmResult from a matched mock.
340fn build_mock_result(mock: &LlmMock, last_msg_len: usize) -> LlmResult {
341    let (tool_calls, blocks) = if let Some(blocks) = &mock.blocks {
342        (mock.tool_calls.clone(), blocks.clone())
343    } else {
344        let mut blocks = Vec::new();
345
346        if !mock.text.is_empty() {
347            blocks.push(serde_json::json!({
348                "type": "output_text",
349                "text": mock.text,
350                "visibility": "public",
351            }));
352        }
353
354        let mut tool_calls = Vec::new();
355        for (i, tc) in mock.tool_calls.iter().enumerate() {
356            let id = format!("mock_call_{}", i + 1);
357            let name = tc.get("name").and_then(|n| n.as_str()).unwrap_or("unknown");
358            let arguments = tc
359                .get("arguments")
360                .cloned()
361                .unwrap_or(serde_json::json!({}));
362            tool_calls.push(serde_json::json!({
363                "id": id,
364                "type": "tool_call",
365                "name": name,
366                "arguments": arguments,
367            }));
368            blocks.push(serde_json::json!({
369                "type": "tool_call",
370                "id": id,
371                "name": name,
372                "arguments": arguments,
373                "visibility": "internal",
374            }));
375        }
376
377        (tool_calls, blocks)
378    };
379
380    LlmResult {
381        served_fast: false,
382        text: mock.text.clone(),
383        tool_calls,
384        input_tokens: mock.input_tokens.unwrap_or(last_msg_len as i64),
385        output_tokens: mock.output_tokens.unwrap_or(30),
386        cache_read_tokens: mock.cache_read_tokens.unwrap_or(0),
387        cache_write_tokens: mock.cache_write_tokens.unwrap_or(0),
388        cache_supported: true,
389        model: mock.model.clone(),
390        provider: mock.provider.clone().unwrap_or_else(|| "mock".to_string()),
391        thinking: mock.thinking.clone(),
392        thinking_summary: mock.thinking_summary.clone(),
393        stop_reason: mock.stop_reason.clone(),
394        blocks,
395        logprobs: mock.logprobs.clone(),
396        telemetry: ProviderTelemetry::default(),
397    }
398}
399
400// Mock prompt patterns match free prose, where `?`/`[`/`{` are ordinary
401// characters — only `*` is a wildcard. The shared prose matcher keeps that
402// contract (`*`-only ordered segments).
403use harn_glob::match_prose as mock_glob_match;
404
405fn collect_mock_match_strings(value: &serde_json::Value, out: &mut Vec<String>) {
406    match value {
407        serde_json::Value::String(text) if !text.is_empty() => out.push(text.clone()),
408        serde_json::Value::String(_) => {}
409        serde_json::Value::Array(items) => {
410            for item in items {
411                collect_mock_match_strings(item, out);
412            }
413        }
414        serde_json::Value::Object(map) => {
415            for value in map.values() {
416                collect_mock_match_strings(value, out);
417            }
418        }
419        _ => {}
420    }
421}
422
423fn mock_match_text(messages: &[serde_json::Value]) -> String {
424    let mut parts = Vec::new();
425    for message in messages {
426        collect_mock_match_strings(message, &mut parts);
427    }
428    parts.join("\n")
429}
430
431fn mock_last_prompt_text(messages: &[serde_json::Value]) -> String {
432    for message in messages.iter().rev() {
433        let Some(content) = message.get("content") else {
434            continue;
435        };
436        let mut parts = Vec::new();
437        collect_mock_match_strings(content, &mut parts);
438        let text = parts.join("\n");
439        if !text.trim().is_empty() {
440            return text;
441        }
442    }
443    String::new()
444}
445
446fn mock_prompt_cache_key(
447    model: &str,
448    messages: &[serde_json::Value],
449    system: Option<&str>,
450) -> String {
451    serde_json::to_string(&serde_json::json!({
452        "model": model,
453        "system": system,
454        "messages": messages,
455    }))
456    .unwrap_or_default()
457}
458
459fn apply_mock_prompt_cache(result: &mut LlmResult, cache_key: &str) {
460    if result.cache_read_tokens > 0 || result.cache_write_tokens > 0 {
461        return;
462    }
463    let cache_tokens = result.input_tokens.max(0);
464    if cache_tokens == 0 {
465        return;
466    }
467    let cache_hit = LLM_PROMPT_CACHE.with(|cache| {
468        let mut cache = cache.borrow_mut();
469        if cache.contains(cache_key) {
470            true
471        } else {
472            cache.insert(cache_key.to_string());
473            false
474        }
475    });
476    if cache_hit {
477        result.cache_read_tokens = cache_tokens;
478    } else {
479        result.cache_write_tokens = cache_tokens;
480    }
481}
482
483/// Convert a mock's `error` payload into the `VmError` that the
484/// provider path would have raised, so classification, retry, and
485/// `error_category` all behave identically to a real failure.
486fn mock_error_to_vm_error(err: &MockError) -> VmError {
487    let message = mock_error_message(err);
488    if err.has_provider_envelope() {
489        let classified = super::api::classify_llm_error(err.category.clone(), &message);
490        let mut dict = BTreeMap::new();
491        dict.put_str("category", err.category.as_str());
492        dict.put_str(
493            "kind",
494            err.kind
495                .as_deref()
496                .unwrap_or_else(|| classified.kind.as_str()),
497        );
498        dict.put_str(
499            "reason",
500            err.reason
501                .as_deref()
502                .unwrap_or_else(|| classified.reason.as_str()),
503        );
504        dict.put_str("message", message);
505        if let Some(status) = err.status {
506            dict.insert("status".to_string(), VmValue::Int(i64::from(status)));
507        }
508        if let Some(retry_after_ms) = err.retry_after_ms {
509            dict.insert(
510                "retry_after_ms".to_string(),
511                VmValue::Int(retry_after_ms as i64),
512            );
513        }
514        return VmError::Thrown(VmValue::dict(dict));
515    }
516
517    VmError::CategorizedError {
518        message,
519        category: err.category.clone(),
520    }
521}
522
523fn mock_error_message(err: &MockError) -> String {
524    // Embed legacy category-only retry hints into the message so the
525    // same parser that handles live provider headers populates
526    // `retry_after_ms` on the final thrown dict.
527    let Some(ms) = err.retry_after_ms else {
528        return err.message.clone();
529    };
530    if err.has_provider_envelope() {
531        return err.message.clone();
532    }
533    let secs = (ms as f64 / 1000.0).max(0.0);
534    let sep = if err.message.is_empty() || err.message.ends_with('\n') {
535        ""
536    } else {
537        "\n"
538    };
539    format!("{}{sep}retry-after: {secs}\n", err.message)
540}
541
542/// Try to find and return a matching mock response. Returns
543/// `Some(Ok(LlmResult))` on a text/tool_call match, `Some(Err(VmError))`
544/// on an error-mock match, and `None` to fall through to default.
545fn try_match_mock_queue(
546    mocks: &mut Vec<LlmMock>,
547    match_text: &str,
548) -> Option<Result<LlmResult, VmError>> {
549    if let Some(idx) = mocks.iter().position(|m| m.match_pattern.is_none()) {
550        let mock = mocks.remove(idx);
551        return Some(match &mock.error {
552            Some(err) => Err(mock_error_to_vm_error(err)),
553            None => Ok(build_mock_result(&mock, match_text.len())),
554        });
555    }
556
557    for idx in 0..mocks.len() {
558        let mock = &mocks[idx];
559        if let Some(ref pattern) = mock.match_pattern {
560            if mock_glob_match(pattern, match_text) {
561                if mock.consume_on_match {
562                    let mock = mocks.remove(idx);
563                    return Some(match &mock.error {
564                        Some(err) => Err(mock_error_to_vm_error(err)),
565                        None => Ok(build_mock_result(&mock, match_text.len())),
566                    });
567                }
568                return Some(match &mock.error {
569                    Some(err) => Err(mock_error_to_vm_error(err)),
570                    None => Ok(build_mock_result(mock, match_text.len())),
571                });
572            }
573        }
574    }
575
576    None
577}
578
579fn try_match_builtin_mock(match_text: &str) -> Option<Result<LlmResult, VmError>> {
580    LLM_MOCKS.with(|mocks| try_match_mock_queue(&mut mocks.borrow_mut(), match_text))
581}
582
583fn try_match_cli_mock(match_text: &str) -> Option<Result<LlmResult, VmError>> {
584    CLI_LLM_MOCKS.with(|mocks| try_match_mock_queue(&mut mocks.borrow_mut(), match_text))
585}
586
587pub(crate) fn record_cli_llm_result(result: &LlmResult) {
588    record_unified_tape_llm_call(result);
589    if !CLI_LLM_MOCK_MODE.with(|mode| *mode.borrow() == CliLlmMockMode::Record) {
590        return;
591    }
592    CLI_LLM_RECORDINGS.with(|recordings| {
593        recordings.borrow_mut().push(LlmMock {
594            text: result.text.clone(),
595            tool_calls: result.tool_calls.clone(),
596            match_pattern: None,
597            consume_on_match: false,
598            input_tokens: Some(result.input_tokens),
599            output_tokens: Some(result.output_tokens),
600            cache_read_tokens: Some(result.cache_read_tokens),
601            cache_write_tokens: Some(result.cache_write_tokens),
602            thinking: result.thinking.clone(),
603            thinking_summary: result.thinking_summary.clone(),
604            stop_reason: result.stop_reason.clone(),
605            model: result.model.clone(),
606            provider: Some(result.provider.clone()),
607            blocks: Some(result.blocks.clone()),
608            logprobs: result.logprobs.clone(),
609            error: None,
610        });
611    });
612}
613
614/// Append an `LlmCall` record to the unified-tape recorder when one is
615/// active. The request digest is built from the most recently recorded
616/// `LlmMockCall` so the same hashing surface used for fixture matching
617/// drives the fidelity oracle's request comparison; falls back to a
618/// hash of the response text alone when no matching call is on record
619/// (e.g. when `record_llm_mock_call` was bypassed).
620fn record_unified_tape_llm_call(result: &LlmResult) {
621    if crate::testbench::tape::active_recorder().is_none() {
622        return;
623    }
624    let response_json = serde_json::to_vec(result).unwrap_or_else(|_| Vec::new());
625    let request_digest = LLM_MOCK_CALLS
626        .with(|calls| calls.borrow().last().cloned())
627        .map(|call| {
628            let mut request = serde_json::Map::new();
629            request.insert("messages".to_string(), serde_json::json!(call.messages));
630            request.insert("system".to_string(), serde_json::json!(call.system));
631            request.insert("tools".to_string(), serde_json::json!(call.tools));
632            request.insert(
633                "tool_choice".to_string(),
634                serde_json::json!(call.tool_choice),
635            );
636            request.insert("thinking".to_string(), serde_json::json!(call.thinking));
637            request.insert("model".to_string(), serde_json::json!(result.model));
638            if call.api_mode != "chat_completions" {
639                request.insert("api_mode".to_string(), serde_json::json!(call.api_mode));
640            }
641            if call.provider_tools.is_some() {
642                request.insert(
643                    "provider_tools".to_string(),
644                    serde_json::json!(call.provider_tools),
645                );
646            }
647            if call
648                .output_format
649                .get("kind")
650                .and_then(|value| value.as_str())
651                != Some("text")
652            {
653                request.insert(
654                    "output_format".to_string(),
655                    serde_json::json!(call.output_format),
656                );
657            }
658            if call.previous_response_id.is_some() {
659                request.insert(
660                    "previous_response_id".to_string(),
661                    serde_json::json!(call.previous_response_id),
662                );
663            }
664            if call.store.is_some() {
665                request.insert("store".to_string(), serde_json::json!(call.store));
666            }
667            if call.background.is_some() {
668                request.insert("background".to_string(), serde_json::json!(call.background));
669            }
670            if call.truncation.is_some() {
671                request.insert("truncation".to_string(), serde_json::json!(call.truncation));
672            }
673            if call.compact.is_some() {
674                request.insert("compact".to_string(), serde_json::json!(call.compact));
675            }
676            if call.include.is_some() {
677                request.insert("include".to_string(), serde_json::json!(call.include));
678            }
679            if call.max_tool_calls.is_some() {
680                request.insert(
681                    "max_tool_calls".to_string(),
682                    serde_json::json!(call.max_tool_calls),
683                );
684            }
685            let serialized =
686                serde_json::to_vec(&serde_json::Value::Object(request)).unwrap_or_default();
687            crate::testbench::tape::content_hash(&serialized)
688        })
689        .unwrap_or_else(|| {
690            // Fall back to hashing the response — keeps fidelity comparable
691            // across runs even when the request surface wasn't captured.
692            crate::testbench::tape::content_hash(result.text.as_bytes())
693        });
694    crate::testbench::tape::with_active_recorder(|recorder| {
695        let response = recorder.payload_from_bytes(response_json);
696        Some(crate::testbench::tape::TapeRecordKind::LlmCall {
697            request_digest,
698            response,
699        })
700    });
701}
702
703fn unmatched_cli_prompt_error(match_text: &str) -> VmError {
704    let mut snippet: String = match_text.chars().take(200).collect();
705    if match_text.chars().count() > 200 {
706        snippet.push_str("...");
707    }
708    VmError::Runtime(format!("No --llm-mock fixture matched prompt: {snippet:?}"))
709}
710
711/// Set LLM replay mode (record/replay) and fixture directory.
712pub fn set_replay_mode(mode: LlmReplayMode, fixture_dir: &str) {
713    LLM_REPLAY_MODE.with(|v| *v.borrow_mut() = mode);
714    LLM_FIXTURE_DIR.with(|v| *v.borrow_mut() = fixture_dir.to_string());
715}
716
717pub(crate) fn get_replay_mode() -> LlmReplayMode {
718    LLM_REPLAY_MODE.with(|v| *v.borrow())
719}
720
721pub(crate) fn get_fixture_dir() -> String {
722    LLM_FIXTURE_DIR.with(|v| v.borrow().clone())
723}
724
725/// Hash a request for fixture file naming using canonical JSON serialization.
726pub(crate) fn fixture_hash(
727    model: &str,
728    messages: &[serde_json::Value],
729    system: Option<&str>,
730) -> String {
731    use std::hash::{Hash, Hasher};
732    let mut hasher = std::collections::hash_map::DefaultHasher::new();
733    model.hash(&mut hasher);
734    // Canonical JSON hashing is stable across Debug-format changes.
735    serde_json::to_string(messages)
736        .unwrap_or_default()
737        .hash(&mut hasher);
738    system.hash(&mut hasher);
739    format!("{:016x}", hasher.finish())
740}
741
742pub(crate) fn save_fixture(hash: &str, result: &LlmResult) {
743    let dir = get_fixture_dir();
744    if dir.is_empty() {
745        return;
746    }
747    let _ = std::fs::create_dir_all(&dir);
748    let path = format!("{dir}/{hash}.json");
749    let json = serde_json::json!({
750        "text": result.text,
751        "tool_calls": result.tool_calls,
752        "input_tokens": result.input_tokens,
753        "output_tokens": result.output_tokens,
754        "cache_read_tokens": result.cache_read_tokens,
755        "cache_write_tokens": result.cache_write_tokens,
756        "cache_creation_input_tokens": result.cache_write_tokens,
757        "model": result.model,
758        "provider": result.provider,
759        "thinking": result.thinking,
760        "thinking_summary": result.thinking_summary,
761        "stop_reason": result.stop_reason,
762        "blocks": result.blocks,
763        "logprobs": result.logprobs,
764    });
765    let _ = std::fs::write(
766        &path,
767        serde_json::to_string_pretty(&json).unwrap_or_default(),
768    );
769}
770
771pub(crate) fn load_fixture(hash: &str) -> Option<LlmResult> {
772    let dir = get_fixture_dir();
773    if dir.is_empty() {
774        return None;
775    }
776    let path = format!("{dir}/{hash}.json");
777    let content = std::fs::read_to_string(&path).ok()?;
778    let json: serde_json::Value = serde_json::from_str(&content).ok()?;
779    Some(LlmResult {
780        served_fast: false,
781        text: json["text"].as_str().unwrap_or("").to_string(),
782        tool_calls: json["tool_calls"].as_array().cloned().unwrap_or_default(),
783        input_tokens: json["input_tokens"].as_i64().unwrap_or(0),
784        output_tokens: json["output_tokens"].as_i64().unwrap_or(0),
785        cache_read_tokens: json["cache_read_tokens"].as_i64().unwrap_or(0),
786        cache_write_tokens: json["cache_write_tokens"]
787            .as_i64()
788            .or_else(|| json["cache_creation_input_tokens"].as_i64())
789            .unwrap_or(0),
790        cache_supported: json["cache_supported"].as_bool().unwrap_or(true),
791        model: json["model"].as_str().unwrap_or("").to_string(),
792        provider: json["provider"].as_str().unwrap_or("mock").to_string(),
793        thinking: json["thinking"].as_str().map(|s| s.to_string()),
794        thinking_summary: json["thinking_summary"].as_str().map(|s| s.to_string()),
795        stop_reason: json["stop_reason"].as_str().map(|s| s.to_string()),
796        blocks: json["blocks"].as_array().cloned().unwrap_or_default(),
797        logprobs: json["logprobs"].as_array().cloned().unwrap_or_default(),
798        telemetry: serde_json::from_value(json["telemetry"].clone()).unwrap_or_default(),
799    })
800}
801
802/// Generate stub argument values for required parameters in a tool schema.
803/// This makes mock tool calls realistic — a real model would always fill
804/// required fields, so the mock should too.
805fn mock_required_args(tool_schema: &serde_json::Value) -> serde_json::Value {
806    let mut args = serde_json::Map::new();
807    // Anthropic: {name, input_schema: {properties, required}}
808    // OpenAI:    {function: {name, parameters: {properties, required}}}
809    // Harn VM:   {parameters: {name: {type, required}}}  (from tool_define)
810    let input_schema = tool_schema
811        .get("input_schema")
812        .or_else(|| tool_schema.get("inputSchema"))
813        .or_else(|| {
814            tool_schema
815                .get("function")
816                .and_then(|f| f.get("parameters"))
817        })
818        .or_else(|| tool_schema.get("parameters"));
819    let Some(schema) = input_schema else {
820        return serde_json::Value::Object(args);
821    };
822    let required: std::collections::BTreeSet<String> = schema
823        .get("required")
824        .and_then(|r| r.as_array())
825        .map(|arr| {
826            arr.iter()
827                .filter_map(|v| v.as_str().map(|s| s.to_string()))
828                .collect()
829        })
830        .unwrap_or_default();
831    if let Some(props) = schema.get("properties").and_then(|p| p.as_object()) {
832        for (name, prop) in props {
833            if !required.contains(name) {
834                continue;
835            }
836            let ty = prop
837                .get("type")
838                .and_then(|t| t.as_str())
839                .unwrap_or("string");
840            let placeholder = match ty {
841                "integer" => serde_json::json!(0),
842                "number" => serde_json::json!(0.0),
843                "boolean" => serde_json::json!(false),
844                "array" => serde_json::json!([]),
845                "object" => serde_json::json!({}),
846                _ => serde_json::json!(""),
847            };
848            args.insert(name.clone(), placeholder);
849        }
850    }
851    serde_json::Value::Object(args)
852}
853
854fn mock_tool_name(tool: &serde_json::Value) -> Option<&str> {
855    tool.get("name")
856        .or_else(|| {
857            tool.get("function")
858                .and_then(|function| function.get("name"))
859        })
860        .and_then(|name| name.as_str())
861}
862
863fn mock_auto_tool_candidate(tools: &[serde_json::Value]) -> Option<&serde_json::Value> {
864    tools
865        .iter()
866        .find(|tool| mock_tool_name(tool) != Some("agent_await_resumption"))
867}
868
869/// Mock LLM provider -- deterministic responses for testing without API keys.
870/// When configurable mocks have been registered via `llm_mock()`, those are
871/// checked first (FIFO queue, then pattern matching). Falls through to the
872/// default deterministic behavior when no mocks match.
873pub(crate) fn mock_llm_response(
874    request: &super::api::LlmRequestPayload,
875) -> Result<LlmResult, VmError> {
876    record_llm_mock_call(request);
877
878    let messages = &request.messages;
879    let system = request.system.as_deref();
880    let match_text = mock_match_text(messages);
881    let prompt_text = mock_last_prompt_text(messages);
882    let cache_key = mock_prompt_cache_key(&request.model, messages, system);
883
884    if let Some(matched) = try_match_cli_mock(&match_text) {
885        return matched.map(|mut result| {
886            if request.cache {
887                apply_mock_prompt_cache(&mut result, &cache_key);
888            }
889            result
890        });
891    }
892
893    if let Some(matched) = try_match_builtin_mock(&match_text) {
894        return matched.map(|mut result| {
895            if request.cache {
896                apply_mock_prompt_cache(&mut result, &cache_key);
897            }
898            result
899        });
900    }
901
902    if cli_llm_mock_replay_active() {
903        return Err(unmatched_cli_prompt_error(&match_text));
904    }
905
906    // Generate a mock tool call for the first tool, filling required
907    // params with placeholders so the call passes schema validation.
908    if let Some(tools) = request.native_tools.as_deref() {
909        if let Some(first_tool) = mock_auto_tool_candidate(tools) {
910            let tool_name = mock_tool_name(first_tool).unwrap_or("unknown");
911            let mock_args = mock_required_args(first_tool);
912            let mut result = LlmResult {
913                served_fast: false,
914                text: String::new(),
915                tool_calls: vec![serde_json::json!({
916                        "id": "mock_call_1",
917                        "type": "tool_call",
918                        "name": tool_name,
919                "arguments": mock_args
920                })],
921                input_tokens: prompt_text.len() as i64,
922                output_tokens: 20,
923                cache_read_tokens: 0,
924                cache_write_tokens: 0,
925                cache_supported: true,
926                model: request.model.clone(),
927                provider: "mock".to_string(),
928                thinking: None,
929                thinking_summary: None,
930                stop_reason: None,
931                blocks: vec![serde_json::json!({
932                    "type": "tool_call",
933                    "id": "mock_call_1",
934                    "name": tool_name,
935                    "arguments": mock_args,
936                    "visibility": "internal",
937                })],
938                logprobs: Vec::new(),
939                telemetry: ProviderTelemetry::default(),
940            };
941            if request.cache {
942                apply_mock_prompt_cache(&mut result, &cache_key);
943            }
944            return Ok(result);
945        }
946    }
947
948    // Preserve the historical auto-complete behavior for tagged text-tool
949    // prompts only. Bare `##DONE##` in no-tool/native prompts changes
950    // loop semantics by completing runs that used to exhaust budget unless
951    // a fixture explicitly returned the sentinel.
952    let tagged_done = system.is_some_and(|s| s.contains("<done>"));
953
954    let prose_body = if prompt_text.is_empty() {
955        "Mock LLM response".to_string()
956    } else {
957        let word_count = prompt_text.split_whitespace().count();
958        format!(
959            "Mock response to {word_count}-word prompt: {}",
960            prompt_text.chars().take(100).collect::<String>()
961        )
962    };
963    let response = if tagged_done {
964        format!("<assistant_prose>{prose_body}</assistant_prose>\n<done>##DONE##</done>")
965    } else {
966        prose_body
967    };
968
969    let mut result = LlmResult {
970        served_fast: false,
971        text: response.clone(),
972        tool_calls: vec![],
973        input_tokens: prompt_text.len() as i64,
974        output_tokens: 30,
975        cache_read_tokens: 0,
976        cache_write_tokens: 0,
977        cache_supported: true,
978        model: request.model.clone(),
979        provider: "mock".to_string(),
980        thinking: None,
981        thinking_summary: None,
982        stop_reason: None,
983        blocks: vec![serde_json::json!({
984            "type": "output_text",
985            "text": response,
986            "visibility": "public",
987        })],
988        logprobs: Vec::new(),
989        telemetry: ProviderTelemetry::default(),
990    };
991    if request.cache {
992        apply_mock_prompt_cache(&mut result, &cache_key);
993    }
994    Ok(result)
995}
996
997/// Take all recorded tool calls, leaving the buffer empty.
998pub fn drain_tool_recordings() -> Vec<ToolCallRecord> {
999    TOOL_RECORDINGS.with(|v| std::mem::take(&mut *v.borrow_mut()))
1000}