Skip to main content

harn_vm/llm/
mock.rs

1use std::cell::RefCell;
2use std::collections::{BTreeMap, BTreeSet};
3use std::rc::Rc;
4
5use super::api::{LlmResult, ProviderTelemetry};
6use crate::orchestration::ToolCallRecord;
7use crate::value::{ErrorCategory, VmError, VmValue};
8
/// Fixture replay mode for LLM calls.
///
/// Stored per thread by `set_replay_mode` and read back with
/// `get_replay_mode`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LlmReplayMode {
    /// Neither record nor replay; this is the initial per-thread value.
    Off,
    /// Recording mode.
    Record,
    /// Replay mode.
    Replay,
}
16
/// State of the CLI-installed mock harness, kept separately from the
/// builtin mock queue.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum CliLlmMockMode {
    /// No CLI mock harness is installed.
    Off,
    /// Responses are served from the CLI-installed mocks.
    Replay,
    /// Results are captured into the CLI recordings list.
    Record,
}
23
24/// Categorized error injected by a mock. When present, the mock
25/// short-circuits the provider call and surfaces as
26/// `VmError::CategorizedError`, so `llm_call` throws and
27/// `llm_call_safe` populates its `error` envelope.
28#[derive(Clone)]
29pub struct MockError {
30    pub category: ErrorCategory,
31    pub message: String,
32    pub status: Option<u16>,
33    pub kind: Option<String>,
34    pub reason: Option<String>,
35    /// Optional retry hint. Provider-envelope mocks put this directly
36    /// on the thrown dict; legacy category-only mocks embed it in the
37    /// message so the live-provider parser path still exercises the
38    /// same extraction code.
39    pub retry_after_ms: Option<u64>,
40}
41
42impl MockError {
43    fn has_provider_envelope(&self) -> bool {
44        self.status.is_some() || self.kind.is_some() || self.reason.is_some()
45    }
46}
47
48pub(crate) fn build_mock_error(
49    category: Option<String>,
50    message: Option<String>,
51    status: Option<u16>,
52    kind: Option<String>,
53    reason: Option<String>,
54    retry_after_ms: Option<u64>,
55) -> Result<MockError, String> {
56    if retry_after_ms.is_some_and(|ms| ms > i64::MAX as u64) {
57        return Err("error.retry_after_ms must fit in a signed 64-bit integer".to_string());
58    }
59    let kind = match kind {
60        Some(value) if value.trim().is_empty() => None,
61        Some(value) => {
62            let normalized = value.trim().to_ascii_lowercase();
63            if super::api::LlmErrorKind::parse(&normalized).is_none() {
64                return Err(format!("unknown error kind `{value}`"));
65            }
66            Some(normalized)
67        }
68        None => None,
69    };
70    let reason = reason.and_then(|value| {
71        let trimmed = value.trim();
72        if trimmed.is_empty() {
73            None
74        } else {
75            Some(trimmed.to_string())
76        }
77    });
78    let category_was_provided = category.is_some();
79    let category = match category {
80        Some(value) if value.trim().is_empty() => {
81            return Err("error.category must not be empty".to_string());
82        }
83        Some(value) => {
84            let normalized = value.trim().to_ascii_lowercase();
85            let category = ErrorCategory::parse(&normalized);
86            if category.as_str() != normalized {
87                return Err(format!("unknown error category `{value}`"));
88            }
89            category
90        }
91        None => infer_mock_error_category(status, kind.as_deref(), reason.as_deref()),
92    };
93    if !category_was_provided && kind.is_none() && status.is_none() && reason.is_none() {
94        return Err(
95            "error.category is required unless error.status, error.kind, or error.reason is set"
96                .to_string(),
97        );
98    }
99    Ok(MockError {
100        category,
101        message: message.unwrap_or_else(|| {
102            default_mock_error_message(status, kind.as_deref(), reason.as_deref())
103        }),
104        status,
105        kind,
106        reason,
107        retry_after_ms,
108    })
109}
110
111pub(crate) fn validate_mock_error_status(status: i64) -> Result<u16, String> {
112    let status = u16::try_from(status)
113        .map_err(|_| "error.status must be an HTTP status code".to_string())?;
114    reqwest::StatusCode::from_u16(status)
115        .map_err(|_| "error.status must be an HTTP status code".to_string())?;
116    Ok(status)
117}
118
119fn infer_mock_error_category(
120    status: Option<u16>,
121    kind: Option<&str>,
122    reason: Option<&str>,
123) -> ErrorCategory {
124    if let Some(status) = status {
125        match status {
126            401 | 403 => return ErrorCategory::Auth,
127            404 | 410 => return ErrorCategory::NotFound,
128            408 | 504 | 522 | 524 => return ErrorCategory::Timeout,
129            429 => return ErrorCategory::RateLimit,
130            503 | 529 => return ErrorCategory::Overloaded,
131            500 | 502 => return ErrorCategory::ServerError,
132            _ => {}
133        }
134    }
135    if let Some(reason) = reason {
136        match reason {
137            "rate_limit" => return ErrorCategory::RateLimit,
138            "timeout" => return ErrorCategory::Timeout,
139            "network_error" | "transient_network" => return ErrorCategory::TransientNetwork,
140            "server_error" | "provider_error" | "provider_5xx" | "upstream_unavailable" => {
141                return ErrorCategory::ServerError;
142            }
143            "auth_failure" => return ErrorCategory::Auth,
144            "model_unavailable" => return ErrorCategory::NotFound,
145            _ => {}
146        }
147    }
148    if kind == Some("transient") {
149        return ErrorCategory::ServerError;
150    }
151    ErrorCategory::Generic
152}
153
/// Build the fallback message for an error mock that supplied none.
///
/// The message is `"HTTP <status> mock LLM error"` or `"mock LLM error"`,
/// optionally followed by a parenthesized detail: `kind/reason`, just
/// `reason`, or just `kind`. When a status is present, `kind` is shown
/// only together with `reason`. With no inputs at all the result is the
/// empty string.
fn default_mock_error_message(
    status: Option<u16>,
    kind: Option<&str>,
    reason: Option<&str>,
) -> String {
    let detail = match (kind, reason) {
        (Some(kind), Some(reason)) => Some(format!("{kind}/{reason}")),
        (None, Some(reason)) => Some(reason.to_string()),
        // A lone kind is only reported when there is no HTTP status.
        (Some(kind), None) if status.is_none() => Some(kind.to_string()),
        (Some(_), None) | (None, None) => None,
    };

    match (status, detail) {
        (Some(status), Some(detail)) => format!("HTTP {status} mock LLM error ({detail})"),
        (Some(status), None) => format!("HTTP {status} mock LLM error"),
        (None, Some(detail)) => format!("mock LLM error ({detail})"),
        (None, None) => String::new(),
    }
}
171
172#[derive(Clone)]
173pub struct LlmMock {
174    pub text: String,
175    pub tool_calls: Vec<serde_json::Value>,
176    pub match_pattern: Option<String>, // None = FIFO (consumed), Some = glob (reusable)
177    pub consume_on_match: bool,
178    pub input_tokens: Option<i64>,
179    pub output_tokens: Option<i64>,
180    pub cache_read_tokens: Option<i64>,
181    pub cache_write_tokens: Option<i64>,
182    pub thinking: Option<String>,
183    pub thinking_summary: Option<String>,
184    pub stop_reason: Option<String>,
185    pub model: String,
186    pub provider: Option<String>,
187    pub blocks: Option<Vec<serde_json::Value>>,
188    pub logprobs: Vec<serde_json::Value>,
189    /// When `Some`, this mock synthesizes an error instead of an
190    /// `LlmResult`. `text`/`tool_calls` are ignored for error mocks.
191    pub error: Option<MockError>,
192}
193
194#[derive(Clone)]
195pub(crate) struct LlmMockCall {
196    pub api_mode: String,
197    pub messages: Vec<serde_json::Value>,
198    pub system: Option<String>,
199    pub tools: Option<Vec<serde_json::Value>>,
200    pub provider_tools: Option<Vec<serde_json::Value>>,
201    pub tool_choice: Option<serde_json::Value>,
202    pub output_format: serde_json::Value,
203    pub thinking: serde_json::Value,
204    pub previous_response_id: Option<String>,
205    pub store: Option<bool>,
206    pub background: Option<bool>,
207    pub truncation: Option<String>,
208    pub compact: Option<bool>,
209    pub include: Option<Vec<String>>,
210    pub max_tool_calls: Option<i64>,
211}
212
213type LlmMockScope = (Vec<LlmMock>, Vec<LlmMockCall>, BTreeSet<String>);
214
215thread_local! {
216    static LLM_REPLAY_MODE: RefCell<LlmReplayMode> = const { RefCell::new(LlmReplayMode::Off) };
217    static LLM_FIXTURE_DIR: RefCell<String> = const { RefCell::new(String::new()) };
218    static TOOL_RECORDINGS: RefCell<Vec<ToolCallRecord>> = const { RefCell::new(Vec::new()) };
219    static LLM_MOCKS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
220    static CLI_LLM_MOCK_MODE: RefCell<CliLlmMockMode> = const { RefCell::new(CliLlmMockMode::Off) };
221    static CLI_LLM_MOCKS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
222    static CLI_LLM_RECORDINGS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
223    static LLM_MOCK_CALLS: RefCell<Vec<LlmMockCall>> = const { RefCell::new(Vec::new()) };
224    static LLM_PROMPT_CACHE: RefCell<BTreeSet<String>> = const { RefCell::new(BTreeSet::new()) };
225    static LLM_MOCK_SCOPES: RefCell<Vec<LlmMockScope>> = const { RefCell::new(Vec::new()) };
226}
227
228pub(crate) fn push_llm_mock(mock: LlmMock) {
229    LLM_MOCKS.with(|v| v.borrow_mut().push(mock));
230}
231
232pub(crate) fn get_llm_mock_calls() -> Vec<LlmMockCall> {
233    LLM_MOCK_CALLS.with(|v| v.borrow().clone())
234}
235
236pub(crate) fn builtin_llm_mock_active() -> bool {
237    LLM_MOCKS.with(|v| !v.borrow().is_empty())
238}
239
240pub(crate) fn reset_llm_mock_state() {
241    LLM_MOCKS.with(|v| v.borrow_mut().clear());
242    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Off);
243    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
244    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
245    LLM_MOCK_CALLS.with(|v| v.borrow_mut().clear());
246    LLM_PROMPT_CACHE.with(|v| v.borrow_mut().clear());
247    LLM_MOCK_SCOPES.with(|v| v.borrow_mut().clear());
248}
249
250/// Save the current builtin LLM mock queue and recorded-calls list, then
251/// start a fresh empty scope. Paired with `pop_llm_mock_scope`. Backs
252/// the `with_llm_mocks` helper in `std/testing` so tests reliably
253/// roll back to the prior state, including when the body throws.
254pub(crate) fn push_llm_mock_scope() {
255    let mocks = LLM_MOCKS.with(|v| std::mem::take(&mut *v.borrow_mut()));
256    let calls = LLM_MOCK_CALLS.with(|v| std::mem::take(&mut *v.borrow_mut()));
257    let cache = LLM_PROMPT_CACHE.with(|v| std::mem::take(&mut *v.borrow_mut()));
258    LLM_MOCK_SCOPES.with(|v| v.borrow_mut().push((mocks, calls, cache)));
259}
260
261/// Restore the most recently pushed builtin LLM mock scope. Returns
262/// `false` when there is nothing to pop, so the builtin can surface a
263/// clear "imbalanced scope" error rather than silently corrupting
264/// state. CLI-installed mocks are intentionally untouched: they are an
265/// outer harness and should not flicker on each per-test scope swap.
266pub(crate) fn pop_llm_mock_scope() -> bool {
267    let entry = LLM_MOCK_SCOPES.with(|v| v.borrow_mut().pop());
268    match entry {
269        Some((mocks, calls, cache)) => {
270            LLM_MOCKS.with(|v| *v.borrow_mut() = mocks);
271            LLM_MOCK_CALLS.with(|v| *v.borrow_mut() = calls);
272            LLM_PROMPT_CACHE.with(|v| *v.borrow_mut() = cache);
273            true
274        }
275        None => false,
276    }
277}
278
279pub fn clear_cli_llm_mock_mode() {
280    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Off);
281    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
282    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
283}
284
285pub fn install_cli_llm_mocks(mocks: Vec<LlmMock>) {
286    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Replay);
287    CLI_LLM_MOCKS.with(|v| *v.borrow_mut() = mocks);
288    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
289}
290
291pub fn enable_cli_llm_mock_recording() {
292    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Record);
293    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
294    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
295}
296
297pub fn take_cli_llm_recordings() -> Vec<LlmMock> {
298    CLI_LLM_RECORDINGS.with(|v| std::mem::take(&mut *v.borrow_mut()))
299}
300
301pub(crate) fn cli_llm_mock_replay_active() -> bool {
302    CLI_LLM_MOCK_MODE.with(|v| *v.borrow() == CliLlmMockMode::Replay)
303}
304
305fn record_llm_mock_call(request: &super::api::LlmRequestPayload) {
306    LLM_MOCK_CALLS.with(|v| {
307        v.borrow_mut().push(LlmMockCall {
308            api_mode: request.api_mode.as_str().to_string(),
309            messages: request.messages.clone(),
310            system: request.system.clone(),
311            tools: request.native_tools.clone(),
312            provider_tools: if request.provider_tools.is_empty() {
313                None
314            } else {
315                Some(request.provider_tools.clone())
316            },
317            tool_choice: request.tool_choice.clone(),
318            output_format: serde_json::to_value(&request.output_format).unwrap_or_else(|_| {
319                serde_json::json!({
320                    "kind": "text"
321                })
322            }),
323            thinking: serde_json::to_value(&request.thinking).unwrap_or_else(|_| {
324                serde_json::json!({
325                    "mode": "disabled"
326                })
327            }),
328            previous_response_id: request.previous_response_id.clone(),
329            store: request.store,
330            background: request.background,
331            truncation: request.truncation.clone(),
332            compact: request.compact,
333            include: request.include.clone(),
334            max_tool_calls: request.max_tool_calls,
335        });
336    });
337}
338
339/// Build an LlmResult from a matched mock.
340fn build_mock_result(mock: &LlmMock, last_msg_len: usize) -> LlmResult {
341    let (tool_calls, blocks) = if let Some(blocks) = &mock.blocks {
342        (mock.tool_calls.clone(), blocks.clone())
343    } else {
344        let mut blocks = Vec::new();
345
346        if !mock.text.is_empty() {
347            blocks.push(serde_json::json!({
348                "type": "output_text",
349                "text": mock.text,
350                "visibility": "public",
351            }));
352        }
353
354        let mut tool_calls = Vec::new();
355        for (i, tc) in mock.tool_calls.iter().enumerate() {
356            let id = format!("mock_call_{}", i + 1);
357            let name = tc.get("name").and_then(|n| n.as_str()).unwrap_or("unknown");
358            let arguments = tc
359                .get("arguments")
360                .cloned()
361                .unwrap_or(serde_json::json!({}));
362            tool_calls.push(serde_json::json!({
363                "id": id,
364                "type": "tool_call",
365                "name": name,
366                "arguments": arguments,
367            }));
368            blocks.push(serde_json::json!({
369                "type": "tool_call",
370                "id": id,
371                "name": name,
372                "arguments": arguments,
373                "visibility": "internal",
374            }));
375        }
376
377        (tool_calls, blocks)
378    };
379
380    LlmResult {
381        text: mock.text.clone(),
382        tool_calls,
383        input_tokens: mock.input_tokens.unwrap_or(last_msg_len as i64),
384        output_tokens: mock.output_tokens.unwrap_or(30),
385        cache_read_tokens: mock.cache_read_tokens.unwrap_or(0),
386        cache_write_tokens: mock.cache_write_tokens.unwrap_or(0),
387        model: mock.model.clone(),
388        provider: mock.provider.clone().unwrap_or_else(|| "mock".to_string()),
389        thinking: mock.thinking.clone(),
390        thinking_summary: mock.thinking_summary.clone(),
391        stop_reason: mock.stop_reason.clone(),
392        blocks,
393        logprobs: mock.logprobs.clone(),
394        telemetry: ProviderTelemetry::default(),
395    }
396}
397
/// Match `text` against a glob whose only wildcard is `*`.
///
/// The pattern is split on `*`; the first segment must be a prefix of
/// the text, the last a suffix of what remains, and every segment in
/// between must occur, in order, after the previous one. Empty segments
/// (from leading, trailing, or doubled `*`) are ignored. A pattern
/// without `*` must equal the text exactly.
fn mock_glob_match(pattern: &str, text: &str) -> bool {
    if pattern == "*" {
        return true;
    }
    if !pattern.contains('*') {
        return pattern == text;
    }

    let segments: Vec<&str> = pattern.split('*').collect();
    let last_index = segments.len() - 1;
    let mut rest = text;

    for (index, &segment) in segments.iter().enumerate() {
        if segment.is_empty() {
            continue;
        }
        rest = if index == 0 {
            // Anchored at the start of the text.
            match rest.strip_prefix(segment) {
                Some(tail) => tail,
                None => return false,
            }
        } else if index == last_index {
            // Anchored at the end; nothing is left to match afterwards.
            if !rest.ends_with(segment) {
                return false;
            }
            ""
        } else {
            // Floating segment: take the leftmost occurrence.
            match rest.find(segment) {
                Some(at) => &rest[at + segment.len()..],
                None => return false,
            }
        };
    }
    true
}
432
433fn collect_mock_match_strings(value: &serde_json::Value, out: &mut Vec<String>) {
434    match value {
435        serde_json::Value::String(text) if !text.is_empty() => out.push(text.clone()),
436        serde_json::Value::String(_) => {}
437        serde_json::Value::Array(items) => {
438            for item in items {
439                collect_mock_match_strings(item, out);
440            }
441        }
442        serde_json::Value::Object(map) => {
443            for value in map.values() {
444                collect_mock_match_strings(value, out);
445            }
446        }
447        _ => {}
448    }
449}
450
451fn mock_match_text(messages: &[serde_json::Value]) -> String {
452    let mut parts = Vec::new();
453    for message in messages {
454        collect_mock_match_strings(message, &mut parts);
455    }
456    parts.join("\n")
457}
458
459fn mock_last_prompt_text(messages: &[serde_json::Value]) -> String {
460    for message in messages.iter().rev() {
461        let Some(content) = message.get("content") else {
462            continue;
463        };
464        let mut parts = Vec::new();
465        collect_mock_match_strings(content, &mut parts);
466        let text = parts.join("\n");
467        if !text.trim().is_empty() {
468            return text;
469        }
470    }
471    String::new()
472}
473
474fn mock_prompt_cache_key(
475    model: &str,
476    messages: &[serde_json::Value],
477    system: Option<&str>,
478) -> String {
479    serde_json::to_string(&serde_json::json!({
480        "model": model,
481        "system": system,
482        "messages": messages,
483    }))
484    .unwrap_or_default()
485}
486
487fn apply_mock_prompt_cache(result: &mut LlmResult, cache_key: &str) {
488    if result.cache_read_tokens > 0 || result.cache_write_tokens > 0 {
489        return;
490    }
491    let cache_tokens = result.input_tokens.max(0);
492    if cache_tokens == 0 {
493        return;
494    }
495    let cache_hit = LLM_PROMPT_CACHE.with(|cache| {
496        let mut cache = cache.borrow_mut();
497        if cache.contains(cache_key) {
498            true
499        } else {
500            cache.insert(cache_key.to_string());
501            false
502        }
503    });
504    if cache_hit {
505        result.cache_read_tokens = cache_tokens;
506    } else {
507        result.cache_write_tokens = cache_tokens;
508    }
509}
510
511/// Convert a mock's `error` payload into the `VmError` that the
512/// provider path would have raised, so classification, retry, and
513/// `error_category` all behave identically to a real failure.
514fn mock_error_to_vm_error(err: &MockError) -> VmError {
515    let message = mock_error_message(err);
516    if err.has_provider_envelope() {
517        let classified = super::api::classify_llm_error(err.category.clone(), &message);
518        let mut dict = BTreeMap::new();
519        dict.insert(
520            "category".to_string(),
521            VmValue::String(Rc::from(err.category.as_str())),
522        );
523        dict.insert(
524            "kind".to_string(),
525            VmValue::String(Rc::from(
526                err.kind
527                    .as_deref()
528                    .unwrap_or_else(|| classified.kind.as_str()),
529            )),
530        );
531        dict.insert(
532            "reason".to_string(),
533            VmValue::String(Rc::from(
534                err.reason
535                    .as_deref()
536                    .unwrap_or_else(|| classified.reason.as_str()),
537            )),
538        );
539        dict.insert("message".to_string(), VmValue::String(Rc::from(message)));
540        if let Some(status) = err.status {
541            dict.insert("status".to_string(), VmValue::Int(i64::from(status)));
542        }
543        if let Some(retry_after_ms) = err.retry_after_ms {
544            dict.insert(
545                "retry_after_ms".to_string(),
546                VmValue::Int(retry_after_ms as i64),
547            );
548        }
549        return VmError::Thrown(VmValue::Dict(Rc::new(dict)));
550    }
551
552    VmError::CategorizedError {
553        message,
554        category: err.category.clone(),
555    }
556}
557
558fn mock_error_message(err: &MockError) -> String {
559    // Embed legacy category-only retry hints into the message so the
560    // same parser that handles live provider headers populates
561    // `retry_after_ms` on the final thrown dict.
562    let Some(ms) = err.retry_after_ms else {
563        return err.message.clone();
564    };
565    if err.has_provider_envelope() {
566        return err.message.clone();
567    }
568    let secs = (ms as f64 / 1000.0).max(0.0);
569    let sep = if err.message.is_empty() || err.message.ends_with('\n') {
570        ""
571    } else {
572        "\n"
573    };
574    format!("{}{sep}retry-after: {secs}\n", err.message)
575}
576
577/// Try to find and return a matching mock response. Returns
578/// `Some(Ok(LlmResult))` on a text/tool_call match, `Some(Err(VmError))`
579/// on an error-mock match, and `None` to fall through to default.
580fn try_match_mock_queue(
581    mocks: &mut Vec<LlmMock>,
582    match_text: &str,
583) -> Option<Result<LlmResult, VmError>> {
584    if let Some(idx) = mocks.iter().position(|m| m.match_pattern.is_none()) {
585        let mock = mocks.remove(idx);
586        return Some(match &mock.error {
587            Some(err) => Err(mock_error_to_vm_error(err)),
588            None => Ok(build_mock_result(&mock, match_text.len())),
589        });
590    }
591
592    for idx in 0..mocks.len() {
593        let mock = &mocks[idx];
594        if let Some(ref pattern) = mock.match_pattern {
595            if mock_glob_match(pattern, match_text) {
596                if mock.consume_on_match {
597                    let mock = mocks.remove(idx);
598                    return Some(match &mock.error {
599                        Some(err) => Err(mock_error_to_vm_error(err)),
600                        None => Ok(build_mock_result(&mock, match_text.len())),
601                    });
602                }
603                return Some(match &mock.error {
604                    Some(err) => Err(mock_error_to_vm_error(err)),
605                    None => Ok(build_mock_result(mock, match_text.len())),
606                });
607            }
608        }
609    }
610
611    None
612}
613
614fn try_match_builtin_mock(match_text: &str) -> Option<Result<LlmResult, VmError>> {
615    LLM_MOCKS.with(|mocks| try_match_mock_queue(&mut mocks.borrow_mut(), match_text))
616}
617
618fn try_match_cli_mock(match_text: &str) -> Option<Result<LlmResult, VmError>> {
619    CLI_LLM_MOCKS.with(|mocks| try_match_mock_queue(&mut mocks.borrow_mut(), match_text))
620}
621
622pub(crate) fn record_cli_llm_result(result: &LlmResult) {
623    record_unified_tape_llm_call(result);
624    if !CLI_LLM_MOCK_MODE.with(|mode| *mode.borrow() == CliLlmMockMode::Record) {
625        return;
626    }
627    CLI_LLM_RECORDINGS.with(|recordings| {
628        recordings.borrow_mut().push(LlmMock {
629            text: result.text.clone(),
630            tool_calls: result.tool_calls.clone(),
631            match_pattern: None,
632            consume_on_match: false,
633            input_tokens: Some(result.input_tokens),
634            output_tokens: Some(result.output_tokens),
635            cache_read_tokens: Some(result.cache_read_tokens),
636            cache_write_tokens: Some(result.cache_write_tokens),
637            thinking: result.thinking.clone(),
638            thinking_summary: result.thinking_summary.clone(),
639            stop_reason: result.stop_reason.clone(),
640            model: result.model.clone(),
641            provider: Some(result.provider.clone()),
642            blocks: Some(result.blocks.clone()),
643            logprobs: result.logprobs.clone(),
644            error: None,
645        });
646    });
647}
648
649/// Append an `LlmCall` record to the unified-tape recorder when one is
650/// active. The request digest is built from the most recently recorded
651/// `LlmMockCall` so the same hashing surface used for fixture matching
652/// drives the fidelity oracle's request comparison; falls back to a
653/// hash of the response text alone when no matching call is on record
654/// (e.g. when `record_llm_mock_call` was bypassed).
655fn record_unified_tape_llm_call(result: &LlmResult) {
656    if crate::testbench::tape::active_recorder().is_none() {
657        return;
658    }
659    let response_json = serde_json::to_vec(result).unwrap_or_else(|_| Vec::new());
660    let request_digest = LLM_MOCK_CALLS
661        .with(|calls| calls.borrow().last().cloned())
662        .map(|call| {
663            let mut request = serde_json::Map::new();
664            request.insert("messages".to_string(), serde_json::json!(call.messages));
665            request.insert("system".to_string(), serde_json::json!(call.system));
666            request.insert("tools".to_string(), serde_json::json!(call.tools));
667            request.insert(
668                "tool_choice".to_string(),
669                serde_json::json!(call.tool_choice),
670            );
671            request.insert("thinking".to_string(), serde_json::json!(call.thinking));
672            request.insert("model".to_string(), serde_json::json!(result.model));
673            if call.api_mode != "chat_completions" {
674                request.insert("api_mode".to_string(), serde_json::json!(call.api_mode));
675            }
676            if call.provider_tools.is_some() {
677                request.insert(
678                    "provider_tools".to_string(),
679                    serde_json::json!(call.provider_tools),
680                );
681            }
682            if call
683                .output_format
684                .get("kind")
685                .and_then(|value| value.as_str())
686                != Some("text")
687            {
688                request.insert(
689                    "output_format".to_string(),
690                    serde_json::json!(call.output_format),
691                );
692            }
693            if call.previous_response_id.is_some() {
694                request.insert(
695                    "previous_response_id".to_string(),
696                    serde_json::json!(call.previous_response_id),
697                );
698            }
699            if call.store.is_some() {
700                request.insert("store".to_string(), serde_json::json!(call.store));
701            }
702            if call.background.is_some() {
703                request.insert("background".to_string(), serde_json::json!(call.background));
704            }
705            if call.truncation.is_some() {
706                request.insert("truncation".to_string(), serde_json::json!(call.truncation));
707            }
708            if call.compact.is_some() {
709                request.insert("compact".to_string(), serde_json::json!(call.compact));
710            }
711            if call.include.is_some() {
712                request.insert("include".to_string(), serde_json::json!(call.include));
713            }
714            if call.max_tool_calls.is_some() {
715                request.insert(
716                    "max_tool_calls".to_string(),
717                    serde_json::json!(call.max_tool_calls),
718                );
719            }
720            let serialized =
721                serde_json::to_vec(&serde_json::Value::Object(request)).unwrap_or_default();
722            crate::testbench::tape::content_hash(&serialized)
723        })
724        .unwrap_or_else(|| {
725            // Fall back to hashing the response — keeps fidelity comparable
726            // across runs even when the request surface wasn't captured.
727            crate::testbench::tape::content_hash(result.text.as_bytes())
728        });
729    crate::testbench::tape::with_active_recorder(|recorder| {
730        let response = recorder.payload_from_bytes(response_json);
731        Some(crate::testbench::tape::TapeRecordKind::LlmCall {
732            request_digest,
733            response,
734        })
735    });
736}
737
738fn unmatched_cli_prompt_error(match_text: &str) -> VmError {
739    let mut snippet: String = match_text.chars().take(200).collect();
740    if match_text.chars().count() > 200 {
741        snippet.push_str("...");
742    }
743    VmError::Runtime(format!("No --llm-mock fixture matched prompt: {snippet:?}"))
744}
745
746/// Set LLM replay mode (record/replay) and fixture directory.
747pub fn set_replay_mode(mode: LlmReplayMode, fixture_dir: &str) {
748    LLM_REPLAY_MODE.with(|v| *v.borrow_mut() = mode);
749    LLM_FIXTURE_DIR.with(|v| *v.borrow_mut() = fixture_dir.to_string());
750}
751
752pub(crate) fn get_replay_mode() -> LlmReplayMode {
753    LLM_REPLAY_MODE.with(|v| *v.borrow())
754}
755
756pub(crate) fn get_fixture_dir() -> String {
757    LLM_FIXTURE_DIR.with(|v| v.borrow().clone())
758}
759
760/// Hash a request for fixture file naming using canonical JSON serialization.
761pub(crate) fn fixture_hash(
762    model: &str,
763    messages: &[serde_json::Value],
764    system: Option<&str>,
765) -> String {
766    use std::hash::{Hash, Hasher};
767    let mut hasher = std::collections::hash_map::DefaultHasher::new();
768    model.hash(&mut hasher);
769    // Canonical JSON hashing is stable across Debug-format changes.
770    serde_json::to_string(messages)
771        .unwrap_or_default()
772        .hash(&mut hasher);
773    system.hash(&mut hasher);
774    format!("{:016x}", hasher.finish())
775}
776
777pub(crate) fn save_fixture(hash: &str, result: &LlmResult) {
778    let dir = get_fixture_dir();
779    if dir.is_empty() {
780        return;
781    }
782    let _ = std::fs::create_dir_all(&dir);
783    let path = format!("{dir}/{hash}.json");
784    let json = serde_json::json!({
785        "text": result.text,
786        "tool_calls": result.tool_calls,
787        "input_tokens": result.input_tokens,
788        "output_tokens": result.output_tokens,
789        "cache_read_tokens": result.cache_read_tokens,
790        "cache_write_tokens": result.cache_write_tokens,
791        "cache_creation_input_tokens": result.cache_write_tokens,
792        "model": result.model,
793        "provider": result.provider,
794        "thinking": result.thinking,
795        "thinking_summary": result.thinking_summary,
796        "stop_reason": result.stop_reason,
797        "blocks": result.blocks,
798        "logprobs": result.logprobs,
799    });
800    let _ = std::fs::write(
801        &path,
802        serde_json::to_string_pretty(&json).unwrap_or_default(),
803    );
804}
805
806pub(crate) fn load_fixture(hash: &str) -> Option<LlmResult> {
807    let dir = get_fixture_dir();
808    if dir.is_empty() {
809        return None;
810    }
811    let path = format!("{dir}/{hash}.json");
812    let content = std::fs::read_to_string(&path).ok()?;
813    let json: serde_json::Value = serde_json::from_str(&content).ok()?;
814    Some(LlmResult {
815        text: json["text"].as_str().unwrap_or("").to_string(),
816        tool_calls: json["tool_calls"].as_array().cloned().unwrap_or_default(),
817        input_tokens: json["input_tokens"].as_i64().unwrap_or(0),
818        output_tokens: json["output_tokens"].as_i64().unwrap_or(0),
819        cache_read_tokens: json["cache_read_tokens"].as_i64().unwrap_or(0),
820        cache_write_tokens: json["cache_write_tokens"]
821            .as_i64()
822            .or_else(|| json["cache_creation_input_tokens"].as_i64())
823            .unwrap_or(0),
824        model: json["model"].as_str().unwrap_or("").to_string(),
825        provider: json["provider"].as_str().unwrap_or("mock").to_string(),
826        thinking: json["thinking"].as_str().map(|s| s.to_string()),
827        thinking_summary: json["thinking_summary"].as_str().map(|s| s.to_string()),
828        stop_reason: json["stop_reason"].as_str().map(|s| s.to_string()),
829        blocks: json["blocks"].as_array().cloned().unwrap_or_default(),
830        logprobs: json["logprobs"].as_array().cloned().unwrap_or_default(),
831        telemetry: serde_json::from_value(json["telemetry"].clone()).unwrap_or_default(),
832    })
833}
834
835/// Generate stub argument values for required parameters in a tool schema.
836/// This makes mock tool calls realistic — a real model would always fill
837/// required fields, so the mock should too.
838fn mock_required_args(tool_schema: &serde_json::Value) -> serde_json::Value {
839    let mut args = serde_json::Map::new();
840    // Anthropic: {name, input_schema: {properties, required}}
841    // OpenAI:    {function: {name, parameters: {properties, required}}}
842    // Harn VM:   {parameters: {name: {type, required}}}  (from tool_define)
843    let input_schema = tool_schema
844        .get("input_schema")
845        .or_else(|| tool_schema.get("inputSchema"))
846        .or_else(|| {
847            tool_schema
848                .get("function")
849                .and_then(|f| f.get("parameters"))
850        })
851        .or_else(|| tool_schema.get("parameters"));
852    let Some(schema) = input_schema else {
853        return serde_json::Value::Object(args);
854    };
855    let required: std::collections::BTreeSet<String> = schema
856        .get("required")
857        .and_then(|r| r.as_array())
858        .map(|arr| {
859            arr.iter()
860                .filter_map(|v| v.as_str().map(|s| s.to_string()))
861                .collect()
862        })
863        .unwrap_or_default();
864    if let Some(props) = schema.get("properties").and_then(|p| p.as_object()) {
865        for (name, prop) in props {
866            if !required.contains(name) {
867                continue;
868            }
869            let ty = prop
870                .get("type")
871                .and_then(|t| t.as_str())
872                .unwrap_or("string");
873            let placeholder = match ty {
874                "integer" => serde_json::json!(0),
875                "number" => serde_json::json!(0.0),
876                "boolean" => serde_json::json!(false),
877                "array" => serde_json::json!([]),
878                "object" => serde_json::json!({}),
879                _ => serde_json::json!(""),
880            };
881            args.insert(name.clone(), placeholder);
882        }
883    }
884    serde_json::Value::Object(args)
885}
886
887fn mock_tool_name(tool: &serde_json::Value) -> Option<&str> {
888    tool.get("name")
889        .or_else(|| {
890            tool.get("function")
891                .and_then(|function| function.get("name"))
892        })
893        .and_then(|name| name.as_str())
894}
895
896fn mock_auto_tool_candidate(tools: &[serde_json::Value]) -> Option<&serde_json::Value> {
897    tools
898        .iter()
899        .find(|tool| mock_tool_name(tool) != Some("agent_await_resumption"))
900}
901
902/// Mock LLM provider -- deterministic responses for testing without API keys.
903/// When configurable mocks have been registered via `llm_mock()`, those are
904/// checked first (FIFO queue, then pattern matching). Falls through to the
905/// default deterministic behavior when no mocks match.
906pub(crate) fn mock_llm_response(
907    request: &super::api::LlmRequestPayload,
908) -> Result<LlmResult, VmError> {
909    record_llm_mock_call(request);
910
911    let messages = &request.messages;
912    let system = request.system.as_deref();
913    let match_text = mock_match_text(messages);
914    let prompt_text = mock_last_prompt_text(messages);
915    let cache_key = mock_prompt_cache_key(&request.model, messages, system);
916
917    if let Some(matched) = try_match_cli_mock(&match_text) {
918        return matched.map(|mut result| {
919            if request.cache {
920                apply_mock_prompt_cache(&mut result, &cache_key);
921            }
922            result
923        });
924    }
925
926    if let Some(matched) = try_match_builtin_mock(&match_text) {
927        return matched.map(|mut result| {
928            if request.cache {
929                apply_mock_prompt_cache(&mut result, &cache_key);
930            }
931            result
932        });
933    }
934
935    if cli_llm_mock_replay_active() {
936        return Err(unmatched_cli_prompt_error(&match_text));
937    }
938
939    // Generate a mock tool call for the first tool, filling required
940    // params with placeholders so the call passes schema validation.
941    if let Some(tools) = request.native_tools.as_deref() {
942        if let Some(first_tool) = mock_auto_tool_candidate(tools) {
943            let tool_name = mock_tool_name(first_tool).unwrap_or("unknown");
944            let mock_args = mock_required_args(first_tool);
945            let mut result = LlmResult {
946                text: String::new(),
947                tool_calls: vec![serde_json::json!({
948                        "id": "mock_call_1",
949                        "type": "tool_call",
950                        "name": tool_name,
951                "arguments": mock_args
952                })],
953                input_tokens: prompt_text.len() as i64,
954                output_tokens: 20,
955                cache_read_tokens: 0,
956                cache_write_tokens: 0,
957                model: request.model.clone(),
958                provider: "mock".to_string(),
959                thinking: None,
960                thinking_summary: None,
961                stop_reason: None,
962                blocks: vec![serde_json::json!({
963                    "type": "tool_call",
964                    "id": "mock_call_1",
965                    "name": tool_name,
966                    "arguments": mock_args,
967                    "visibility": "internal",
968                })],
969                logprobs: Vec::new(),
970                telemetry: ProviderTelemetry::default(),
971            };
972            if request.cache {
973                apply_mock_prompt_cache(&mut result, &cache_key);
974            }
975            return Ok(result);
976        }
977    }
978
979    // Preserve the historical auto-complete behavior for tagged text-tool
980    // prompts only. Bare `##DONE##` in no-tool/native prompts changes
981    // loop semantics by completing runs that used to exhaust budget unless
982    // a fixture explicitly returned the sentinel.
983    let tagged_done = system.is_some_and(|s| s.contains("<done>"));
984
985    let prose_body = if prompt_text.is_empty() {
986        "Mock LLM response".to_string()
987    } else {
988        let word_count = prompt_text.split_whitespace().count();
989        format!(
990            "Mock response to {word_count}-word prompt: {}",
991            prompt_text.chars().take(100).collect::<String>()
992        )
993    };
994    let response = if tagged_done {
995        format!("<assistant_prose>{prose_body}</assistant_prose>\n<done>##DONE##</done>")
996    } else {
997        prose_body
998    };
999
1000    let mut result = LlmResult {
1001        text: response.clone(),
1002        tool_calls: vec![],
1003        input_tokens: prompt_text.len() as i64,
1004        output_tokens: 30,
1005        cache_read_tokens: 0,
1006        cache_write_tokens: 0,
1007        model: request.model.clone(),
1008        provider: "mock".to_string(),
1009        thinking: None,
1010        thinking_summary: None,
1011        stop_reason: None,
1012        blocks: vec![serde_json::json!({
1013            "type": "output_text",
1014            "text": response,
1015            "visibility": "public",
1016        })],
1017        logprobs: Vec::new(),
1018        telemetry: ProviderTelemetry::default(),
1019    };
1020    if request.cache {
1021        apply_mock_prompt_cache(&mut result, &cache_key);
1022    }
1023    Ok(result)
1024}
1025
1026/// Take all recorded tool calls, leaving the buffer empty.
1027pub fn drain_tool_recordings() -> Vec<ToolCallRecord> {
1028    TOOL_RECORDINGS.with(|v| std::mem::take(&mut *v.borrow_mut()))
1029}