Skip to main content

harn_vm/llm/
mock.rs

1use std::cell::RefCell;
2use std::collections::{BTreeMap, BTreeSet};
3
4use super::api::{LlmResult, ProviderTelemetry};
5use crate::orchestration::ToolCallRecord;
6use crate::value::{ErrorCategory, VmError, VmValue};
7
/// LLM replay mode.
///
/// Stored per thread in `LLM_REPLAY_MODE` (initially `Off`) and changed
/// through `set_replay_mode`, which records the fixture directory alongside it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlmReplayMode {
    /// Initial state of the thread-local; no replay mode selected.
    Off,
    /// NOTE(review): presumably writes provider responses to the fixture
    /// directory via `save_fixture` — the consumer is not in this file section.
    Record,
    /// NOTE(review): presumably serves responses from the fixture directory
    /// via `load_fixture` — the consumer is not in this file section.
    Replay,
}
15
/// State of the CLI-installed mock harness, stored per thread in
/// `CLI_LLM_MOCK_MODE`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CliLlmMockMode {
    /// No CLI harness; set by `clear_cli_llm_mock_mode` and
    /// `reset_llm_mock_state`.
    Off,
    /// Set by `install_cli_llm_mocks`, which also installs the mock queue.
    Replay,
    /// Set by `enable_cli_llm_mock_recording`; `record_cli_llm_result` only
    /// appends to `CLI_LLM_RECORDINGS` while this mode is active.
    Record,
}
22
/// Categorized error injected by a mock. When present, the mock
/// short-circuits the provider call and surfaces as
/// `VmError::CategorizedError` — or, when any of `status`, `kind`, or
/// `reason` is set, as a thrown dict carrying those provider-envelope
/// fields — so `llm_call` throws and `llm_call_safe` populates its
/// `error` envelope.
#[derive(Clone)]
pub struct MockError {
    /// Error category; supplied explicitly or inferred by `build_mock_error`
    /// from `status` / `reason` / `kind`.
    pub category: ErrorCategory,
    /// Human-readable message. `build_mock_error` substitutes a generated
    /// default when the caller gives none.
    pub message: String,
    /// Optional status code; emitted as `status` on the thrown dict.
    pub status: Option<u16>,
    /// Optional error kind; `build_mock_error` stores it trimmed and
    /// ASCII-lowercased after validating it with `LlmErrorKind::parse`.
    pub kind: Option<String>,
    /// Optional reason; `build_mock_error` stores it trimmed, or `None` when blank.
    pub reason: Option<String>,
    /// Optional retry hint. Provider-envelope mocks put this directly
    /// on the thrown dict; legacy category-only mocks embed it in the
    /// message so the live-provider parser path still exercises the
    /// same extraction code.
    pub retry_after_ms: Option<u64>,
}
40
41impl MockError {
42    fn has_provider_envelope(&self) -> bool {
43        self.status.is_some() || self.kind.is_some() || self.reason.is_some()
44    }
45}
46
47pub(crate) fn build_mock_error(
48    category: Option<String>,
49    message: Option<String>,
50    status: Option<u16>,
51    kind: Option<String>,
52    reason: Option<String>,
53    retry_after_ms: Option<u64>,
54) -> Result<MockError, String> {
55    if retry_after_ms.is_some_and(|ms| ms > i64::MAX as u64) {
56        return Err("error.retry_after_ms must fit in a signed 64-bit integer".to_string());
57    }
58    let kind = match kind {
59        Some(value) if value.trim().is_empty() => None,
60        Some(value) => {
61            let normalized = value.trim().to_ascii_lowercase();
62            if super::api::LlmErrorKind::parse(&normalized).is_none() {
63                return Err(format!("unknown error kind `{value}`"));
64            }
65            Some(normalized)
66        }
67        None => None,
68    };
69    let reason = reason.and_then(|value| {
70        let trimmed = value.trim();
71        if trimmed.is_empty() {
72            None
73        } else {
74            Some(trimmed.to_string())
75        }
76    });
77    let category_was_provided = category.is_some();
78    let category = match category {
79        Some(value) if value.trim().is_empty() => {
80            return Err("error.category must not be empty".to_string());
81        }
82        Some(value) => {
83            let normalized = value.trim().to_ascii_lowercase();
84            let category = ErrorCategory::parse(&normalized);
85            if category.as_str() != normalized {
86                return Err(format!("unknown error category `{value}`"));
87            }
88            category
89        }
90        None => infer_mock_error_category(status, kind.as_deref(), reason.as_deref()),
91    };
92    if !category_was_provided && kind.is_none() && status.is_none() && reason.is_none() {
93        return Err(
94            "error.category is required unless error.status, error.kind, or error.reason is set"
95                .to_string(),
96        );
97    }
98    Ok(MockError {
99        category,
100        message: message.unwrap_or_else(|| {
101            default_mock_error_message(status, kind.as_deref(), reason.as_deref())
102        }),
103        status,
104        kind,
105        reason,
106        retry_after_ms,
107    })
108}
109
110pub(crate) fn validate_mock_error_status(status: i64) -> Result<u16, String> {
111    let status = u16::try_from(status)
112        .map_err(|_| "error.status must be an HTTP status code".to_string())?;
113    reqwest::StatusCode::from_u16(status)
114        .map_err(|_| "error.status must be an HTTP status code".to_string())?;
115    Ok(status)
116}
117
118fn infer_mock_error_category(
119    status: Option<u16>,
120    kind: Option<&str>,
121    reason: Option<&str>,
122) -> ErrorCategory {
123    if let Some(status) = status {
124        match status {
125            401 | 403 => return ErrorCategory::Auth,
126            404 | 410 => return ErrorCategory::NotFound,
127            408 | 504 | 522 | 524 => return ErrorCategory::Timeout,
128            429 => return ErrorCategory::RateLimit,
129            503 | 529 => return ErrorCategory::Overloaded,
130            500 | 502 => return ErrorCategory::ServerError,
131            _ => {}
132        }
133    }
134    if let Some(reason) = reason {
135        match reason {
136            "rate_limit" => return ErrorCategory::RateLimit,
137            "timeout" => return ErrorCategory::Timeout,
138            "network_error" | "transient_network" => return ErrorCategory::TransientNetwork,
139            "server_error" | "provider_error" | "provider_5xx" | "upstream_unavailable" => {
140                return ErrorCategory::ServerError;
141            }
142            "auth_failure" => return ErrorCategory::Auth,
143            "model_unavailable" => return ErrorCategory::NotFound,
144            _ => {}
145        }
146    }
147    if kind == Some("transient") {
148        return ErrorCategory::ServerError;
149    }
150    ErrorCategory::Generic
151}
152
/// Build the fallback message for a mock error that supplied none.
///
/// With a status the text starts with `HTTP <status>`; the parenthesized
/// detail is `kind/reason` when both are present, otherwise `reason` alone.
/// A status with only a `kind` gets no detail. Without a status, a lone
/// `kind` is shown, and with nothing set the message is empty.
fn default_mock_error_message(
    status: Option<u16>,
    kind: Option<&str>,
    reason: Option<&str>,
) -> String {
    let Some(code) = status else {
        return match (kind, reason) {
            (Some(k), Some(r)) => format!("mock LLM error ({k}/{r})"),
            (Some(k), None) => format!("mock LLM error ({k})"),
            (None, Some(r)) => format!("mock LLM error ({r})"),
            (None, None) => String::new(),
        };
    };
    match (kind, reason) {
        (Some(k), Some(r)) => format!("HTTP {code} mock LLM error ({k}/{r})"),
        (None, Some(r)) => format!("HTTP {code} mock LLM error ({r})"),
        (_, None) => format!("HTTP {code} mock LLM error"),
    }
}
170
/// One scripted LLM response (or error) that the mock queues hand back in
/// place of a provider call.
#[derive(Clone)]
pub struct LlmMock {
    /// Response text. Also emitted as an `output_text` block when `blocks`
    /// is `None` and the text is non-empty.
    pub text: String,
    /// Tool calls. When `blocks` is `None`, each entry's `name` and
    /// `arguments` are used to synthesize calls with ids `mock_call_<n>`;
    /// when `blocks` is `Some`, the entries are passed through unchanged.
    pub tool_calls: Vec<serde_json::Value>,
    /// `None` = FIFO mock, always removed when used. `Some` = glob pattern
    /// tested against the match text; reused unless `consume_on_match` is set.
    pub match_pattern: Option<String>,
    /// For pattern mocks: remove the mock from the queue after its first match.
    pub consume_on_match: bool,
    /// Reported input tokens; defaults to the byte length of the match text.
    pub input_tokens: Option<i64>,
    /// Reported output tokens; defaults to 30.
    pub output_tokens: Option<i64>,
    /// Reported cache-read tokens; defaults to 0.
    pub cache_read_tokens: Option<i64>,
    /// Reported cache-write tokens; defaults to 0.
    pub cache_write_tokens: Option<i64>,
    /// Copied verbatim into the result's `thinking`.
    pub thinking: Option<String>,
    /// Copied verbatim into the result's `thinking_summary`.
    pub thinking_summary: Option<String>,
    /// Copied verbatim into the result's `stop_reason`.
    pub stop_reason: Option<String>,
    /// Model name reported on the result.
    pub model: String,
    /// Provider name reported on the result; defaults to `"mock"`.
    pub provider: Option<String>,
    /// Explicit content blocks. When set they replace the blocks that would
    /// otherwise be synthesized from `text` and `tool_calls`.
    pub blocks: Option<Vec<serde_json::Value>>,
    /// Copied verbatim into the result's `logprobs`.
    pub logprobs: Vec<serde_json::Value>,
    /// When `Some`, this mock synthesizes an error instead of an
    /// `LlmResult`. `text`/`tool_calls` are ignored for error mocks.
    pub error: Option<MockError>,
}
192
/// Snapshot of one LLM request, captured by `record_llm_mock_call` and
/// exposed through `get_llm_mock_calls`. Unless noted, each field is a copy
/// of the request field with the same name.
#[derive(Clone)]
pub(crate) struct LlmMockCall {
    /// String form of the request's API mode.
    pub api_mode: String,
    pub messages: Vec<serde_json::Value>,
    pub system: Option<String>,
    /// Copy of the request's `native_tools`.
    pub tools: Option<Vec<serde_json::Value>>,
    /// `None` when the request carried no provider tools.
    pub provider_tools: Option<Vec<serde_json::Value>>,
    pub tool_choice: Option<serde_json::Value>,
    /// JSON form of the request's output format; `{"kind": "text"}` when
    /// serialization fails.
    pub output_format: serde_json::Value,
    /// JSON form of the request's thinking config; `{"mode": "disabled"}`
    /// when serialization fails.
    pub thinking: serde_json::Value,
    pub previous_response_id: Option<String>,
    pub store: Option<bool>,
    pub background: Option<bool>,
    pub truncation: Option<String>,
    pub compact: Option<bool>,
    pub include: Option<Vec<String>>,
    pub max_tool_calls: Option<i64>,
}
211
/// One saved builtin-mock scope: (mock queue, recorded calls, prompt-cache
/// keys), as stashed by `push_llm_mock_scope` and restored by
/// `pop_llm_mock_scope`.
type LlmMockScope = (Vec<LlmMock>, Vec<LlmMockCall>, BTreeSet<String>);
213
// All mock/replay state is thread-local, so each thread has its own
// independent queues and modes.
thread_local! {
    // Replay mode and fixture directory, both written by `set_replay_mode`.
    static LLM_REPLAY_MODE: RefCell<LlmReplayMode> = const { RefCell::new(LlmReplayMode::Off) };
    // Empty string means no fixture directory is configured.
    static LLM_FIXTURE_DIR: RefCell<String> = const { RefCell::new(String::new()) };
    // NOTE(review): not touched by the functions in this part of the file;
    // presumably managed further down — confirm.
    static TOOL_RECORDINGS: RefCell<Vec<ToolCallRecord>> = const { RefCell::new(Vec::new()) };
    // Builtin mock queue, filled by `push_llm_mock`.
    static LLM_MOCKS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    // CLI harness mode, its installed mock queue, and responses captured
    // while the mode is `Record`.
    static CLI_LLM_MOCK_MODE: RefCell<CliLlmMockMode> = const { RefCell::new(CliLlmMockMode::Off) };
    static CLI_LLM_MOCKS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    static CLI_LLM_RECORDINGS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    // Requests captured by `record_llm_mock_call`.
    static LLM_MOCK_CALLS: RefCell<Vec<LlmMockCall>> = const { RefCell::new(Vec::new()) };
    // Keys of prompts already seen by `apply_mock_prompt_cache`.
    static LLM_PROMPT_CACHE: RefCell<BTreeSet<String>> = const { RefCell::new(BTreeSet::new()) };
    // Stack of scopes saved by `push_llm_mock_scope`.
    static LLM_MOCK_SCOPES: RefCell<Vec<LlmMockScope>> = const { RefCell::new(Vec::new()) };
}
226
227pub(crate) fn push_llm_mock(mock: LlmMock) {
228    LLM_MOCKS.with(|v| v.borrow_mut().push(mock));
229}
230
231pub(crate) fn get_llm_mock_calls() -> Vec<LlmMockCall> {
232    LLM_MOCK_CALLS.with(|v| v.borrow().clone())
233}
234
235pub(crate) fn builtin_llm_mock_active() -> bool {
236    LLM_MOCKS.with(|v| !v.borrow().is_empty())
237}
238
239pub(crate) fn reset_llm_mock_state() {
240    LLM_MOCKS.with(|v| v.borrow_mut().clear());
241    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Off);
242    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
243    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
244    LLM_MOCK_CALLS.with(|v| v.borrow_mut().clear());
245    LLM_PROMPT_CACHE.with(|v| v.borrow_mut().clear());
246    LLM_MOCK_SCOPES.with(|v| v.borrow_mut().clear());
247}
248
249/// Save the current builtin LLM mock queue and recorded-calls list, then
250/// start a fresh empty scope. Paired with `pop_llm_mock_scope`. Backs
251/// the `with_llm_mocks` helper in `std/testing` so tests reliably
252/// roll back to the prior state, including when the body throws.
253pub(crate) fn push_llm_mock_scope() {
254    let mocks = LLM_MOCKS.with(|v| std::mem::take(&mut *v.borrow_mut()));
255    let calls = LLM_MOCK_CALLS.with(|v| std::mem::take(&mut *v.borrow_mut()));
256    let cache = LLM_PROMPT_CACHE.with(|v| std::mem::take(&mut *v.borrow_mut()));
257    LLM_MOCK_SCOPES.with(|v| v.borrow_mut().push((mocks, calls, cache)));
258}
259
260/// Restore the most recently pushed builtin LLM mock scope. Returns
261/// `false` when there is nothing to pop, so the builtin can surface a
262/// clear "imbalanced scope" error rather than silently corrupting
263/// state. CLI-installed mocks are intentionally untouched: they are an
264/// outer harness and should not flicker on each per-test scope swap.
265pub(crate) fn pop_llm_mock_scope() -> bool {
266    let entry = LLM_MOCK_SCOPES.with(|v| v.borrow_mut().pop());
267    match entry {
268        Some((mocks, calls, cache)) => {
269            LLM_MOCKS.with(|v| *v.borrow_mut() = mocks);
270            LLM_MOCK_CALLS.with(|v| *v.borrow_mut() = calls);
271            LLM_PROMPT_CACHE.with(|v| *v.borrow_mut() = cache);
272            true
273        }
274        None => false,
275    }
276}
277
278pub fn clear_cli_llm_mock_mode() {
279    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Off);
280    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
281    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
282}
283
284pub fn install_cli_llm_mocks(mocks: Vec<LlmMock>) {
285    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Replay);
286    CLI_LLM_MOCKS.with(|v| *v.borrow_mut() = mocks);
287    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
288}
289
290pub fn enable_cli_llm_mock_recording() {
291    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Record);
292    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
293    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
294}
295
296pub fn take_cli_llm_recordings() -> Vec<LlmMock> {
297    CLI_LLM_RECORDINGS.with(|v| std::mem::take(&mut *v.borrow_mut()))
298}
299
300pub(crate) fn cli_llm_mock_replay_active() -> bool {
301    CLI_LLM_MOCK_MODE.with(|v| *v.borrow() == CliLlmMockMode::Replay)
302}
303
304fn record_llm_mock_call(request: &super::api::LlmRequestPayload) {
305    LLM_MOCK_CALLS.with(|v| {
306        v.borrow_mut().push(LlmMockCall {
307            api_mode: request.api_mode.as_str().to_string(),
308            messages: request.messages.clone(),
309            system: request.system.clone(),
310            tools: request.native_tools.clone(),
311            provider_tools: if request.provider_tools.is_empty() {
312                None
313            } else {
314                Some(request.provider_tools.clone())
315            },
316            tool_choice: request.tool_choice.clone(),
317            output_format: serde_json::to_value(&request.output_format).unwrap_or_else(|_| {
318                serde_json::json!({
319                    "kind": "text"
320                })
321            }),
322            thinking: serde_json::to_value(&request.thinking).unwrap_or_else(|_| {
323                serde_json::json!({
324                    "mode": "disabled"
325                })
326            }),
327            previous_response_id: request.previous_response_id.clone(),
328            store: request.store,
329            background: request.background,
330            truncation: request.truncation.clone(),
331            compact: request.compact,
332            include: request.include.clone(),
333            max_tool_calls: request.max_tool_calls,
334        });
335    });
336}
337
338/// Build an LlmResult from a matched mock.
339fn build_mock_result(mock: &LlmMock, last_msg_len: usize) -> LlmResult {
340    let (tool_calls, blocks) = if let Some(blocks) = &mock.blocks {
341        (mock.tool_calls.clone(), blocks.clone())
342    } else {
343        let mut blocks = Vec::new();
344
345        if !mock.text.is_empty() {
346            blocks.push(serde_json::json!({
347                "type": "output_text",
348                "text": mock.text,
349                "visibility": "public",
350            }));
351        }
352
353        let mut tool_calls = Vec::new();
354        for (i, tc) in mock.tool_calls.iter().enumerate() {
355            let id = format!("mock_call_{}", i + 1);
356            let name = tc.get("name").and_then(|n| n.as_str()).unwrap_or("unknown");
357            let arguments = tc
358                .get("arguments")
359                .cloned()
360                .unwrap_or(serde_json::json!({}));
361            tool_calls.push(serde_json::json!({
362                "id": id,
363                "type": "tool_call",
364                "name": name,
365                "arguments": arguments,
366            }));
367            blocks.push(serde_json::json!({
368                "type": "tool_call",
369                "id": id,
370                "name": name,
371                "arguments": arguments,
372                "visibility": "internal",
373            }));
374        }
375
376        (tool_calls, blocks)
377    };
378
379    LlmResult {
380        served_fast: false,
381        text: mock.text.clone(),
382        tool_calls,
383        input_tokens: mock.input_tokens.unwrap_or(last_msg_len as i64),
384        output_tokens: mock.output_tokens.unwrap_or(30),
385        cache_read_tokens: mock.cache_read_tokens.unwrap_or(0),
386        cache_write_tokens: mock.cache_write_tokens.unwrap_or(0),
387        cache_supported: true,
388        model: mock.model.clone(),
389        provider: mock.provider.clone().unwrap_or_else(|| "mock".to_string()),
390        thinking: mock.thinking.clone(),
391        thinking_summary: mock.thinking_summary.clone(),
392        stop_reason: mock.stop_reason.clone(),
393        blocks,
394        logprobs: mock.logprobs.clone(),
395        telemetry: ProviderTelemetry::default(),
396    }
397}
398
/// Glob match where `*` is the only wildcard (matches any run of
/// characters, including none). A pattern without `*` must equal `text`.
///
/// The segment before the first `*` must be a prefix of `text`, the segment
/// after the last `*` must be a suffix of what remains, and every segment in
/// between must occur, in order, in the text left over after the previous
/// match. Handles `*`, `prefix*`, `*suffix`, `*contains*`, `pre*mid*suf`, etc.
fn mock_glob_match(pattern: &str, text: &str) -> bool {
    let Some((head, rest)) = pattern.split_once('*') else {
        // No wildcard at all: literal comparison.
        return pattern == text;
    };

    // An empty head strips nothing, so a leading `*` is handled for free.
    let Some(mut remaining) = text.strip_prefix(head) else {
        return false;
    };

    // Everything after the first `*`: the piece after the LAST `*` is the
    // anchored tail, anything before it is a run of floating segments.
    let (floating, tail) = match rest.rsplit_once('*') {
        Some((floating, tail)) => (floating, tail),
        None => ("", rest),
    };

    for segment in floating.split('*').filter(|piece| !piece.is_empty()) {
        match remaining.find(segment) {
            Some(at) => remaining = &remaining[at + segment.len()..],
            None => return false,
        }
    }

    tail.is_empty() || remaining.ends_with(tail)
}
433
434fn collect_mock_match_strings(value: &serde_json::Value, out: &mut Vec<String>) {
435    match value {
436        serde_json::Value::String(text) if !text.is_empty() => out.push(text.clone()),
437        serde_json::Value::String(_) => {}
438        serde_json::Value::Array(items) => {
439            for item in items {
440                collect_mock_match_strings(item, out);
441            }
442        }
443        serde_json::Value::Object(map) => {
444            for value in map.values() {
445                collect_mock_match_strings(value, out);
446            }
447        }
448        _ => {}
449    }
450}
451
452fn mock_match_text(messages: &[serde_json::Value]) -> String {
453    let mut parts = Vec::new();
454    for message in messages {
455        collect_mock_match_strings(message, &mut parts);
456    }
457    parts.join("\n")
458}
459
460fn mock_last_prompt_text(messages: &[serde_json::Value]) -> String {
461    for message in messages.iter().rev() {
462        let Some(content) = message.get("content") else {
463            continue;
464        };
465        let mut parts = Vec::new();
466        collect_mock_match_strings(content, &mut parts);
467        let text = parts.join("\n");
468        if !text.trim().is_empty() {
469            return text;
470        }
471    }
472    String::new()
473}
474
475fn mock_prompt_cache_key(
476    model: &str,
477    messages: &[serde_json::Value],
478    system: Option<&str>,
479) -> String {
480    serde_json::to_string(&serde_json::json!({
481        "model": model,
482        "system": system,
483        "messages": messages,
484    }))
485    .unwrap_or_default()
486}
487
488fn apply_mock_prompt_cache(result: &mut LlmResult, cache_key: &str) {
489    if result.cache_read_tokens > 0 || result.cache_write_tokens > 0 {
490        return;
491    }
492    let cache_tokens = result.input_tokens.max(0);
493    if cache_tokens == 0 {
494        return;
495    }
496    let cache_hit = LLM_PROMPT_CACHE.with(|cache| {
497        let mut cache = cache.borrow_mut();
498        if cache.contains(cache_key) {
499            true
500        } else {
501            cache.insert(cache_key.to_string());
502            false
503        }
504    });
505    if cache_hit {
506        result.cache_read_tokens = cache_tokens;
507    } else {
508        result.cache_write_tokens = cache_tokens;
509    }
510}
511
512/// Convert a mock's `error` payload into the `VmError` that the
513/// provider path would have raised, so classification, retry, and
514/// `error_category` all behave identically to a real failure.
515fn mock_error_to_vm_error(err: &MockError) -> VmError {
516    let message = mock_error_message(err);
517    if err.has_provider_envelope() {
518        let classified = super::api::classify_llm_error(err.category.clone(), &message);
519        let mut dict = BTreeMap::new();
520        dict.insert(
521            "category".to_string(),
522            VmValue::String(std::sync::Arc::from(err.category.as_str())),
523        );
524        dict.insert(
525            "kind".to_string(),
526            VmValue::String(std::sync::Arc::from(
527                err.kind
528                    .as_deref()
529                    .unwrap_or_else(|| classified.kind.as_str()),
530            )),
531        );
532        dict.insert(
533            "reason".to_string(),
534            VmValue::String(std::sync::Arc::from(
535                err.reason
536                    .as_deref()
537                    .unwrap_or_else(|| classified.reason.as_str()),
538            )),
539        );
540        dict.insert(
541            "message".to_string(),
542            VmValue::String(std::sync::Arc::from(message)),
543        );
544        if let Some(status) = err.status {
545            dict.insert("status".to_string(), VmValue::Int(i64::from(status)));
546        }
547        if let Some(retry_after_ms) = err.retry_after_ms {
548            dict.insert(
549                "retry_after_ms".to_string(),
550                VmValue::Int(retry_after_ms as i64),
551            );
552        }
553        return VmError::Thrown(VmValue::Dict(std::sync::Arc::new(dict)));
554    }
555
556    VmError::CategorizedError {
557        message,
558        category: err.category.clone(),
559    }
560}
561
562fn mock_error_message(err: &MockError) -> String {
563    // Embed legacy category-only retry hints into the message so the
564    // same parser that handles live provider headers populates
565    // `retry_after_ms` on the final thrown dict.
566    let Some(ms) = err.retry_after_ms else {
567        return err.message.clone();
568    };
569    if err.has_provider_envelope() {
570        return err.message.clone();
571    }
572    let secs = (ms as f64 / 1000.0).max(0.0);
573    let sep = if err.message.is_empty() || err.message.ends_with('\n') {
574        ""
575    } else {
576        "\n"
577    };
578    format!("{}{sep}retry-after: {secs}\n", err.message)
579}
580
581/// Try to find and return a matching mock response. Returns
582/// `Some(Ok(LlmResult))` on a text/tool_call match, `Some(Err(VmError))`
583/// on an error-mock match, and `None` to fall through to default.
584fn try_match_mock_queue(
585    mocks: &mut Vec<LlmMock>,
586    match_text: &str,
587) -> Option<Result<LlmResult, VmError>> {
588    if let Some(idx) = mocks.iter().position(|m| m.match_pattern.is_none()) {
589        let mock = mocks.remove(idx);
590        return Some(match &mock.error {
591            Some(err) => Err(mock_error_to_vm_error(err)),
592            None => Ok(build_mock_result(&mock, match_text.len())),
593        });
594    }
595
596    for idx in 0..mocks.len() {
597        let mock = &mocks[idx];
598        if let Some(ref pattern) = mock.match_pattern {
599            if mock_glob_match(pattern, match_text) {
600                if mock.consume_on_match {
601                    let mock = mocks.remove(idx);
602                    return Some(match &mock.error {
603                        Some(err) => Err(mock_error_to_vm_error(err)),
604                        None => Ok(build_mock_result(&mock, match_text.len())),
605                    });
606                }
607                return Some(match &mock.error {
608                    Some(err) => Err(mock_error_to_vm_error(err)),
609                    None => Ok(build_mock_result(mock, match_text.len())),
610                });
611            }
612        }
613    }
614
615    None
616}
617
618fn try_match_builtin_mock(match_text: &str) -> Option<Result<LlmResult, VmError>> {
619    LLM_MOCKS.with(|mocks| try_match_mock_queue(&mut mocks.borrow_mut(), match_text))
620}
621
622fn try_match_cli_mock(match_text: &str) -> Option<Result<LlmResult, VmError>> {
623    CLI_LLM_MOCKS.with(|mocks| try_match_mock_queue(&mut mocks.borrow_mut(), match_text))
624}
625
626pub(crate) fn record_cli_llm_result(result: &LlmResult) {
627    record_unified_tape_llm_call(result);
628    if !CLI_LLM_MOCK_MODE.with(|mode| *mode.borrow() == CliLlmMockMode::Record) {
629        return;
630    }
631    CLI_LLM_RECORDINGS.with(|recordings| {
632        recordings.borrow_mut().push(LlmMock {
633            text: result.text.clone(),
634            tool_calls: result.tool_calls.clone(),
635            match_pattern: None,
636            consume_on_match: false,
637            input_tokens: Some(result.input_tokens),
638            output_tokens: Some(result.output_tokens),
639            cache_read_tokens: Some(result.cache_read_tokens),
640            cache_write_tokens: Some(result.cache_write_tokens),
641            thinking: result.thinking.clone(),
642            thinking_summary: result.thinking_summary.clone(),
643            stop_reason: result.stop_reason.clone(),
644            model: result.model.clone(),
645            provider: Some(result.provider.clone()),
646            blocks: Some(result.blocks.clone()),
647            logprobs: result.logprobs.clone(),
648            error: None,
649        });
650    });
651}
652
/// Append an `LlmCall` record to the unified-tape recorder when one is
/// active. The request digest is built from the most recently recorded
/// `LlmMockCall` so the same hashing surface used for fixture matching
/// drives the fidelity oracle's request comparison; falls back to a
/// hash of the response text alone when no matching call is on record
/// (e.g. when `record_llm_mock_call` was bypassed).
///
/// The digest covers `messages`, `system`, `tools`, `tool_choice`,
/// `thinking`, and `model` unconditionally; every other request field is
/// included only when it departs from its default, so requests that do
/// not use those fields hash the same with or without them.
fn record_unified_tape_llm_call(result: &LlmResult) {
    // Nothing to do (and nothing to serialize) without an active recorder.
    if crate::testbench::tape::active_recorder().is_none() {
        return;
    }
    // A result that fails to serialize is recorded as an empty payload.
    let response_json = serde_json::to_vec(result).unwrap_or_else(|_| Vec::new());
    let request_digest = LLM_MOCK_CALLS
        .with(|calls| calls.borrow().last().cloned())
        .map(|call| {
            // NOTE(review): if serde_json's `preserve_order` feature is enabled,
            // the insertion order below affects the serialized bytes and thus
            // the digest — do not reorder these inserts without confirming.
            let mut request = serde_json::Map::new();
            request.insert("messages".to_string(), serde_json::json!(call.messages));
            request.insert("system".to_string(), serde_json::json!(call.system));
            request.insert("tools".to_string(), serde_json::json!(call.tools));
            request.insert(
                "tool_choice".to_string(),
                serde_json::json!(call.tool_choice),
            );
            request.insert("thinking".to_string(), serde_json::json!(call.thinking));
            // The model comes from the result, not from the recorded call.
            request.insert("model".to_string(), serde_json::json!(result.model));
            // `chat_completions` is treated as the default API mode and omitted.
            if call.api_mode != "chat_completions" {
                request.insert("api_mode".to_string(), serde_json::json!(call.api_mode));
            }
            if call.provider_tools.is_some() {
                request.insert(
                    "provider_tools".to_string(),
                    serde_json::json!(call.provider_tools),
                );
            }
            // Plain-text output is treated as the default format and omitted.
            if call
                .output_format
                .get("kind")
                .and_then(|value| value.as_str())
                != Some("text")
            {
                request.insert(
                    "output_format".to_string(),
                    serde_json::json!(call.output_format),
                );
            }
            if call.previous_response_id.is_some() {
                request.insert(
                    "previous_response_id".to_string(),
                    serde_json::json!(call.previous_response_id),
                );
            }
            if call.store.is_some() {
                request.insert("store".to_string(), serde_json::json!(call.store));
            }
            if call.background.is_some() {
                request.insert("background".to_string(), serde_json::json!(call.background));
            }
            if call.truncation.is_some() {
                request.insert("truncation".to_string(), serde_json::json!(call.truncation));
            }
            if call.compact.is_some() {
                request.insert("compact".to_string(), serde_json::json!(call.compact));
            }
            if call.include.is_some() {
                request.insert("include".to_string(), serde_json::json!(call.include));
            }
            if call.max_tool_calls.is_some() {
                request.insert(
                    "max_tool_calls".to_string(),
                    serde_json::json!(call.max_tool_calls),
                );
            }
            let serialized =
                serde_json::to_vec(&serde_json::Value::Object(request)).unwrap_or_default();
            crate::testbench::tape::content_hash(&serialized)
        })
        .unwrap_or_else(|| {
            // Fall back to hashing the response — keeps fidelity comparable
            // across runs even when the request surface wasn't captured.
            crate::testbench::tape::content_hash(result.text.as_bytes())
        });
    // NOTE(review): presumably the recorder appends the record returned by
    // this closure — `with_active_recorder` is defined elsewhere; confirm.
    crate::testbench::tape::with_active_recorder(|recorder| {
        let response = recorder.payload_from_bytes(response_json);
        Some(crate::testbench::tape::TapeRecordKind::LlmCall {
            request_digest,
            response,
        })
    });
}
741
742fn unmatched_cli_prompt_error(match_text: &str) -> VmError {
743    let mut snippet: String = match_text.chars().take(200).collect();
744    if match_text.chars().count() > 200 {
745        snippet.push_str("...");
746    }
747    VmError::Runtime(format!("No --llm-mock fixture matched prompt: {snippet:?}"))
748}
749
750/// Set LLM replay mode (record/replay) and fixture directory.
751pub fn set_replay_mode(mode: LlmReplayMode, fixture_dir: &str) {
752    LLM_REPLAY_MODE.with(|v| *v.borrow_mut() = mode);
753    LLM_FIXTURE_DIR.with(|v| *v.borrow_mut() = fixture_dir.to_string());
754}
755
756pub(crate) fn get_replay_mode() -> LlmReplayMode {
757    LLM_REPLAY_MODE.with(|v| *v.borrow())
758}
759
760pub(crate) fn get_fixture_dir() -> String {
761    LLM_FIXTURE_DIR.with(|v| v.borrow().clone())
762}
763
764/// Hash a request for fixture file naming using canonical JSON serialization.
765pub(crate) fn fixture_hash(
766    model: &str,
767    messages: &[serde_json::Value],
768    system: Option<&str>,
769) -> String {
770    use std::hash::{Hash, Hasher};
771    let mut hasher = std::collections::hash_map::DefaultHasher::new();
772    model.hash(&mut hasher);
773    // Canonical JSON hashing is stable across Debug-format changes.
774    serde_json::to_string(messages)
775        .unwrap_or_default()
776        .hash(&mut hasher);
777    system.hash(&mut hasher);
778    format!("{:016x}", hasher.finish())
779}
780
781pub(crate) fn save_fixture(hash: &str, result: &LlmResult) {
782    let dir = get_fixture_dir();
783    if dir.is_empty() {
784        return;
785    }
786    let _ = std::fs::create_dir_all(&dir);
787    let path = format!("{dir}/{hash}.json");
788    let json = serde_json::json!({
789        "text": result.text,
790        "tool_calls": result.tool_calls,
791        "input_tokens": result.input_tokens,
792        "output_tokens": result.output_tokens,
793        "cache_read_tokens": result.cache_read_tokens,
794        "cache_write_tokens": result.cache_write_tokens,
795        "cache_creation_input_tokens": result.cache_write_tokens,
796        "model": result.model,
797        "provider": result.provider,
798        "thinking": result.thinking,
799        "thinking_summary": result.thinking_summary,
800        "stop_reason": result.stop_reason,
801        "blocks": result.blocks,
802        "logprobs": result.logprobs,
803    });
804    let _ = std::fs::write(
805        &path,
806        serde_json::to_string_pretty(&json).unwrap_or_default(),
807    );
808}
809
810pub(crate) fn load_fixture(hash: &str) -> Option<LlmResult> {
811    let dir = get_fixture_dir();
812    if dir.is_empty() {
813        return None;
814    }
815    let path = format!("{dir}/{hash}.json");
816    let content = std::fs::read_to_string(&path).ok()?;
817    let json: serde_json::Value = serde_json::from_str(&content).ok()?;
818    Some(LlmResult {
819        served_fast: false,
820        text: json["text"].as_str().unwrap_or("").to_string(),
821        tool_calls: json["tool_calls"].as_array().cloned().unwrap_or_default(),
822        input_tokens: json["input_tokens"].as_i64().unwrap_or(0),
823        output_tokens: json["output_tokens"].as_i64().unwrap_or(0),
824        cache_read_tokens: json["cache_read_tokens"].as_i64().unwrap_or(0),
825        cache_write_tokens: json["cache_write_tokens"]
826            .as_i64()
827            .or_else(|| json["cache_creation_input_tokens"].as_i64())
828            .unwrap_or(0),
829        cache_supported: json["cache_supported"].as_bool().unwrap_or(true),
830        model: json["model"].as_str().unwrap_or("").to_string(),
831        provider: json["provider"].as_str().unwrap_or("mock").to_string(),
832        thinking: json["thinking"].as_str().map(|s| s.to_string()),
833        thinking_summary: json["thinking_summary"].as_str().map(|s| s.to_string()),
834        stop_reason: json["stop_reason"].as_str().map(|s| s.to_string()),
835        blocks: json["blocks"].as_array().cloned().unwrap_or_default(),
836        logprobs: json["logprobs"].as_array().cloned().unwrap_or_default(),
837        telemetry: serde_json::from_value(json["telemetry"].clone()).unwrap_or_default(),
838    })
839}
840
841/// Generate stub argument values for required parameters in a tool schema.
842/// This makes mock tool calls realistic — a real model would always fill
843/// required fields, so the mock should too.
844fn mock_required_args(tool_schema: &serde_json::Value) -> serde_json::Value {
845    let mut args = serde_json::Map::new();
846    // Anthropic: {name, input_schema: {properties, required}}
847    // OpenAI:    {function: {name, parameters: {properties, required}}}
848    // Harn VM:   {parameters: {name: {type, required}}}  (from tool_define)
849    let input_schema = tool_schema
850        .get("input_schema")
851        .or_else(|| tool_schema.get("inputSchema"))
852        .or_else(|| {
853            tool_schema
854                .get("function")
855                .and_then(|f| f.get("parameters"))
856        })
857        .or_else(|| tool_schema.get("parameters"));
858    let Some(schema) = input_schema else {
859        return serde_json::Value::Object(args);
860    };
861    let required: std::collections::BTreeSet<String> = schema
862        .get("required")
863        .and_then(|r| r.as_array())
864        .map(|arr| {
865            arr.iter()
866                .filter_map(|v| v.as_str().map(|s| s.to_string()))
867                .collect()
868        })
869        .unwrap_or_default();
870    if let Some(props) = schema.get("properties").and_then(|p| p.as_object()) {
871        for (name, prop) in props {
872            if !required.contains(name) {
873                continue;
874            }
875            let ty = prop
876                .get("type")
877                .and_then(|t| t.as_str())
878                .unwrap_or("string");
879            let placeholder = match ty {
880                "integer" => serde_json::json!(0),
881                "number" => serde_json::json!(0.0),
882                "boolean" => serde_json::json!(false),
883                "array" => serde_json::json!([]),
884                "object" => serde_json::json!({}),
885                _ => serde_json::json!(""),
886            };
887            args.insert(name.clone(), placeholder);
888        }
889    }
890    serde_json::Value::Object(args)
891}
892
893fn mock_tool_name(tool: &serde_json::Value) -> Option<&str> {
894    tool.get("name")
895        .or_else(|| {
896            tool.get("function")
897                .and_then(|function| function.get("name"))
898        })
899        .and_then(|name| name.as_str())
900}
901
902fn mock_auto_tool_candidate(tools: &[serde_json::Value]) -> Option<&serde_json::Value> {
903    tools
904        .iter()
905        .find(|tool| mock_tool_name(tool) != Some("agent_await_resumption"))
906}
907
908/// Mock LLM provider -- deterministic responses for testing without API keys.
909/// When configurable mocks have been registered via `llm_mock()`, those are
910/// checked first (FIFO queue, then pattern matching). Falls through to the
911/// default deterministic behavior when no mocks match.
912pub(crate) fn mock_llm_response(
913    request: &super::api::LlmRequestPayload,
914) -> Result<LlmResult, VmError> {
915    record_llm_mock_call(request);
916
917    let messages = &request.messages;
918    let system = request.system.as_deref();
919    let match_text = mock_match_text(messages);
920    let prompt_text = mock_last_prompt_text(messages);
921    let cache_key = mock_prompt_cache_key(&request.model, messages, system);
922
923    if let Some(matched) = try_match_cli_mock(&match_text) {
924        return matched.map(|mut result| {
925            if request.cache {
926                apply_mock_prompt_cache(&mut result, &cache_key);
927            }
928            result
929        });
930    }
931
932    if let Some(matched) = try_match_builtin_mock(&match_text) {
933        return matched.map(|mut result| {
934            if request.cache {
935                apply_mock_prompt_cache(&mut result, &cache_key);
936            }
937            result
938        });
939    }
940
941    if cli_llm_mock_replay_active() {
942        return Err(unmatched_cli_prompt_error(&match_text));
943    }
944
945    // Generate a mock tool call for the first tool, filling required
946    // params with placeholders so the call passes schema validation.
947    if let Some(tools) = request.native_tools.as_deref() {
948        if let Some(first_tool) = mock_auto_tool_candidate(tools) {
949            let tool_name = mock_tool_name(first_tool).unwrap_or("unknown");
950            let mock_args = mock_required_args(first_tool);
951            let mut result = LlmResult {
952                served_fast: false,
953                text: String::new(),
954                tool_calls: vec![serde_json::json!({
955                        "id": "mock_call_1",
956                        "type": "tool_call",
957                        "name": tool_name,
958                "arguments": mock_args
959                })],
960                input_tokens: prompt_text.len() as i64,
961                output_tokens: 20,
962                cache_read_tokens: 0,
963                cache_write_tokens: 0,
964                cache_supported: true,
965                model: request.model.clone(),
966                provider: "mock".to_string(),
967                thinking: None,
968                thinking_summary: None,
969                stop_reason: None,
970                blocks: vec![serde_json::json!({
971                    "type": "tool_call",
972                    "id": "mock_call_1",
973                    "name": tool_name,
974                    "arguments": mock_args,
975                    "visibility": "internal",
976                })],
977                logprobs: Vec::new(),
978                telemetry: ProviderTelemetry::default(),
979            };
980            if request.cache {
981                apply_mock_prompt_cache(&mut result, &cache_key);
982            }
983            return Ok(result);
984        }
985    }
986
987    // Preserve the historical auto-complete behavior for tagged text-tool
988    // prompts only. Bare `##DONE##` in no-tool/native prompts changes
989    // loop semantics by completing runs that used to exhaust budget unless
990    // a fixture explicitly returned the sentinel.
991    let tagged_done = system.is_some_and(|s| s.contains("<done>"));
992
993    let prose_body = if prompt_text.is_empty() {
994        "Mock LLM response".to_string()
995    } else {
996        let word_count = prompt_text.split_whitespace().count();
997        format!(
998            "Mock response to {word_count}-word prompt: {}",
999            prompt_text.chars().take(100).collect::<String>()
1000        )
1001    };
1002    let response = if tagged_done {
1003        format!("<assistant_prose>{prose_body}</assistant_prose>\n<done>##DONE##</done>")
1004    } else {
1005        prose_body
1006    };
1007
1008    let mut result = LlmResult {
1009        served_fast: false,
1010        text: response.clone(),
1011        tool_calls: vec![],
1012        input_tokens: prompt_text.len() as i64,
1013        output_tokens: 30,
1014        cache_read_tokens: 0,
1015        cache_write_tokens: 0,
1016        cache_supported: true,
1017        model: request.model.clone(),
1018        provider: "mock".to_string(),
1019        thinking: None,
1020        thinking_summary: None,
1021        stop_reason: None,
1022        blocks: vec![serde_json::json!({
1023            "type": "output_text",
1024            "text": response,
1025            "visibility": "public",
1026        })],
1027        logprobs: Vec::new(),
1028        telemetry: ProviderTelemetry::default(),
1029    };
1030    if request.cache {
1031        apply_mock_prompt_cache(&mut result, &cache_key);
1032    }
1033    Ok(result)
1034}
1035
1036/// Take all recorded tool calls, leaving the buffer empty.
1037pub fn drain_tool_recordings() -> Vec<ToolCallRecord> {
1038    TOOL_RECORDINGS.with(|v| std::mem::take(&mut *v.borrow_mut()))
1039}