harn_vm/llm/mock.rs

use std::cell::RefCell;
use std::collections::BTreeSet;

use super::api::LlmResult;
use crate::orchestration::ToolCallRecord;
use crate::value::{ErrorCategory, VmError};

/// LLM replay mode.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum LlmReplayMode {
    Off,
    Record,
    Replay,
}

/// Tool recording mode — mirrors LLM replay for tool call results.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToolRecordingMode {
    Off,
    Record,
    Replay,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CliLlmMockMode {
    Off,
    Replay,
    Record,
}

/// Categorized error injected by a mock. When present, the mock
/// short-circuits the provider call and surfaces as
/// `VmError::CategorizedError`, so `llm_call` throws and
/// `llm_call_safe` populates its `error` envelope.
#[derive(Clone)]
pub struct MockError {
    pub category: ErrorCategory,
    pub message: String,
    /// Optional hint echoed into the error message as a synthetic
    /// `retry-after:` header so the existing `extract_retry_after_ms`
    /// parser recovers it — matches how real provider errors embed
    /// the value. Lets tests assert that `e.retry_after_ms` flows
    /// end-to-end on the thrown dict.
    pub retry_after_ms: Option<u64>,
}

#[derive(Clone)]
pub struct LlmMock {
    pub text: String,
    pub tool_calls: Vec<serde_json::Value>,
    pub match_pattern: Option<String>, // None = FIFO (consumed), Some = glob (reusable)
    pub consume_on_match: bool,
    pub input_tokens: Option<i64>,
    pub output_tokens: Option<i64>,
    pub cache_read_tokens: Option<i64>,
    pub cache_write_tokens: Option<i64>,
    pub thinking: Option<String>,
    pub thinking_summary: Option<String>,
    pub stop_reason: Option<String>,
    pub model: String,
    pub provider: Option<String>,
    pub blocks: Option<Vec<serde_json::Value>>,
    /// When `Some`, this mock synthesizes an error instead of an
    /// `LlmResult`. `text`/`tool_calls` are ignored for error mocks.
    pub error: Option<MockError>,
}

#[derive(Clone)]
pub(crate) struct LlmMockCall {
    pub messages: Vec<serde_json::Value>,
    pub system: Option<String>,
    pub tools: Option<Vec<serde_json::Value>>,
    pub thinking: serde_json::Value,
}

type LlmMockScope = (Vec<LlmMock>, Vec<LlmMockCall>, BTreeSet<String>);

thread_local! {
    static LLM_REPLAY_MODE: RefCell<LlmReplayMode> = const { RefCell::new(LlmReplayMode::Off) };
    static LLM_FIXTURE_DIR: RefCell<String> = const { RefCell::new(String::new()) };
    static TOOL_RECORDING_MODE: RefCell<ToolRecordingMode> = const { RefCell::new(ToolRecordingMode::Off) };
    static TOOL_RECORDINGS: RefCell<Vec<ToolCallRecord>> = const { RefCell::new(Vec::new()) };
    static TOOL_REPLAY_FIXTURES: RefCell<Vec<ToolCallRecord>> = const { RefCell::new(Vec::new()) };
    static LLM_MOCKS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    static CLI_LLM_MOCK_MODE: RefCell<CliLlmMockMode> = const { RefCell::new(CliLlmMockMode::Off) };
    static CLI_LLM_MOCKS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    static CLI_LLM_RECORDINGS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    static LLM_MOCK_CALLS: RefCell<Vec<LlmMockCall>> = const { RefCell::new(Vec::new()) };
    static LLM_PROMPT_CACHE: RefCell<BTreeSet<String>> = const { RefCell::new(BTreeSet::new()) };
    static LLM_MOCK_SCOPES: RefCell<Vec<LlmMockScope>> = const { RefCell::new(Vec::new()) };
}

pub(crate) fn push_llm_mock(mock: LlmMock) {
    LLM_MOCKS.with(|v| v.borrow_mut().push(mock));
}

pub(crate) fn get_llm_mock_calls() -> Vec<LlmMockCall> {
    LLM_MOCK_CALLS.with(|v| v.borrow().clone())
}

pub(crate) fn builtin_llm_mock_active() -> bool {
    LLM_MOCKS.with(|v| !v.borrow().is_empty())
}

pub(crate) fn reset_llm_mock_state() {
    LLM_MOCKS.with(|v| v.borrow_mut().clear());
    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Off);
    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
    LLM_MOCK_CALLS.with(|v| v.borrow_mut().clear());
    LLM_PROMPT_CACHE.with(|v| v.borrow_mut().clear());
    LLM_MOCK_SCOPES.with(|v| v.borrow_mut().clear());
}

/// Save the current builtin LLM mock queue, recorded-calls list, and
/// simulated prompt-cache set, then start a fresh empty scope. Paired with
/// `pop_llm_mock_scope`. Backs the `with_llm_mocks` helper in `std/testing`
/// so tests reliably roll back to the prior state, including when the body throws.
pub(crate) fn push_llm_mock_scope() {
    let mocks = LLM_MOCKS.with(|v| std::mem::take(&mut *v.borrow_mut()));
    let calls = LLM_MOCK_CALLS.with(|v| std::mem::take(&mut *v.borrow_mut()));
    let cache = LLM_PROMPT_CACHE.with(|v| std::mem::take(&mut *v.borrow_mut()));
    LLM_MOCK_SCOPES.with(|v| v.borrow_mut().push((mocks, calls, cache)));
}

/// Restore the most recently pushed builtin LLM mock scope. Returns
/// `false` when there is nothing to pop, so the builtin can surface a
/// clear "imbalanced scope" error rather than silently corrupting
/// state. CLI-installed mocks are intentionally untouched: they are an
/// outer harness and should not flicker on each per-test scope swap.
pub(crate) fn pop_llm_mock_scope() -> bool {
    let entry = LLM_MOCK_SCOPES.with(|v| v.borrow_mut().pop());
    match entry {
        Some((mocks, calls, cache)) => {
            LLM_MOCKS.with(|v| *v.borrow_mut() = mocks);
            LLM_MOCK_CALLS.with(|v| *v.borrow_mut() = calls);
            LLM_PROMPT_CACHE.with(|v| *v.borrow_mut() = cache);
            true
        }
        None => false,
    }
}
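
// A usage sketch added for illustration (not part of the original source):
// exercises the stash/restore contract documented above. The mock's field
// values are arbitrary placeholders; only queue emptiness is asserted.
#[cfg(test)]
#[test]
fn llm_mock_scope_round_trip_sketch() {
    fn placeholder_mock() -> LlmMock {
        LlmMock {
            text: "stub".to_string(),
            tool_calls: Vec::new(),
            match_pattern: None,
            consume_on_match: false,
            input_tokens: None,
            output_tokens: None,
            cache_read_tokens: None,
            cache_write_tokens: None,
            thinking: None,
            thinking_summary: None,
            stop_reason: None,
            model: "mock-model".to_string(),
            provider: None,
            blocks: None,
            error: None,
        }
    }

    reset_llm_mock_state();
    push_llm_mock(placeholder_mock());
    assert!(builtin_llm_mock_active());

    // Entering a scope stashes the current queue and starts empty...
    push_llm_mock_scope();
    assert!(!builtin_llm_mock_active());

    // ...and popping restores the outer queue.
    assert!(pop_llm_mock_scope());
    assert!(builtin_llm_mock_active());

    // An unbalanced pop is reported as `false` rather than panicking.
    assert!(!pop_llm_mock_scope());
    reset_llm_mock_state();
}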

pub fn clear_cli_llm_mock_mode() {
    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Off);
    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
}

pub fn install_cli_llm_mocks(mocks: Vec<LlmMock>) {
    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Replay);
    CLI_LLM_MOCKS.with(|v| *v.borrow_mut() = mocks);
    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
}

pub fn enable_cli_llm_mock_recording() {
    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Record);
    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
}

pub fn take_cli_llm_recordings() -> Vec<LlmMock> {
    CLI_LLM_RECORDINGS.with(|v| std::mem::take(&mut *v.borrow_mut()))
}

pub(crate) fn cli_llm_mock_replay_active() -> bool {
    CLI_LLM_MOCK_MODE.with(|v| *v.borrow() == CliLlmMockMode::Replay)
}

fn record_llm_mock_call(
    messages: &[serde_json::Value],
    system: Option<&str>,
    native_tools: Option<&[serde_json::Value]>,
    thinking: &super::api::ThinkingConfig,
) {
    LLM_MOCK_CALLS.with(|v| {
        v.borrow_mut().push(LlmMockCall {
            messages: messages.to_vec(),
            system: system.map(|s| s.to_string()),
            tools: native_tools.map(|t| t.to_vec()),
            thinking: serde_json::to_value(thinking).unwrap_or_else(|_| {
                serde_json::json!({
                    "mode": "disabled"
                })
            }),
        });
    });
}

/// Build an LlmResult from a matched mock.
fn build_mock_result(mock: &LlmMock, last_msg_len: usize) -> LlmResult {
    let (tool_calls, blocks) = if let Some(blocks) = &mock.blocks {
        (mock.tool_calls.clone(), blocks.clone())
    } else {
        let mut blocks = Vec::new();

        if !mock.text.is_empty() {
            blocks.push(serde_json::json!({
                "type": "output_text",
                "text": mock.text,
                "visibility": "public",
            }));
        }

        let mut tool_calls = Vec::new();
        for (i, tc) in mock.tool_calls.iter().enumerate() {
            let id = format!("mock_call_{}", i + 1);
            let name = tc.get("name").and_then(|n| n.as_str()).unwrap_or("unknown");
            let arguments = tc
                .get("arguments")
                .cloned()
                .unwrap_or(serde_json::json!({}));
            tool_calls.push(serde_json::json!({
                "id": id,
                "type": "tool_call",
                "name": name,
                "arguments": arguments,
            }));
            blocks.push(serde_json::json!({
                "type": "tool_call",
                "id": id,
                "name": name,
                "arguments": arguments,
                "visibility": "internal",
            }));
        }

        (tool_calls, blocks)
    };

    LlmResult {
        text: mock.text.clone(),
        tool_calls,
        input_tokens: mock.input_tokens.unwrap_or(last_msg_len as i64),
        output_tokens: mock.output_tokens.unwrap_or(30),
        cache_read_tokens: mock.cache_read_tokens.unwrap_or(0),
        cache_write_tokens: mock.cache_write_tokens.unwrap_or(0),
        model: mock.model.clone(),
        provider: mock.provider.clone().unwrap_or_else(|| "mock".to_string()),
        thinking: mock.thinking.clone(),
        thinking_summary: mock.thinking_summary.clone(),
        stop_reason: mock.stop_reason.clone(),
        blocks,
    }
}

/// Multi-segment glob match: split on `*` and check segments appear in order.
/// Handles `*`, `prefix*`, `*suffix`, `*contains*`, `pre*mid*suf`, etc.
fn mock_glob_match(pattern: &str, text: &str) -> bool {
    if pattern == "*" {
        return true;
    }
    if !pattern.contains('*') {
        return pattern == text;
    }
    let parts: Vec<&str> = pattern.split('*').collect();
    let mut remaining = text;
    for (i, part) in parts.iter().enumerate() {
        if part.is_empty() {
            continue;
        }
        if i == 0 {
            if !remaining.starts_with(part) {
                return false;
            }
            remaining = &remaining[part.len()..];
        } else if i == parts.len() - 1 {
            if !remaining.ends_with(part) {
                return false;
            }
            remaining = "";
        } else {
            match remaining.find(part) {
                Some(pos) => remaining = &remaining[pos + part.len()..],
                None => return false,
            }
        }
    }
    true
}
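
// A minimal sketch added for illustration: the segment-ordering semantics
// of the glob matcher above.
#[cfg(test)]
#[test]
fn mock_glob_match_sketch() {
    assert!(mock_glob_match("*", "anything at all"));
    assert!(mock_glob_match("exact", "exact"));
    assert!(mock_glob_match("weather*", "weather in Paris"));
    assert!(mock_glob_match("*Paris", "weather in Paris"));
    assert!(mock_glob_match("*in*", "weather in Paris"));
    assert!(mock_glob_match("weather*Paris", "weather in Paris"));
    assert!(!mock_glob_match("Paris*weather", "weather in Paris"));
    assert!(!mock_glob_match("exact", "not exact at all"));
}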

fn collect_mock_match_strings(value: &serde_json::Value, out: &mut Vec<String>) {
    match value {
        serde_json::Value::String(text) if !text.is_empty() => out.push(text.clone()),
        serde_json::Value::String(_) => {}
        serde_json::Value::Array(items) => {
            for item in items {
                collect_mock_match_strings(item, out);
            }
        }
        serde_json::Value::Object(map) => {
            for value in map.values() {
                collect_mock_match_strings(value, out);
            }
        }
        _ => {}
    }
}

fn mock_match_text(messages: &[serde_json::Value]) -> String {
    let mut parts = Vec::new();
    for message in messages {
        collect_mock_match_strings(message, &mut parts);
    }
    parts.join("\n")
}

fn mock_last_prompt_text(messages: &[serde_json::Value]) -> String {
    for message in messages.iter().rev() {
        let Some(content) = message.get("content") else {
            continue;
        };
        let mut parts = Vec::new();
        collect_mock_match_strings(content, &mut parts);
        let text = parts.join("\n");
        if !text.trim().is_empty() {
            return text;
        }
    }
    String::new()
}
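
// A minimal sketch added for illustration of how the two matching texts are
// derived: `mock_match_text` joins every non-empty string in the message
// JSON, while `mock_last_prompt_text` only keeps the newest message whose
// "content" yields non-empty text.
#[cfg(test)]
#[test]
fn mock_match_text_sketch() {
    let messages = vec![
        serde_json::json!({"role": "user", "content": "first question"}),
        serde_json::json!({"role": "assistant", "content": [
            {"type": "output_text", "text": "earlier answer"}
        ]}),
        serde_json::json!({"role": "user", "content": "follow-up"}),
    ];
    let all = mock_match_text(&messages);
    assert!(all.contains("first question"));
    assert!(all.contains("earlier answer"));
    assert!(all.contains("follow-up"));
    assert_eq!(mock_last_prompt_text(&messages), "follow-up");
}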

fn mock_prompt_cache_key(
    model: &str,
    messages: &[serde_json::Value],
    system: Option<&str>,
) -> String {
    serde_json::to_string(&serde_json::json!({
        "model": model,
        "system": system,
        "messages": messages,
    }))
    .unwrap_or_default()
}

fn apply_mock_prompt_cache(result: &mut LlmResult, cache_key: &str) {
    if result.cache_read_tokens > 0 || result.cache_write_tokens > 0 {
        return;
    }
    let cache_tokens = result.input_tokens.max(0);
    if cache_tokens == 0 {
        return;
    }
    let cache_hit = LLM_PROMPT_CACHE.with(|cache| {
        let mut cache = cache.borrow_mut();
        if cache.contains(cache_key) {
            true
        } else {
            cache.insert(cache_key.to_string());
            false
        }
    });
    if cache_hit {
        result.cache_read_tokens = cache_tokens;
    } else {
        result.cache_write_tokens = cache_tokens;
    }
}
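
// A minimal sketch added for illustration: the first call for a given cache
// key is treated as a cache write, a repeat call as a cache read. Field
// values are arbitrary placeholders.
#[cfg(test)]
#[test]
fn apply_mock_prompt_cache_sketch() {
    fn placeholder_result() -> LlmResult {
        LlmResult {
            text: String::new(),
            tool_calls: Vec::new(),
            input_tokens: 42,
            output_tokens: 0,
            cache_read_tokens: 0,
            cache_write_tokens: 0,
            model: "mock-model".to_string(),
            provider: "mock".to_string(),
            thinking: None,
            thinking_summary: None,
            stop_reason: None,
            blocks: Vec::new(),
        }
    }

    let key = "sketch-cache-key";
    let mut first = placeholder_result();
    apply_mock_prompt_cache(&mut first, key);
    assert_eq!(first.cache_write_tokens, 42);
    assert_eq!(first.cache_read_tokens, 0);

    let mut second = placeholder_result();
    apply_mock_prompt_cache(&mut second, key);
    assert_eq!(second.cache_read_tokens, 42);
    assert_eq!(second.cache_write_tokens, 0);
}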

/// Convert a mock's `error` payload into the `VmError` that the
/// provider path would have raised, so classification, retry, and
/// `error_category` all behave identically to a real failure.
fn mock_error_to_vm_error(err: &MockError) -> VmError {
    // Embed `retry_after_ms` as a synthetic `retry-after:` header on
    // the message so `agent_observe::extract_retry_after_ms` — the
    // same parser that handles real HTTP 429s — surfaces the value
    // on the caller's thrown dict. Keeps the mock path byte-for-byte
    // compatible with a real rate-limit response.
    let message = match err.retry_after_ms {
        Some(ms) => {
            let secs = (ms as f64 / 1000.0).max(0.0);
            let sep = if err.message.is_empty() || err.message.ends_with('\n') {
                ""
            } else {
                "\n"
            };
            format!("{}{sep}retry-after: {secs}\n", err.message)
        }
        None => err.message.clone(),
    };
    VmError::CategorizedError {
        message,
        category: err.category.clone(),
    }
}

/// Try to find and return a matching mock response. Returns
/// `Some(Ok(LlmResult))` on a text/tool_call match, `Some(Err(VmError))`
/// on an error-mock match, and `None` to fall through to default.
fn try_match_mock_queue(
    mocks: &mut Vec<LlmMock>,
    match_text: &str,
) -> Option<Result<LlmResult, VmError>> {
    if let Some(idx) = mocks.iter().position(|m| m.match_pattern.is_none()) {
        let mock = mocks.remove(idx);
        return Some(match &mock.error {
            Some(err) => Err(mock_error_to_vm_error(err)),
            None => Ok(build_mock_result(&mock, match_text.len())),
        });
    }

    for idx in 0..mocks.len() {
        let mock = &mocks[idx];
        if let Some(ref pattern) = mock.match_pattern {
            if mock_glob_match(pattern, match_text) {
                if mock.consume_on_match {
                    let mock = mocks.remove(idx);
                    return Some(match &mock.error {
                        Some(err) => Err(mock_error_to_vm_error(err)),
                        None => Ok(build_mock_result(&mock, match_text.len())),
                    });
                }
                return Some(match &mock.error {
                    Some(err) => Err(mock_error_to_vm_error(err)),
                    None => Ok(build_mock_result(mock, match_text.len())),
                });
            }
        }
    }

    None
}
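
// A minimal sketch added for illustration: FIFO (patternless) mocks win
// first and are consumed; glob mocks are matched next and stay queued
// unless `consume_on_match` is set. Field values are placeholders.
#[cfg(test)]
#[test]
fn try_match_mock_queue_sketch() {
    fn text_mock(text: &str, pattern: Option<&str>) -> LlmMock {
        LlmMock {
            text: text.to_string(),
            tool_calls: Vec::new(),
            match_pattern: pattern.map(|p| p.to_string()),
            consume_on_match: false,
            input_tokens: None,
            output_tokens: None,
            cache_read_tokens: None,
            cache_write_tokens: None,
            thinking: None,
            thinking_summary: None,
            stop_reason: None,
            model: "mock-model".to_string(),
            provider: None,
            blocks: None,
            error: None,
        }
    }
    fn expect_text(matched: Option<Result<LlmResult, VmError>>) -> String {
        match matched {
            Some(Ok(result)) => result.text,
            _ => panic!("expected a matched text mock"),
        }
    }

    let mut queue = vec![
        text_mock("fifo reply", None),
        text_mock("pattern reply", Some("*weather*")),
    ];

    // The patternless mock is consumed first, regardless of the prompt.
    assert_eq!(expect_text(try_match_mock_queue(&mut queue, "weather?")), "fifo reply");
    assert_eq!(queue.len(), 1);

    // The glob mock matches and, with consume_on_match = false, stays queued.
    assert_eq!(expect_text(try_match_mock_queue(&mut queue, "weather?")), "pattern reply");
    assert_eq!(queue.len(), 1);

    // An unmatched prompt falls through to the default mock behavior.
    assert!(try_match_mock_queue(&mut queue, "unrelated").is_none());
}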

fn try_match_builtin_mock(match_text: &str) -> Option<Result<LlmResult, VmError>> {
    LLM_MOCKS.with(|mocks| try_match_mock_queue(&mut mocks.borrow_mut(), match_text))
}

fn try_match_cli_mock(match_text: &str) -> Option<Result<LlmResult, VmError>> {
    CLI_LLM_MOCKS.with(|mocks| try_match_mock_queue(&mut mocks.borrow_mut(), match_text))
}

pub(crate) fn record_cli_llm_result(result: &LlmResult) {
    if !CLI_LLM_MOCK_MODE.with(|mode| *mode.borrow() == CliLlmMockMode::Record) {
        return;
    }
    CLI_LLM_RECORDINGS.with(|recordings| {
        recordings.borrow_mut().push(LlmMock {
            text: result.text.clone(),
            tool_calls: result.tool_calls.clone(),
            match_pattern: None,
            consume_on_match: false,
            input_tokens: Some(result.input_tokens),
            output_tokens: Some(result.output_tokens),
            cache_read_tokens: Some(result.cache_read_tokens),
            cache_write_tokens: Some(result.cache_write_tokens),
            thinking: result.thinking.clone(),
            thinking_summary: result.thinking_summary.clone(),
            stop_reason: result.stop_reason.clone(),
            model: result.model.clone(),
            provider: Some(result.provider.clone()),
            blocks: Some(result.blocks.clone()),
            error: None,
        });
    });
}

fn unmatched_cli_prompt_error(match_text: &str) -> VmError {
    let mut snippet: String = match_text.chars().take(200).collect();
    if match_text.chars().count() > 200 {
        snippet.push_str("...");
    }
    VmError::Runtime(format!("No --llm-mock fixture matched prompt: {snippet:?}"))
}

/// Set LLM replay mode (record/replay) and fixture directory.
pub fn set_replay_mode(mode: LlmReplayMode, fixture_dir: &str) {
    LLM_REPLAY_MODE.with(|v| *v.borrow_mut() = mode);
    LLM_FIXTURE_DIR.with(|v| *v.borrow_mut() = fixture_dir.to_string());
}

pub(crate) fn get_replay_mode() -> LlmReplayMode {
    LLM_REPLAY_MODE.with(|v| *v.borrow())
}

pub(crate) fn get_fixture_dir() -> String {
    LLM_FIXTURE_DIR.with(|v| v.borrow().clone())
}

/// Hash a request for fixture file naming using canonical JSON serialization.
pub(crate) fn fixture_hash(
    model: &str,
    messages: &[serde_json::Value],
    system: Option<&str>,
) -> String {
    use std::hash::{Hash, Hasher};
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    model.hash(&mut hasher);
    // Canonical JSON hashing is stable across Debug-format changes.
    serde_json::to_string(messages)
        .unwrap_or_default()
        .hash(&mut hasher);
    system.hash(&mut hasher);
    format!("{:016x}", hasher.finish())
}
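
// A minimal sketch added for illustration: the hash is deterministic for a
// given (model, messages, system) triple, so record and replay agree on the
// fixture file name, and it changes when any component changes.
#[cfg(test)]
#[test]
fn fixture_hash_sketch() {
    let messages = vec![serde_json::json!({"role": "user", "content": "hi"})];
    let a = fixture_hash("model-a", &messages, Some("system prompt"));
    let b = fixture_hash("model-a", &messages, Some("system prompt"));
    let c = fixture_hash("model-b", &messages, Some("system prompt"));
    assert_eq!(a, b);
    assert_ne!(a, c);
    assert_eq!(a.len(), 16); // zero-padded 64-bit hash rendered as hex
}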

pub(crate) fn save_fixture(hash: &str, result: &LlmResult) {
    let dir = get_fixture_dir();
    if dir.is_empty() {
        return;
    }
    let _ = std::fs::create_dir_all(&dir);
    let path = format!("{dir}/{hash}.json");
    let json = serde_json::json!({
        "text": result.text,
        "tool_calls": result.tool_calls,
        "input_tokens": result.input_tokens,
        "output_tokens": result.output_tokens,
        "cache_read_tokens": result.cache_read_tokens,
        "cache_write_tokens": result.cache_write_tokens,
        "cache_creation_input_tokens": result.cache_write_tokens,
        "model": result.model,
        "provider": result.provider,
        "thinking": result.thinking,
        "thinking_summary": result.thinking_summary,
        "stop_reason": result.stop_reason,
        "blocks": result.blocks,
    });
    let _ = std::fs::write(
        &path,
        serde_json::to_string_pretty(&json).unwrap_or_default(),
    );
}

pub(crate) fn load_fixture(hash: &str) -> Option<LlmResult> {
    let dir = get_fixture_dir();
    if dir.is_empty() {
        return None;
    }
    let path = format!("{dir}/{hash}.json");
    let content = std::fs::read_to_string(&path).ok()?;
    let json: serde_json::Value = serde_json::from_str(&content).ok()?;
    Some(LlmResult {
        text: json["text"].as_str().unwrap_or("").to_string(),
        tool_calls: json["tool_calls"].as_array().cloned().unwrap_or_default(),
        input_tokens: json["input_tokens"].as_i64().unwrap_or(0),
        output_tokens: json["output_tokens"].as_i64().unwrap_or(0),
        cache_read_tokens: json["cache_read_tokens"].as_i64().unwrap_or(0),
        cache_write_tokens: json["cache_write_tokens"]
            .as_i64()
            .or_else(|| json["cache_creation_input_tokens"].as_i64())
            .unwrap_or(0),
        model: json["model"].as_str().unwrap_or("").to_string(),
        provider: json["provider"].as_str().unwrap_or("mock").to_string(),
        thinking: json["thinking"].as_str().map(|s| s.to_string()),
        thinking_summary: json["thinking_summary"].as_str().map(|s| s.to_string()),
        stop_reason: json["stop_reason"].as_str().map(|s| s.to_string()),
        blocks: json["blocks"].as_array().cloned().unwrap_or_default(),
    })
}
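
// A minimal sketch added for illustration: `load_fixture` tolerates missing
// fields and falls back to the legacy `cache_creation_input_tokens` key when
// `cache_write_tokens` is absent. The temp-dir path is a test-only choice.
#[cfg(test)]
#[test]
fn load_fixture_legacy_key_sketch() {
    let dir = std::env::temp_dir().join("harn_vm_mock_fixture_sketch");
    let _ = std::fs::create_dir_all(&dir);
    set_replay_mode(LlmReplayMode::Replay, dir.to_str().expect("utf-8 temp dir"));

    let fixture = serde_json::json!({
        "text": "recorded reply",
        "cache_creation_input_tokens": 7,
        "model": "mock-model"
    });
    std::fs::write(
        dir.join("0000000000000000.json"),
        serde_json::to_string_pretty(&fixture).expect("serializable fixture"),
    )
    .expect("write fixture file");

    let loaded = load_fixture("0000000000000000").expect("fixture should load");
    assert_eq!(loaded.text, "recorded reply");
    assert_eq!(loaded.cache_write_tokens, 7);
    assert_eq!(loaded.input_tokens, 0); // absent fields default to zero/empty

    set_replay_mode(LlmReplayMode::Off, "");
}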

/// Generate stub argument values for required parameters in a tool schema.
/// This makes mock tool calls realistic — a real model would always fill
/// required fields, so the mock should too.
fn mock_required_args(tool_schema: &serde_json::Value) -> serde_json::Value {
    let mut args = serde_json::Map::new();
    // Anthropic: {name, input_schema: {properties, required}}
    // OpenAI:    {function: {name, parameters: {properties, required}}}
    // Harn VM:   {parameters: {name: {type, required}}}  (from tool_define)
    let input_schema = tool_schema
        .get("input_schema")
        .or_else(|| tool_schema.get("inputSchema"))
        .or_else(|| {
            tool_schema
                .get("function")
                .and_then(|f| f.get("parameters"))
        })
        .or_else(|| tool_schema.get("parameters"));
    let Some(schema) = input_schema else {
        return serde_json::Value::Object(args);
    };
    let required: std::collections::BTreeSet<String> = schema
        .get("required")
        .and_then(|r| r.as_array())
        .map(|arr| {
            arr.iter()
                .filter_map(|v| v.as_str().map(|s| s.to_string()))
                .collect()
        })
        .unwrap_or_default();
    if let Some(props) = schema.get("properties").and_then(|p| p.as_object()) {
        for (name, prop) in props {
            if !required.contains(name) {
                continue;
            }
            let ty = prop
                .get("type")
                .and_then(|t| t.as_str())
                .unwrap_or("string");
            let placeholder = match ty {
                "integer" => serde_json::json!(0),
                "number" => serde_json::json!(0.0),
                "boolean" => serde_json::json!(false),
                "array" => serde_json::json!([]),
                "object" => serde_json::json!({}),
                _ => serde_json::json!(""),
            };
            args.insert(name.clone(), placeholder);
        }
    }
    serde_json::Value::Object(args)
}
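
// A minimal sketch added for illustration: required parameters get typed
// placeholders, optional ones are omitted. Uses the OpenAI-style
// `function.parameters` shape listed in the comment above.
#[cfg(test)]
#[test]
fn mock_required_args_sketch() {
    let tool = serde_json::json!({
        "function": {
            "name": "lookup",
            "parameters": {
                "properties": {
                    "query": {"type": "string"},
                    "limit": {"type": "integer"},
                    "verbose": {"type": "boolean"}
                },
                "required": ["query", "limit"]
            }
        }
    });
    let args = mock_required_args(&tool);
    assert_eq!(args, serde_json::json!({"query": "", "limit": 0}));
}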

/// Mock LLM provider -- deterministic responses for testing without API keys.
/// When configurable mocks have been registered via `llm_mock()`, those are
/// checked first (FIFO queue, then pattern matching). Falls through to the
/// default deterministic behavior when no mocks match.
pub(crate) fn mock_llm_response(
    messages: &[serde_json::Value],
    system: Option<&str>,
    native_tools: Option<&[serde_json::Value]>,
    thinking: &super::api::ThinkingConfig,
    model: &str,
    cache: bool,
) -> Result<LlmResult, VmError> {
    record_llm_mock_call(messages, system, native_tools, thinking);

    let match_text = mock_match_text(messages);
    let prompt_text = mock_last_prompt_text(messages);
    let cache_key = mock_prompt_cache_key(model, messages, system);

    if let Some(matched) = try_match_cli_mock(&match_text) {
        return matched.map(|mut result| {
            if cache {
                apply_mock_prompt_cache(&mut result, &cache_key);
            }
            result
        });
    }

    if let Some(matched) = try_match_builtin_mock(&match_text) {
        return matched.map(|mut result| {
            if cache {
                apply_mock_prompt_cache(&mut result, &cache_key);
            }
            result
        });
    }

    if cli_llm_mock_replay_active() {
        return Err(unmatched_cli_prompt_error(&match_text));
    }

    // Generate a mock tool call for the first tool, filling required
    // params with placeholders so the call passes schema validation.
    if let Some(tools) = native_tools {
        if let Some(first_tool) = tools.first() {
            let tool_name = first_tool
                .get("name")
                .or_else(|| first_tool.get("function").and_then(|f| f.get("name")))
                .and_then(|n| n.as_str())
                .unwrap_or("unknown");
            let mock_args = mock_required_args(first_tool);
            let mut result = LlmResult {
                text: String::new(),
                tool_calls: vec![serde_json::json!({
                    "id": "mock_call_1",
                    "type": "tool_call",
                    "name": tool_name,
                    "arguments": mock_args,
                })],
                input_tokens: prompt_text.len() as i64,
                output_tokens: 20,
                cache_read_tokens: 0,
                cache_write_tokens: 0,
                model: model.to_string(),
                provider: "mock".to_string(),
                thinking: None,
                thinking_summary: None,
                stop_reason: None,
                blocks: vec![serde_json::json!({
                    "type": "tool_call",
                    "id": "mock_call_1",
                    "name": tool_name,
                    "arguments": mock_args,
                    "visibility": "internal",
                })],
            };
            if cache {
                apply_mock_prompt_cache(&mut result, &cache_key);
            }
            return Ok(result);
        }
    }

    // Preserve the historical auto-complete behavior for tagged text-tool
    // prompts only. Emitting a bare `##DONE##` for no-tool/native prompts
    // would change loop semantics: runs that used to exhaust their budget
    // would now complete even when no fixture explicitly returned the sentinel.
    let tagged_done = system.is_some_and(|s| s.contains("<done>"));

    let prose_body = if prompt_text.is_empty() {
        "Mock LLM response".to_string()
    } else {
        let word_count = prompt_text.split_whitespace().count();
        format!(
            "Mock response to {word_count}-word prompt: {}",
            prompt_text.chars().take(100).collect::<String>()
        )
    };
    let response = if tagged_done {
        format!("<assistant_prose>{prose_body}</assistant_prose>\n<done>##DONE##</done>")
    } else {
        prose_body
    };

    let mut result = LlmResult {
        text: response.clone(),
        tool_calls: vec![],
        input_tokens: prompt_text.len() as i64,
        output_tokens: 30,
        cache_read_tokens: 0,
        cache_write_tokens: 0,
        model: model.to_string(),
        provider: "mock".to_string(),
        thinking: None,
        thinking_summary: None,
        stop_reason: None,
        blocks: vec![serde_json::json!({
            "type": "output_text",
            "text": response,
            "visibility": "public",
        })],
    };
    if cache {
        apply_mock_prompt_cache(&mut result, &cache_key);
    }
    Ok(result)
}

pub fn set_tool_recording_mode(mode: ToolRecordingMode) {
    TOOL_RECORDING_MODE.with(|v| *v.borrow_mut() = mode);
}

pub(crate) fn get_tool_recording_mode() -> ToolRecordingMode {
    TOOL_RECORDING_MODE.with(|v| *v.borrow())
}

/// Append a tool call record during recording mode.
pub(crate) fn record_tool_call(record: ToolCallRecord) {
    TOOL_RECORDINGS.with(|v| v.borrow_mut().push(record));
}

/// Take all recorded tool calls, leaving the buffer empty.
pub fn drain_tool_recordings() -> Vec<ToolCallRecord> {
    TOOL_RECORDINGS.with(|v| std::mem::take(&mut *v.borrow_mut()))
}

/// Load tool call fixtures for replay mode.
pub fn load_tool_replay_fixtures(records: Vec<ToolCallRecord>) {
    TOOL_REPLAY_FIXTURES.with(|v| *v.borrow_mut() = records);
}

/// Look up a recorded fixture by tool name + args hash.
pub(crate) fn find_tool_replay_fixture(
    tool_name: &str,
    args: &serde_json::Value,
) -> Option<ToolCallRecord> {
    let hash = crate::orchestration::tool_fixture_hash(tool_name, args);
    TOOL_REPLAY_FIXTURES.with(|v| {
        v.borrow()
            .iter()
            .find(|r| r.tool_name == tool_name && r.args_hash == hash)
            .cloned()
    })
}