// harn_vm/llm/mock.rs

1use std::cell::RefCell;
2use std::collections::BTreeSet;
3
4use super::api::LlmResult;
5use crate::orchestration::ToolCallRecord;
6use crate::value::{ErrorCategory, VmError};
7
/// LLM replay mode: `Off` disables fixtures, `Record` captures real
/// provider results into the fixture directory, `Replay` serves them
/// back. Set via `set_replay_mode`.
///
/// Derives `Eq` (not just `PartialEq`) for consistency with
/// `CliLlmMockMode`; equality on a fieldless enum is total.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlmReplayMode {
    Off,
    Record,
    Replay,
}
15
/// Mode for CLI-installed (outer harness) mocks, kept separate from the
/// builtin `llm_mock()` queue: `Replay` serves fixtures installed via
/// `install_cli_llm_mocks`; `Record` captures real results through
/// `record_cli_llm_result`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CliLlmMockMode {
    Off,
    Replay,
    Record,
}
22
/// Categorized error injected by a mock. When present, the mock
/// short-circuits the provider call and surfaces as
/// `VmError::CategorizedError`, so `llm_call` throws and
/// `llm_call_safe` populates its `error` envelope.
#[derive(Clone)]
pub struct MockError {
    /// Classification carried through to `VmError::CategorizedError`.
    pub category: ErrorCategory,
    /// Human-readable message; a synthetic `retry-after:` line may be
    /// appended by `mock_error_to_vm_error`.
    pub message: String,
    /// Optional hint echoed into the error message as a synthetic
    /// `retry-after:` header so the existing `extract_retry_after_ms`
    /// parser recovers it — matches how real provider errors embed
    /// the value. Lets tests assert that `e.retry_after_ms` flows
    /// end-to-end on the thrown dict.
    pub retry_after_ms: Option<u64>,
}
38
/// A single scripted LLM response (or injected error) used by the mock
/// provider. Registered through the builtin queue (`push_llm_mock`) or
/// installed by the CLI harness (`install_cli_llm_mocks`).
#[derive(Clone)]
pub struct LlmMock {
    /// Assistant text echoed into `LlmResult::text`.
    pub text: String,
    /// Tool calls to synthesize; entries carry `name`/`arguments`.
    pub tool_calls: Vec<serde_json::Value>,
    pub match_pattern: Option<String>, // None = FIFO (consumed), Some = glob (reusable)
    /// When true, a pattern mock is removed after its first match
    /// instead of staying reusable.
    pub consume_on_match: bool,
    // Token counts: `None` falls back to defaults at match time
    // (input from prompt length, output fixed, cache counts zero —
    // see `build_mock_result`).
    pub input_tokens: Option<i64>,
    pub output_tokens: Option<i64>,
    pub cache_read_tokens: Option<i64>,
    pub cache_write_tokens: Option<i64>,
    /// Optional extended-thinking text and its summary.
    pub thinking: Option<String>,
    pub thinking_summary: Option<String>,
    pub stop_reason: Option<String>,
    /// Model name echoed into the result.
    pub model: String,
    /// Provider name; defaults to "mock" when `None`.
    pub provider: Option<String>,
    /// Pre-built content blocks; when `Some`, used verbatim instead of
    /// being synthesized from `text`/`tool_calls`.
    pub blocks: Option<Vec<serde_json::Value>>,
    /// When `Some`, this mock synthesizes an error instead of an
    /// `LlmResult`. `text`/`tool_calls` are ignored for error mocks.
    pub error: Option<MockError>,
}
59
/// One recorded call into the mock provider. Captured by
/// `record_llm_mock_call` on every invocation and read back via
/// `get_llm_mock_calls` so tests can assert on exactly what was sent.
#[derive(Clone)]
pub(crate) struct LlmMockCall {
    /// Conversation messages as provider-shaped JSON.
    pub messages: Vec<serde_json::Value>,
    pub system: Option<String>,
    pub tools: Option<Vec<serde_json::Value>>,
    pub tool_choice: Option<serde_json::Value>,
    /// Serialized `ThinkingConfig`; falls back to
    /// `{"mode": "disabled"}` when serialization fails.
    pub thinking: serde_json::Value,
}
68
/// Saved builtin-mock state for one scope push: (mock queue, recorded
/// calls, prompt-cache keys). See `push_llm_mock_scope`.
type LlmMockScope = (Vec<LlmMock>, Vec<LlmMockCall>, BTreeSet<String>);
70
// All mock/replay state is thread-local: each VM thread gets an
// isolated set, so parallel test runs cannot observe each other.
thread_local! {
    // Fixture-based record/replay mode and its on-disk directory.
    static LLM_REPLAY_MODE: RefCell<LlmReplayMode> = const { RefCell::new(LlmReplayMode::Off) };
    static LLM_FIXTURE_DIR: RefCell<String> = const { RefCell::new(String::new()) };
    // Tool-call records drained by `drain_tool_recordings`.
    static TOOL_RECORDINGS: RefCell<Vec<ToolCallRecord>> = const { RefCell::new(Vec::new()) };
    // Builtin mock queue registered through `push_llm_mock`.
    static LLM_MOCKS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    // CLI harness state: mode, installed fixtures, captured recordings.
    static CLI_LLM_MOCK_MODE: RefCell<CliLlmMockMode> = const { RefCell::new(CliLlmMockMode::Off) };
    static CLI_LLM_MOCKS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    static CLI_LLM_RECORDINGS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    // Every mock-provider invocation, for test assertions.
    static LLM_MOCK_CALLS: RefCell<Vec<LlmMockCall>> = const { RefCell::new(Vec::new()) };
    // Cache keys already "written", to simulate prompt-cache hits.
    static LLM_PROMPT_CACHE: RefCell<BTreeSet<String>> = const { RefCell::new(BTreeSet::new()) };
    // Stack of saved scopes for push/pop_llm_mock_scope.
    static LLM_MOCK_SCOPES: RefCell<Vec<LlmMockScope>> = const { RefCell::new(Vec::new()) };
}
83
84pub(crate) fn push_llm_mock(mock: LlmMock) {
85    LLM_MOCKS.with(|v| v.borrow_mut().push(mock));
86}
87
88pub(crate) fn get_llm_mock_calls() -> Vec<LlmMockCall> {
89    LLM_MOCK_CALLS.with(|v| v.borrow().clone())
90}
91
92pub(crate) fn builtin_llm_mock_active() -> bool {
93    LLM_MOCKS.with(|v| !v.borrow().is_empty())
94}
95
96pub(crate) fn reset_llm_mock_state() {
97    LLM_MOCKS.with(|v| v.borrow_mut().clear());
98    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Off);
99    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
100    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
101    LLM_MOCK_CALLS.with(|v| v.borrow_mut().clear());
102    LLM_PROMPT_CACHE.with(|v| v.borrow_mut().clear());
103    LLM_MOCK_SCOPES.with(|v| v.borrow_mut().clear());
104}
105
106/// Save the current builtin LLM mock queue and recorded-calls list, then
107/// start a fresh empty scope. Paired with `pop_llm_mock_scope`. Backs
108/// the `with_llm_mocks` helper in `std/testing` so tests reliably
109/// roll back to the prior state, including when the body throws.
110pub(crate) fn push_llm_mock_scope() {
111    let mocks = LLM_MOCKS.with(|v| std::mem::take(&mut *v.borrow_mut()));
112    let calls = LLM_MOCK_CALLS.with(|v| std::mem::take(&mut *v.borrow_mut()));
113    let cache = LLM_PROMPT_CACHE.with(|v| std::mem::take(&mut *v.borrow_mut()));
114    LLM_MOCK_SCOPES.with(|v| v.borrow_mut().push((mocks, calls, cache)));
115}
116
117/// Restore the most recently pushed builtin LLM mock scope. Returns
118/// `false` when there is nothing to pop, so the builtin can surface a
119/// clear "imbalanced scope" error rather than silently corrupting
120/// state. CLI-installed mocks are intentionally untouched: they are an
121/// outer harness and should not flicker on each per-test scope swap.
122pub(crate) fn pop_llm_mock_scope() -> bool {
123    let entry = LLM_MOCK_SCOPES.with(|v| v.borrow_mut().pop());
124    match entry {
125        Some((mocks, calls, cache)) => {
126            LLM_MOCKS.with(|v| *v.borrow_mut() = mocks);
127            LLM_MOCK_CALLS.with(|v| *v.borrow_mut() = calls);
128            LLM_PROMPT_CACHE.with(|v| *v.borrow_mut() = cache);
129            true
130        }
131        None => false,
132    }
133}
134
135pub fn clear_cli_llm_mock_mode() {
136    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Off);
137    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
138    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
139}
140
141pub fn install_cli_llm_mocks(mocks: Vec<LlmMock>) {
142    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Replay);
143    CLI_LLM_MOCKS.with(|v| *v.borrow_mut() = mocks);
144    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
145}
146
147pub fn enable_cli_llm_mock_recording() {
148    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Record);
149    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
150    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
151}
152
153pub fn take_cli_llm_recordings() -> Vec<LlmMock> {
154    CLI_LLM_RECORDINGS.with(|v| std::mem::take(&mut *v.borrow_mut()))
155}
156
157pub(crate) fn cli_llm_mock_replay_active() -> bool {
158    CLI_LLM_MOCK_MODE.with(|v| *v.borrow() == CliLlmMockMode::Replay)
159}
160
161fn record_llm_mock_call(
162    messages: &[serde_json::Value],
163    system: Option<&str>,
164    native_tools: Option<&[serde_json::Value]>,
165    tool_choice: Option<&serde_json::Value>,
166    thinking: &super::api::ThinkingConfig,
167) {
168    LLM_MOCK_CALLS.with(|v| {
169        v.borrow_mut().push(LlmMockCall {
170            messages: messages.to_vec(),
171            system: system.map(|s| s.to_string()),
172            tools: native_tools.map(|t| t.to_vec()),
173            tool_choice: tool_choice.cloned(),
174            thinking: serde_json::to_value(thinking).unwrap_or_else(|_| {
175                serde_json::json!({
176                    "mode": "disabled"
177                })
178            }),
179        });
180    });
181}
182
/// Build an LlmResult from a matched mock.
///
/// When the mock carries pre-built `blocks`, both `tool_calls` and
/// `blocks` are used verbatim; otherwise blocks are synthesized from
/// `text` plus normalized tool-call entries. `last_msg_len` seeds the
/// default `input_tokens` when the mock doesn't specify one.
fn build_mock_result(mock: &LlmMock, last_msg_len: usize) -> LlmResult {
    let (tool_calls, blocks) = if let Some(blocks) = &mock.blocks {
        // Verbatim path: trust the mock's own block list as-is.
        (mock.tool_calls.clone(), blocks.clone())
    } else {
        let mut blocks = Vec::new();

        // Non-empty text becomes a public output_text block.
        if !mock.text.is_empty() {
            blocks.push(serde_json::json!({
                "type": "output_text",
                "text": mock.text,
                "visibility": "public",
            }));
        }

        // Normalize each mock tool call: synthesize stable ids
        // (mock_call_1, mock_call_2, ...) and default missing fields.
        let mut tool_calls = Vec::new();
        for (i, tc) in mock.tool_calls.iter().enumerate() {
            let id = format!("mock_call_{}", i + 1);
            let name = tc.get("name").and_then(|n| n.as_str()).unwrap_or("unknown");
            let arguments = tc
                .get("arguments")
                .cloned()
                .unwrap_or(serde_json::json!({}));
            tool_calls.push(serde_json::json!({
                "id": id,
                "type": "tool_call",
                "name": name,
                "arguments": arguments,
            }));
            // Tool calls are mirrored as internal-visibility blocks.
            blocks.push(serde_json::json!({
                "type": "tool_call",
                "id": id,
                "name": name,
                "arguments": arguments,
                "visibility": "internal",
            }));
        }

        (tool_calls, blocks)
    };

    LlmResult {
        text: mock.text.clone(),
        tool_calls,
        // Token-count defaults when the mock leaves them unset.
        input_tokens: mock.input_tokens.unwrap_or(last_msg_len as i64),
        output_tokens: mock.output_tokens.unwrap_or(30),
        cache_read_tokens: mock.cache_read_tokens.unwrap_or(0),
        cache_write_tokens: mock.cache_write_tokens.unwrap_or(0),
        model: mock.model.clone(),
        provider: mock.provider.clone().unwrap_or_else(|| "mock".to_string()),
        thinking: mock.thinking.clone(),
        thinking_summary: mock.thinking_summary.clone(),
        stop_reason: mock.stop_reason.clone(),
        blocks,
    }
}
239
/// Multi-segment glob match: split on `*` and check segments appear in order.
/// Handles `*`, `prefix*`, `*suffix`, `*contains*`, `pre*mid*suf`, etc.
fn mock_glob_match(pattern: &str, text: &str) -> bool {
    // Fast paths: bare wildcard, or a literal pattern.
    if pattern == "*" {
        return true;
    }
    if !pattern.contains('*') {
        return pattern == text;
    }
    let segments: Vec<&str> = pattern.split('*').collect();
    let last = segments.len() - 1;
    let mut rest = text;
    for (idx, segment) in segments.iter().enumerate() {
        // Empty segments arise from leading/trailing/adjacent `*`.
        if segment.is_empty() {
            continue;
        }
        if idx == 0 {
            // First segment must be a literal prefix.
            match rest.strip_prefix(segment) {
                Some(tail) => rest = tail,
                None => return false,
            }
        } else if idx == last {
            // Last segment must be a suffix of the unconsumed tail.
            if !rest.ends_with(segment) {
                return false;
            }
            rest = "";
        } else if let Some(pos) = rest.find(segment) {
            // Middle segments: leftmost occurrence, consumed in order.
            rest = &rest[pos + segment.len()..];
        } else {
            return false;
        }
    }
    true
}
274
275fn collect_mock_match_strings(value: &serde_json::Value, out: &mut Vec<String>) {
276    match value {
277        serde_json::Value::String(text) if !text.is_empty() => out.push(text.clone()),
278        serde_json::Value::String(_) => {}
279        serde_json::Value::Array(items) => {
280            for item in items {
281                collect_mock_match_strings(item, out);
282            }
283        }
284        serde_json::Value::Object(map) => {
285            for value in map.values() {
286                collect_mock_match_strings(value, out);
287            }
288        }
289        _ => {}
290    }
291}
292
293fn mock_match_text(messages: &[serde_json::Value]) -> String {
294    let mut parts = Vec::new();
295    for message in messages {
296        collect_mock_match_strings(message, &mut parts);
297    }
298    parts.join("\n")
299}
300
301fn mock_last_prompt_text(messages: &[serde_json::Value]) -> String {
302    for message in messages.iter().rev() {
303        let Some(content) = message.get("content") else {
304            continue;
305        };
306        let mut parts = Vec::new();
307        collect_mock_match_strings(content, &mut parts);
308        let text = parts.join("\n");
309        if !text.trim().is_empty() {
310            return text;
311        }
312    }
313    String::new()
314}
315
316fn mock_prompt_cache_key(
317    model: &str,
318    messages: &[serde_json::Value],
319    system: Option<&str>,
320) -> String {
321    serde_json::to_string(&serde_json::json!({
322        "model": model,
323        "system": system,
324        "messages": messages,
325    }))
326    .unwrap_or_default()
327}
328
329fn apply_mock_prompt_cache(result: &mut LlmResult, cache_key: &str) {
330    if result.cache_read_tokens > 0 || result.cache_write_tokens > 0 {
331        return;
332    }
333    let cache_tokens = result.input_tokens.max(0);
334    if cache_tokens == 0 {
335        return;
336    }
337    let cache_hit = LLM_PROMPT_CACHE.with(|cache| {
338        let mut cache = cache.borrow_mut();
339        if cache.contains(cache_key) {
340            true
341        } else {
342            cache.insert(cache_key.to_string());
343            false
344        }
345    });
346    if cache_hit {
347        result.cache_read_tokens = cache_tokens;
348    } else {
349        result.cache_write_tokens = cache_tokens;
350    }
351}
352
/// Convert a mock's `error` payload into the `VmError` that the
/// provider path would have raised, so classification, retry, and
/// `error_category` all behave identically to a real failure.
fn mock_error_to_vm_error(err: &MockError) -> VmError {
    // Embed `retry_after_ms` as a synthetic `retry-after:` header on
    // the message so `agent_observe::extract_retry_after_ms` — the
    // same parser that handles real HTTP 429s — surfaces the value
    // on the caller's thrown dict. Keeps the mock path byte-for-byte
    // compatible with a real rate-limit response.
    let message = match err.retry_after_ms {
        Some(ms) => {
            // The synthetic header carries seconds (fractional), so
            // convert from ms. (`.max(0.0)` is defensive; `ms` is u64.)
            let secs = (ms as f64 / 1000.0).max(0.0);
            // Put the header on its own line without doubling newlines
            // when the message is empty or already newline-terminated.
            let sep = if err.message.is_empty() || err.message.ends_with('\n') {
                ""
            } else {
                "\n"
            };
            format!("{}{sep}retry-after: {secs}\n", err.message)
        }
        None => err.message.clone(),
    };
    VmError::CategorizedError {
        message,
        category: err.category.clone(),
    }
}
379
380/// Try to find and return a matching mock response. Returns
381/// `Some(Ok(LlmResult))` on a text/tool_call match, `Some(Err(VmError))`
382/// on an error-mock match, and `None` to fall through to default.
383fn try_match_mock_queue(
384    mocks: &mut Vec<LlmMock>,
385    match_text: &str,
386) -> Option<Result<LlmResult, VmError>> {
387    if let Some(idx) = mocks.iter().position(|m| m.match_pattern.is_none()) {
388        let mock = mocks.remove(idx);
389        return Some(match &mock.error {
390            Some(err) => Err(mock_error_to_vm_error(err)),
391            None => Ok(build_mock_result(&mock, match_text.len())),
392        });
393    }
394
395    for idx in 0..mocks.len() {
396        let mock = &mocks[idx];
397        if let Some(ref pattern) = mock.match_pattern {
398            if mock_glob_match(pattern, match_text) {
399                if mock.consume_on_match {
400                    let mock = mocks.remove(idx);
401                    return Some(match &mock.error {
402                        Some(err) => Err(mock_error_to_vm_error(err)),
403                        None => Ok(build_mock_result(&mock, match_text.len())),
404                    });
405                }
406                return Some(match &mock.error {
407                    Some(err) => Err(mock_error_to_vm_error(err)),
408                    None => Ok(build_mock_result(mock, match_text.len())),
409                });
410            }
411        }
412    }
413
414    None
415}
416
417fn try_match_builtin_mock(match_text: &str) -> Option<Result<LlmResult, VmError>> {
418    LLM_MOCKS.with(|mocks| try_match_mock_queue(&mut mocks.borrow_mut(), match_text))
419}
420
421fn try_match_cli_mock(match_text: &str) -> Option<Result<LlmResult, VmError>> {
422    CLI_LLM_MOCKS.with(|mocks| try_match_mock_queue(&mut mocks.borrow_mut(), match_text))
423}
424
425pub(crate) fn record_cli_llm_result(result: &LlmResult) {
426    if !CLI_LLM_MOCK_MODE.with(|mode| *mode.borrow() == CliLlmMockMode::Record) {
427        return;
428    }
429    CLI_LLM_RECORDINGS.with(|recordings| {
430        recordings.borrow_mut().push(LlmMock {
431            text: result.text.clone(),
432            tool_calls: result.tool_calls.clone(),
433            match_pattern: None,
434            consume_on_match: false,
435            input_tokens: Some(result.input_tokens),
436            output_tokens: Some(result.output_tokens),
437            cache_read_tokens: Some(result.cache_read_tokens),
438            cache_write_tokens: Some(result.cache_write_tokens),
439            thinking: result.thinking.clone(),
440            thinking_summary: result.thinking_summary.clone(),
441            stop_reason: result.stop_reason.clone(),
442            model: result.model.clone(),
443            provider: Some(result.provider.clone()),
444            blocks: Some(result.blocks.clone()),
445            error: None,
446        });
447    });
448}
449
450fn unmatched_cli_prompt_error(match_text: &str) -> VmError {
451    let mut snippet: String = match_text.chars().take(200).collect();
452    if match_text.chars().count() > 200 {
453        snippet.push_str("...");
454    }
455    VmError::Runtime(format!("No --llm-mock fixture matched prompt: {snippet:?}"))
456}
457
458/// Set LLM replay mode (record/replay) and fixture directory.
459pub fn set_replay_mode(mode: LlmReplayMode, fixture_dir: &str) {
460    LLM_REPLAY_MODE.with(|v| *v.borrow_mut() = mode);
461    LLM_FIXTURE_DIR.with(|v| *v.borrow_mut() = fixture_dir.to_string());
462}
463
464pub(crate) fn get_replay_mode() -> LlmReplayMode {
465    LLM_REPLAY_MODE.with(|v| *v.borrow())
466}
467
468pub(crate) fn get_fixture_dir() -> String {
469    LLM_FIXTURE_DIR.with(|v| v.borrow().clone())
470}
471
472/// Hash a request for fixture file naming using canonical JSON serialization.
473pub(crate) fn fixture_hash(
474    model: &str,
475    messages: &[serde_json::Value],
476    system: Option<&str>,
477) -> String {
478    use std::hash::{Hash, Hasher};
479    let mut hasher = std::collections::hash_map::DefaultHasher::new();
480    model.hash(&mut hasher);
481    // Canonical JSON hashing is stable across Debug-format changes.
482    serde_json::to_string(messages)
483        .unwrap_or_default()
484        .hash(&mut hasher);
485    system.hash(&mut hasher);
486    format!("{:016x}", hasher.finish())
487}
488
/// Best-effort write of an `LlmResult` to `<fixture_dir>/<hash>.json`.
/// No-op when no fixture dir is configured; directory-creation and
/// write errors are deliberately swallowed so recording can never fail
/// the run itself.
pub(crate) fn save_fixture(hash: &str, result: &LlmResult) {
    let dir = get_fixture_dir();
    if dir.is_empty() {
        return;
    }
    let _ = std::fs::create_dir_all(&dir);
    let path = format!("{dir}/{hash}.json");
    let json = serde_json::json!({
        "text": result.text,
        "tool_calls": result.tool_calls,
        "input_tokens": result.input_tokens,
        "output_tokens": result.output_tokens,
        "cache_read_tokens": result.cache_read_tokens,
        "cache_write_tokens": result.cache_write_tokens,
        // Mirrors cache_write_tokens under its alternate key so older
        // fixtures/readers keep working (see `load_fixture`'s fallback).
        "cache_creation_input_tokens": result.cache_write_tokens,
        "model": result.model,
        "provider": result.provider,
        "thinking": result.thinking,
        "thinking_summary": result.thinking_summary,
        "stop_reason": result.stop_reason,
        "blocks": result.blocks,
    });
    let _ = std::fs::write(
        &path,
        serde_json::to_string_pretty(&json).unwrap_or_default(),
    );
}
516
/// Load a previously saved fixture by request hash. Returns `None` when
/// no fixture dir is configured, the file is missing/unreadable, or the
/// contents don't parse as JSON; individual missing fields fall back to
/// defaults rather than failing the load.
pub(crate) fn load_fixture(hash: &str) -> Option<LlmResult> {
    let dir = get_fixture_dir();
    if dir.is_empty() {
        return None;
    }
    let path = format!("{dir}/{hash}.json");
    let content = std::fs::read_to_string(&path).ok()?;
    let json: serde_json::Value = serde_json::from_str(&content).ok()?;
    Some(LlmResult {
        text: json["text"].as_str().unwrap_or("").to_string(),
        tool_calls: json["tool_calls"].as_array().cloned().unwrap_or_default(),
        input_tokens: json["input_tokens"].as_i64().unwrap_or(0),
        output_tokens: json["output_tokens"].as_i64().unwrap_or(0),
        cache_read_tokens: json["cache_read_tokens"].as_i64().unwrap_or(0),
        // Older fixtures stored the write count only under
        // `cache_creation_input_tokens` (see `save_fixture`).
        cache_write_tokens: json["cache_write_tokens"]
            .as_i64()
            .or_else(|| json["cache_creation_input_tokens"].as_i64())
            .unwrap_or(0),
        model: json["model"].as_str().unwrap_or("").to_string(),
        provider: json["provider"].as_str().unwrap_or("mock").to_string(),
        thinking: json["thinking"].as_str().map(|s| s.to_string()),
        thinking_summary: json["thinking_summary"].as_str().map(|s| s.to_string()),
        stop_reason: json["stop_reason"].as_str().map(|s| s.to_string()),
        blocks: json["blocks"].as_array().cloned().unwrap_or_default(),
    })
}
543
544/// Generate stub argument values for required parameters in a tool schema.
545/// This makes mock tool calls realistic — a real model would always fill
546/// required fields, so the mock should too.
547fn mock_required_args(tool_schema: &serde_json::Value) -> serde_json::Value {
548    let mut args = serde_json::Map::new();
549    // Anthropic: {name, input_schema: {properties, required}}
550    // OpenAI:    {function: {name, parameters: {properties, required}}}
551    // Harn VM:   {parameters: {name: {type, required}}}  (from tool_define)
552    let input_schema = tool_schema
553        .get("input_schema")
554        .or_else(|| tool_schema.get("inputSchema"))
555        .or_else(|| {
556            tool_schema
557                .get("function")
558                .and_then(|f| f.get("parameters"))
559        })
560        .or_else(|| tool_schema.get("parameters"));
561    let Some(schema) = input_schema else {
562        return serde_json::Value::Object(args);
563    };
564    let required: std::collections::BTreeSet<String> = schema
565        .get("required")
566        .and_then(|r| r.as_array())
567        .map(|arr| {
568            arr.iter()
569                .filter_map(|v| v.as_str().map(|s| s.to_string()))
570                .collect()
571        })
572        .unwrap_or_default();
573    if let Some(props) = schema.get("properties").and_then(|p| p.as_object()) {
574        for (name, prop) in props {
575            if !required.contains(name) {
576                continue;
577            }
578            let ty = prop
579                .get("type")
580                .and_then(|t| t.as_str())
581                .unwrap_or("string");
582            let placeholder = match ty {
583                "integer" => serde_json::json!(0),
584                "number" => serde_json::json!(0.0),
585                "boolean" => serde_json::json!(false),
586                "array" => serde_json::json!([]),
587                "object" => serde_json::json!({}),
588                _ => serde_json::json!(""),
589            };
590            args.insert(name.clone(), placeholder);
591        }
592    }
593    serde_json::Value::Object(args)
594}
595
/// Mock LLM provider -- deterministic responses for testing without API keys.
/// When configurable mocks have been registered via `llm_mock()`, those are
/// checked first (FIFO queue, then pattern matching). Falls through to the
/// default deterministic behavior when no mocks match.
///
/// Resolution order: CLI-installed mocks → builtin mocks → (CLI replay
/// mode only) unmatched-prompt error → synthesized tool call for the
/// first native tool → deterministic prose.
pub(crate) fn mock_llm_response(
    messages: &[serde_json::Value],
    system: Option<&str>,
    native_tools: Option<&[serde_json::Value]>,
    tool_choice: Option<&serde_json::Value>,
    thinking: &super::api::ThinkingConfig,
    model: &str,
    cache: bool,
) -> Result<LlmResult, VmError> {
    // Every call is recorded for test assertions, matched or not.
    record_llm_mock_call(messages, system, native_tools, tool_choice, thinking);

    // match_text: all string leaves across all messages (glob haystack);
    // prompt_text: last non-empty message content; cache_key: canonical
    // request JSON for simulated prompt-cache accounting.
    let match_text = mock_match_text(messages);
    let prompt_text = mock_last_prompt_text(messages);
    let cache_key = mock_prompt_cache_key(model, messages, system);

    // CLI-installed mocks take priority over builtin ones.
    if let Some(matched) = try_match_cli_mock(&match_text) {
        return matched.map(|mut result| {
            if cache {
                apply_mock_prompt_cache(&mut result, &cache_key);
            }
            result
        });
    }

    if let Some(matched) = try_match_builtin_mock(&match_text) {
        return matched.map(|mut result| {
            if cache {
                apply_mock_prompt_cache(&mut result, &cache_key);
            }
            result
        });
    }

    // In CLI replay mode an unmatched prompt is a hard error rather
    // than falling through to the synthetic defaults below.
    if cli_llm_mock_replay_active() {
        return Err(unmatched_cli_prompt_error(&match_text));
    }

    // Generate a mock tool call for the first tool, filling required
    // params with placeholders so the call passes schema validation.
    if let Some(tools) = native_tools {
        if let Some(first_tool) = tools.first() {
            // Tool name may sit at the top level or under `function`
            // (see the schema layouts in `mock_required_args`).
            let tool_name = first_tool
                .get("name")
                .or_else(|| first_tool.get("function").and_then(|f| f.get("name")))
                .and_then(|n| n.as_str())
                .unwrap_or("unknown");
            let mock_args = mock_required_args(first_tool);
            let mut result = LlmResult {
                text: String::new(),
                tool_calls: vec![serde_json::json!({
                        "id": "mock_call_1",
                        "type": "tool_call",
                        "name": tool_name,
                "arguments": mock_args
                })],
                input_tokens: prompt_text.len() as i64,
                output_tokens: 20,
                cache_read_tokens: 0,
                cache_write_tokens: 0,
                model: model.to_string(),
                provider: "mock".to_string(),
                thinking: None,
                thinking_summary: None,
                stop_reason: None,
                blocks: vec![serde_json::json!({
                    "type": "tool_call",
                    "id": "mock_call_1",
                    "name": tool_name,
                    "arguments": mock_args,
                    "visibility": "internal",
                })],
            };
            if cache {
                apply_mock_prompt_cache(&mut result, &cache_key);
            }
            return Ok(result);
        }
    }

    // Preserve the historical auto-complete behavior for tagged text-tool
    // prompts only. Bare `##DONE##` in no-tool/native prompts changes
    // loop semantics by completing runs that used to exhaust budget unless
    // a fixture explicitly returned the sentinel.
    let tagged_done = system.is_some_and(|s| s.contains("<done>"));

    // Deterministic prose derived from the prompt so callers can
    // distinguish responses across different prompts.
    let prose_body = if prompt_text.is_empty() {
        "Mock LLM response".to_string()
    } else {
        let word_count = prompt_text.split_whitespace().count();
        format!(
            "Mock response to {word_count}-word prompt: {}",
            prompt_text.chars().take(100).collect::<String>()
        )
    };
    let response = if tagged_done {
        format!("<assistant_prose>{prose_body}</assistant_prose>\n<done>##DONE##</done>")
    } else {
        prose_body
    };

    let mut result = LlmResult {
        text: response.clone(),
        tool_calls: vec![],
        input_tokens: prompt_text.len() as i64,
        output_tokens: 30,
        cache_read_tokens: 0,
        cache_write_tokens: 0,
        model: model.to_string(),
        provider: "mock".to_string(),
        thinking: None,
        thinking_summary: None,
        stop_reason: None,
        blocks: vec![serde_json::json!({
            "type": "output_text",
            "text": response,
            "visibility": "public",
        })],
    };
    if cache {
        apply_mock_prompt_cache(&mut result, &cache_key);
    }
    Ok(result)
}
723
724/// Take all recorded tool calls, leaving the buffer empty.
725pub fn drain_tool_recordings() -> Vec<ToolCallRecord> {
726    TOOL_RECORDINGS.with(|v| std::mem::take(&mut *v.borrow_mut()))
727}