// harn_vm/llm/mock.rs
1use std::cell::RefCell;
2use std::collections::BTreeSet;
3
4use super::api::{LlmResult, ProviderTelemetry};
5use crate::orchestration::ToolCallRecord;
6use crate::value::{ErrorCategory, VmError};
7
/// LLM replay mode.
///
/// Controls whether provider calls are recorded to, or served from,
/// fixture files on disk (see `set_replay_mode` / `get_replay_mode`).
// `Eq` added for consistency with `CliLlmMockMode`, which derives it;
// the derive is free for a fieldless enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlmReplayMode {
    /// Normal operation: no fixture recording or playback.
    Off,
    /// Save each real response as a fixture.
    Record,
    /// Serve responses from previously recorded fixtures.
    Replay,
}
15
/// Mode of the CLI-installed mock layer, an outer harness that wraps a
/// whole run (installed via `install_cli_llm_mocks` /
/// `enable_cli_llm_mock_recording`), as opposed to per-test builtin mocks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CliLlmMockMode {
    /// No CLI mocking installed.
    Off,
    /// Serve responses from the `CLI_LLM_MOCKS` queue.
    Replay,
    /// Capture real responses into `CLI_LLM_RECORDINGS`.
    Record,
}
22
/// Categorized error injected by a mock. When present, the mock
/// short-circuits the provider call and surfaces as
/// `VmError::CategorizedError`, so `llm_call` throws and
/// `llm_call_safe` populates its `error` envelope.
#[derive(Clone)]
pub struct MockError {
    /// Category copied verbatim into the resulting `VmError::CategorizedError`.
    pub category: ErrorCategory,
    /// Human-readable error text; the synthetic retry-after header (below)
    /// is appended to it by `mock_error_to_vm_error`.
    pub message: String,
    /// Optional hint echoed into the error message as a synthetic
    /// `retry-after:` header so the existing `extract_retry_after_ms`
    /// parser recovers it — matches how real provider errors embed
    /// the value. Lets tests assert that `e.retry_after_ms` flows
    /// end-to-end on the thrown dict.
    pub retry_after_ms: Option<u64>,
}
38
/// One canned LLM response (or injected error) served in place of a real
/// provider call by `mock_llm_response` via `build_mock_result`.
#[derive(Clone)]
pub struct LlmMock {
    /// Response text; also synthesized into an `output_text` block when
    /// no explicit `blocks` are supplied.
    pub text: String,
    /// Tool calls to emit; each entry may carry `name` / `arguments`.
    pub tool_calls: Vec<serde_json::Value>,
    pub match_pattern: Option<String>, // None = FIFO (consumed), Some = glob (reusable)
    /// When true, a pattern mock is removed from the queue on its first
    /// match instead of remaining reusable.
    pub consume_on_match: bool,
    // Token counts: `None` falls back to defaults in `build_mock_result`
    // (input scales with prompt length, output defaults to 30).
    pub input_tokens: Option<i64>,
    pub output_tokens: Option<i64>,
    pub cache_read_tokens: Option<i64>,
    pub cache_write_tokens: Option<i64>,
    pub thinking: Option<String>,
    pub thinking_summary: Option<String>,
    pub stop_reason: Option<String>,
    pub model: String,
    /// Provider name reported in the result; defaults to "mock" when `None`.
    pub provider: Option<String>,
    /// Pre-built content blocks; when `Some`, both `blocks` and
    /// `tool_calls` are passed through unchanged instead of being
    /// synthesized from `text`/`tool_calls`.
    pub blocks: Option<Vec<serde_json::Value>>,
    pub logprobs: Vec<serde_json::Value>,
    /// When `Some`, this mock synthesizes an error instead of an
    /// `LlmResult`. `text`/`tool_calls` are ignored for error mocks.
    pub error: Option<MockError>,
}
60
/// Snapshot of one request seen by the mock provider, captured by
/// `record_llm_mock_call` so tests can assert on what was sent
/// (retrieved via `get_llm_mock_calls`).
#[derive(Clone)]
pub(crate) struct LlmMockCall {
    pub messages: Vec<serde_json::Value>,
    pub system: Option<String>,
    pub tools: Option<Vec<serde_json::Value>>,
    pub tool_choice: Option<serde_json::Value>,
    /// `ThinkingConfig` serialized to JSON; `{"mode": "disabled"}` when
    /// serialization fails.
    pub thinking: serde_json::Value,
}
69
/// One saved builtin-mock scope: (mock queue, recorded calls, prompt-cache keys).
type LlmMockScope = (Vec<LlmMock>, Vec<LlmMockCall>, BTreeSet<String>);
71
// All mock/replay state is thread-local; `reset_llm_mock_state` and the
// scope push/pop helpers below manage its lifecycle.
thread_local! {
    // Fixture record/replay mode (see `set_replay_mode`).
    static LLM_REPLAY_MODE: RefCell<LlmReplayMode> = const { RefCell::new(LlmReplayMode::Off) };
    // Directory where replay fixtures are read/written ("" = unset).
    static LLM_FIXTURE_DIR: RefCell<String> = const { RefCell::new(String::new()) };
    // Tool calls captured for later draining via `drain_tool_recordings`.
    static TOOL_RECORDINGS: RefCell<Vec<ToolCallRecord>> = const { RefCell::new(Vec::new()) };
    // Builtin mock queue registered via `push_llm_mock`.
    static LLM_MOCKS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    // Whether the CLI harness layer is off / replaying / recording.
    static CLI_LLM_MOCK_MODE: RefCell<CliLlmMockMode> = const { RefCell::new(CliLlmMockMode::Off) };
    // Mocks installed by the CLI (`install_cli_llm_mocks`).
    static CLI_LLM_MOCKS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    // Real responses captured while CLI record mode is on.
    static CLI_LLM_RECORDINGS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
    // Every request the mock provider has seen (for test assertions).
    static LLM_MOCK_CALLS: RefCell<Vec<LlmMockCall>> = const { RefCell::new(Vec::new()) };
    // Prompt keys already seen — drives mock cache-token accounting.
    static LLM_PROMPT_CACHE: RefCell<BTreeSet<String>> = const { RefCell::new(BTreeSet::new()) };
    // Stack of saved scopes for `push_llm_mock_scope`/`pop_llm_mock_scope`.
    static LLM_MOCK_SCOPES: RefCell<Vec<LlmMockScope>> = const { RefCell::new(Vec::new()) };
}
84
85pub(crate) fn push_llm_mock(mock: LlmMock) {
86    LLM_MOCKS.with(|v| v.borrow_mut().push(mock));
87}
88
89pub(crate) fn get_llm_mock_calls() -> Vec<LlmMockCall> {
90    LLM_MOCK_CALLS.with(|v| v.borrow().clone())
91}
92
93pub(crate) fn builtin_llm_mock_active() -> bool {
94    LLM_MOCKS.with(|v| !v.borrow().is_empty())
95}
96
97pub(crate) fn reset_llm_mock_state() {
98    LLM_MOCKS.with(|v| v.borrow_mut().clear());
99    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Off);
100    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
101    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
102    LLM_MOCK_CALLS.with(|v| v.borrow_mut().clear());
103    LLM_PROMPT_CACHE.with(|v| v.borrow_mut().clear());
104    LLM_MOCK_SCOPES.with(|v| v.borrow_mut().clear());
105}
106
107/// Save the current builtin LLM mock queue and recorded-calls list, then
108/// start a fresh empty scope. Paired with `pop_llm_mock_scope`. Backs
109/// the `with_llm_mocks` helper in `std/testing` so tests reliably
110/// roll back to the prior state, including when the body throws.
111pub(crate) fn push_llm_mock_scope() {
112    let mocks = LLM_MOCKS.with(|v| std::mem::take(&mut *v.borrow_mut()));
113    let calls = LLM_MOCK_CALLS.with(|v| std::mem::take(&mut *v.borrow_mut()));
114    let cache = LLM_PROMPT_CACHE.with(|v| std::mem::take(&mut *v.borrow_mut()));
115    LLM_MOCK_SCOPES.with(|v| v.borrow_mut().push((mocks, calls, cache)));
116}
117
118/// Restore the most recently pushed builtin LLM mock scope. Returns
119/// `false` when there is nothing to pop, so the builtin can surface a
120/// clear "imbalanced scope" error rather than silently corrupting
121/// state. CLI-installed mocks are intentionally untouched: they are an
122/// outer harness and should not flicker on each per-test scope swap.
123pub(crate) fn pop_llm_mock_scope() -> bool {
124    let entry = LLM_MOCK_SCOPES.with(|v| v.borrow_mut().pop());
125    match entry {
126        Some((mocks, calls, cache)) => {
127            LLM_MOCKS.with(|v| *v.borrow_mut() = mocks);
128            LLM_MOCK_CALLS.with(|v| *v.borrow_mut() = calls);
129            LLM_PROMPT_CACHE.with(|v| *v.borrow_mut() = cache);
130            true
131        }
132        None => false,
133    }
134}
135
136pub fn clear_cli_llm_mock_mode() {
137    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Off);
138    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
139    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
140}
141
142pub fn install_cli_llm_mocks(mocks: Vec<LlmMock>) {
143    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Replay);
144    CLI_LLM_MOCKS.with(|v| *v.borrow_mut() = mocks);
145    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
146}
147
148pub fn enable_cli_llm_mock_recording() {
149    CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Record);
150    CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
151    CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
152}
153
154pub fn take_cli_llm_recordings() -> Vec<LlmMock> {
155    CLI_LLM_RECORDINGS.with(|v| std::mem::take(&mut *v.borrow_mut()))
156}
157
158pub(crate) fn cli_llm_mock_replay_active() -> bool {
159    CLI_LLM_MOCK_MODE.with(|v| *v.borrow() == CliLlmMockMode::Replay)
160}
161
162fn record_llm_mock_call(
163    messages: &[serde_json::Value],
164    system: Option<&str>,
165    native_tools: Option<&[serde_json::Value]>,
166    tool_choice: Option<&serde_json::Value>,
167    thinking: &super::api::ThinkingConfig,
168) {
169    LLM_MOCK_CALLS.with(|v| {
170        v.borrow_mut().push(LlmMockCall {
171            messages: messages.to_vec(),
172            system: system.map(|s| s.to_string()),
173            tools: native_tools.map(|t| t.to_vec()),
174            tool_choice: tool_choice.cloned(),
175            thinking: serde_json::to_value(thinking).unwrap_or_else(|_| {
176                serde_json::json!({
177                    "mode": "disabled"
178                })
179            }),
180        });
181    });
182}
183
184/// Build an LlmResult from a matched mock.
185fn build_mock_result(mock: &LlmMock, last_msg_len: usize) -> LlmResult {
186    let (tool_calls, blocks) = if let Some(blocks) = &mock.blocks {
187        (mock.tool_calls.clone(), blocks.clone())
188    } else {
189        let mut blocks = Vec::new();
190
191        if !mock.text.is_empty() {
192            blocks.push(serde_json::json!({
193                "type": "output_text",
194                "text": mock.text,
195                "visibility": "public",
196            }));
197        }
198
199        let mut tool_calls = Vec::new();
200        for (i, tc) in mock.tool_calls.iter().enumerate() {
201            let id = format!("mock_call_{}", i + 1);
202            let name = tc.get("name").and_then(|n| n.as_str()).unwrap_or("unknown");
203            let arguments = tc
204                .get("arguments")
205                .cloned()
206                .unwrap_or(serde_json::json!({}));
207            tool_calls.push(serde_json::json!({
208                "id": id,
209                "type": "tool_call",
210                "name": name,
211                "arguments": arguments,
212            }));
213            blocks.push(serde_json::json!({
214                "type": "tool_call",
215                "id": id,
216                "name": name,
217                "arguments": arguments,
218                "visibility": "internal",
219            }));
220        }
221
222        (tool_calls, blocks)
223    };
224
225    LlmResult {
226        text: mock.text.clone(),
227        tool_calls,
228        input_tokens: mock.input_tokens.unwrap_or(last_msg_len as i64),
229        output_tokens: mock.output_tokens.unwrap_or(30),
230        cache_read_tokens: mock.cache_read_tokens.unwrap_or(0),
231        cache_write_tokens: mock.cache_write_tokens.unwrap_or(0),
232        model: mock.model.clone(),
233        provider: mock.provider.clone().unwrap_or_else(|| "mock".to_string()),
234        thinking: mock.thinking.clone(),
235        thinking_summary: mock.thinking_summary.clone(),
236        stop_reason: mock.stop_reason.clone(),
237        blocks,
238        logprobs: mock.logprobs.clone(),
239        telemetry: ProviderTelemetry::default(),
240    }
241}
242
/// Multi-segment glob match: split on `*` and check segments appear in order.
/// Handles `*`, `prefix*`, `*suffix`, `*contains*`, `pre*mid*suf`, etc.
fn mock_glob_match(pattern: &str, text: &str) -> bool {
    if pattern == "*" {
        return true;
    }
    if !pattern.contains('*') {
        return pattern == text;
    }
    let segments: Vec<&str> = pattern.split('*').collect();
    let last = segments.len() - 1;
    let mut rest = text;
    for (idx, segment) in segments.iter().enumerate() {
        if segment.is_empty() {
            // Empty segment = leading/trailing/doubled `*`; matches anything.
            continue;
        }
        if idx == 0 {
            // First segment is anchored at the start.
            match rest.strip_prefix(segment) {
                Some(trimmed) => rest = trimmed,
                None => return false,
            }
        } else if idx == last {
            // Last segment is anchored at the end of what remains.
            if !rest.ends_with(segment) {
                return false;
            }
            rest = "";
        } else {
            // Middle segments must appear, in order, anywhere in the rest.
            match rest.find(segment) {
                Some(pos) => rest = &rest[pos + segment.len()..],
                None => return false,
            }
        }
    }
    true
}
277
278fn collect_mock_match_strings(value: &serde_json::Value, out: &mut Vec<String>) {
279    match value {
280        serde_json::Value::String(text) if !text.is_empty() => out.push(text.clone()),
281        serde_json::Value::String(_) => {}
282        serde_json::Value::Array(items) => {
283            for item in items {
284                collect_mock_match_strings(item, out);
285            }
286        }
287        serde_json::Value::Object(map) => {
288            for value in map.values() {
289                collect_mock_match_strings(value, out);
290            }
291        }
292        _ => {}
293    }
294}
295
296fn mock_match_text(messages: &[serde_json::Value]) -> String {
297    let mut parts = Vec::new();
298    for message in messages {
299        collect_mock_match_strings(message, &mut parts);
300    }
301    parts.join("\n")
302}
303
304fn mock_last_prompt_text(messages: &[serde_json::Value]) -> String {
305    for message in messages.iter().rev() {
306        let Some(content) = message.get("content") else {
307            continue;
308        };
309        let mut parts = Vec::new();
310        collect_mock_match_strings(content, &mut parts);
311        let text = parts.join("\n");
312        if !text.trim().is_empty() {
313            return text;
314        }
315    }
316    String::new()
317}
318
319fn mock_prompt_cache_key(
320    model: &str,
321    messages: &[serde_json::Value],
322    system: Option<&str>,
323) -> String {
324    serde_json::to_string(&serde_json::json!({
325        "model": model,
326        "system": system,
327        "messages": messages,
328    }))
329    .unwrap_or_default()
330}
331
332fn apply_mock_prompt_cache(result: &mut LlmResult, cache_key: &str) {
333    if result.cache_read_tokens > 0 || result.cache_write_tokens > 0 {
334        return;
335    }
336    let cache_tokens = result.input_tokens.max(0);
337    if cache_tokens == 0 {
338        return;
339    }
340    let cache_hit = LLM_PROMPT_CACHE.with(|cache| {
341        let mut cache = cache.borrow_mut();
342        if cache.contains(cache_key) {
343            true
344        } else {
345            cache.insert(cache_key.to_string());
346            false
347        }
348    });
349    if cache_hit {
350        result.cache_read_tokens = cache_tokens;
351    } else {
352        result.cache_write_tokens = cache_tokens;
353    }
354}
355
356/// Convert a mock's `error` payload into the `VmError` that the
357/// provider path would have raised, so classification, retry, and
358/// `error_category` all behave identically to a real failure.
359fn mock_error_to_vm_error(err: &MockError) -> VmError {
360    // Embed `retry_after_ms` as a synthetic `retry-after:` header on
361    // the message so `agent_observe::extract_retry_after_ms` — the
362    // same parser that handles real HTTP 429s — surfaces the value
363    // on the caller's thrown dict. Keeps the mock path byte-for-byte
364    // compatible with a real rate-limit response.
365    let message = match err.retry_after_ms {
366        Some(ms) => {
367            let secs = (ms as f64 / 1000.0).max(0.0);
368            let sep = if err.message.is_empty() || err.message.ends_with('\n') {
369                ""
370            } else {
371                "\n"
372            };
373            format!("{}{sep}retry-after: {secs}\n", err.message)
374        }
375        None => err.message.clone(),
376    };
377    VmError::CategorizedError {
378        message,
379        category: err.category.clone(),
380    }
381}
382
383/// Try to find and return a matching mock response. Returns
384/// `Some(Ok(LlmResult))` on a text/tool_call match, `Some(Err(VmError))`
385/// on an error-mock match, and `None` to fall through to default.
386fn try_match_mock_queue(
387    mocks: &mut Vec<LlmMock>,
388    match_text: &str,
389) -> Option<Result<LlmResult, VmError>> {
390    if let Some(idx) = mocks.iter().position(|m| m.match_pattern.is_none()) {
391        let mock = mocks.remove(idx);
392        return Some(match &mock.error {
393            Some(err) => Err(mock_error_to_vm_error(err)),
394            None => Ok(build_mock_result(&mock, match_text.len())),
395        });
396    }
397
398    for idx in 0..mocks.len() {
399        let mock = &mocks[idx];
400        if let Some(ref pattern) = mock.match_pattern {
401            if mock_glob_match(pattern, match_text) {
402                if mock.consume_on_match {
403                    let mock = mocks.remove(idx);
404                    return Some(match &mock.error {
405                        Some(err) => Err(mock_error_to_vm_error(err)),
406                        None => Ok(build_mock_result(&mock, match_text.len())),
407                    });
408                }
409                return Some(match &mock.error {
410                    Some(err) => Err(mock_error_to_vm_error(err)),
411                    None => Ok(build_mock_result(mock, match_text.len())),
412                });
413            }
414        }
415    }
416
417    None
418}
419
420fn try_match_builtin_mock(match_text: &str) -> Option<Result<LlmResult, VmError>> {
421    LLM_MOCKS.with(|mocks| try_match_mock_queue(&mut mocks.borrow_mut(), match_text))
422}
423
424fn try_match_cli_mock(match_text: &str) -> Option<Result<LlmResult, VmError>> {
425    CLI_LLM_MOCKS.with(|mocks| try_match_mock_queue(&mut mocks.borrow_mut(), match_text))
426}
427
428pub(crate) fn record_cli_llm_result(result: &LlmResult) {
429    record_unified_tape_llm_call(result);
430    if !CLI_LLM_MOCK_MODE.with(|mode| *mode.borrow() == CliLlmMockMode::Record) {
431        return;
432    }
433    CLI_LLM_RECORDINGS.with(|recordings| {
434        recordings.borrow_mut().push(LlmMock {
435            text: result.text.clone(),
436            tool_calls: result.tool_calls.clone(),
437            match_pattern: None,
438            consume_on_match: false,
439            input_tokens: Some(result.input_tokens),
440            output_tokens: Some(result.output_tokens),
441            cache_read_tokens: Some(result.cache_read_tokens),
442            cache_write_tokens: Some(result.cache_write_tokens),
443            thinking: result.thinking.clone(),
444            thinking_summary: result.thinking_summary.clone(),
445            stop_reason: result.stop_reason.clone(),
446            model: result.model.clone(),
447            provider: Some(result.provider.clone()),
448            blocks: Some(result.blocks.clone()),
449            logprobs: result.logprobs.clone(),
450            error: None,
451        });
452    });
453}
454
/// Append an `LlmCall` record to the unified-tape recorder when one is
/// active. The request digest is built from the most recently recorded
/// `LlmMockCall` so the same hashing surface used for fixture matching
/// drives the fidelity oracle's request comparison; falls back to a
/// hash of the response text alone when no matching call is on record
/// (e.g. when `record_llm_mock_call` was bypassed).
fn record_unified_tape_llm_call(result: &LlmResult) {
    // Cheap early-out: skip all serialization when no recorder is active.
    if crate::testbench::tape::active_recorder().is_none() {
        return;
    }
    // NOTE(review): a serialization failure silently degrades to an empty
    // response payload — presumably acceptable for a best-effort tape;
    // confirm downstream readers tolerate it.
    let response_json = serde_json::to_vec(result).unwrap_or_else(|_| Vec::new());
    let request_digest = LLM_MOCK_CALLS
        .with(|calls| calls.borrow().last().cloned())
        .map(|call| {
            // Digest covers the full recorded request surface plus the
            // model name from the response.
            let serialized = serde_json::to_vec(&serde_json::json!({
                "messages": call.messages,
                "system": call.system,
                "tools": call.tools,
                "tool_choice": call.tool_choice,
                "thinking": call.thinking,
                "model": result.model,
            }))
            .unwrap_or_default();
            crate::testbench::tape::content_hash(&serialized)
        })
        .unwrap_or_else(|| {
            // Fall back to hashing the response — keeps fidelity comparable
            // across runs even when the request surface wasn't captured.
            crate::testbench::tape::content_hash(result.text.as_bytes())
        });
    crate::testbench::tape::with_active_recorder(|recorder| {
        let response = recorder.payload_from_bytes(response_json);
        Some(crate::testbench::tape::TapeRecordKind::LlmCall {
            request_digest,
            response,
        })
    });
}
493
494fn unmatched_cli_prompt_error(match_text: &str) -> VmError {
495    let mut snippet: String = match_text.chars().take(200).collect();
496    if match_text.chars().count() > 200 {
497        snippet.push_str("...");
498    }
499    VmError::Runtime(format!("No --llm-mock fixture matched prompt: {snippet:?}"))
500}
501
502/// Set LLM replay mode (record/replay) and fixture directory.
503pub fn set_replay_mode(mode: LlmReplayMode, fixture_dir: &str) {
504    LLM_REPLAY_MODE.with(|v| *v.borrow_mut() = mode);
505    LLM_FIXTURE_DIR.with(|v| *v.borrow_mut() = fixture_dir.to_string());
506}
507
508pub(crate) fn get_replay_mode() -> LlmReplayMode {
509    LLM_REPLAY_MODE.with(|v| *v.borrow())
510}
511
512pub(crate) fn get_fixture_dir() -> String {
513    LLM_FIXTURE_DIR.with(|v| v.borrow().clone())
514}
515
516/// Hash a request for fixture file naming using canonical JSON serialization.
517pub(crate) fn fixture_hash(
518    model: &str,
519    messages: &[serde_json::Value],
520    system: Option<&str>,
521) -> String {
522    use std::hash::{Hash, Hasher};
523    let mut hasher = std::collections::hash_map::DefaultHasher::new();
524    model.hash(&mut hasher);
525    // Canonical JSON hashing is stable across Debug-format changes.
526    serde_json::to_string(messages)
527        .unwrap_or_default()
528        .hash(&mut hasher);
529    system.hash(&mut hasher);
530    format!("{:016x}", hasher.finish())
531}
532
533pub(crate) fn save_fixture(hash: &str, result: &LlmResult) {
534    let dir = get_fixture_dir();
535    if dir.is_empty() {
536        return;
537    }
538    let _ = std::fs::create_dir_all(&dir);
539    let path = format!("{dir}/{hash}.json");
540    let json = serde_json::json!({
541        "text": result.text,
542        "tool_calls": result.tool_calls,
543        "input_tokens": result.input_tokens,
544        "output_tokens": result.output_tokens,
545        "cache_read_tokens": result.cache_read_tokens,
546        "cache_write_tokens": result.cache_write_tokens,
547        "cache_creation_input_tokens": result.cache_write_tokens,
548        "model": result.model,
549        "provider": result.provider,
550        "thinking": result.thinking,
551        "thinking_summary": result.thinking_summary,
552        "stop_reason": result.stop_reason,
553        "blocks": result.blocks,
554        "logprobs": result.logprobs,
555    });
556    let _ = std::fs::write(
557        &path,
558        serde_json::to_string_pretty(&json).unwrap_or_default(),
559    );
560}
561
562pub(crate) fn load_fixture(hash: &str) -> Option<LlmResult> {
563    let dir = get_fixture_dir();
564    if dir.is_empty() {
565        return None;
566    }
567    let path = format!("{dir}/{hash}.json");
568    let content = std::fs::read_to_string(&path).ok()?;
569    let json: serde_json::Value = serde_json::from_str(&content).ok()?;
570    Some(LlmResult {
571        text: json["text"].as_str().unwrap_or("").to_string(),
572        tool_calls: json["tool_calls"].as_array().cloned().unwrap_or_default(),
573        input_tokens: json["input_tokens"].as_i64().unwrap_or(0),
574        output_tokens: json["output_tokens"].as_i64().unwrap_or(0),
575        cache_read_tokens: json["cache_read_tokens"].as_i64().unwrap_or(0),
576        cache_write_tokens: json["cache_write_tokens"]
577            .as_i64()
578            .or_else(|| json["cache_creation_input_tokens"].as_i64())
579            .unwrap_or(0),
580        model: json["model"].as_str().unwrap_or("").to_string(),
581        provider: json["provider"].as_str().unwrap_or("mock").to_string(),
582        thinking: json["thinking"].as_str().map(|s| s.to_string()),
583        thinking_summary: json["thinking_summary"].as_str().map(|s| s.to_string()),
584        stop_reason: json["stop_reason"].as_str().map(|s| s.to_string()),
585        blocks: json["blocks"].as_array().cloned().unwrap_or_default(),
586        logprobs: json["logprobs"].as_array().cloned().unwrap_or_default(),
587        telemetry: serde_json::from_value(json["telemetry"].clone()).unwrap_or_default(),
588    })
589}
590
591/// Generate stub argument values for required parameters in a tool schema.
592/// This makes mock tool calls realistic — a real model would always fill
593/// required fields, so the mock should too.
594fn mock_required_args(tool_schema: &serde_json::Value) -> serde_json::Value {
595    let mut args = serde_json::Map::new();
596    // Anthropic: {name, input_schema: {properties, required}}
597    // OpenAI:    {function: {name, parameters: {properties, required}}}
598    // Harn VM:   {parameters: {name: {type, required}}}  (from tool_define)
599    let input_schema = tool_schema
600        .get("input_schema")
601        .or_else(|| tool_schema.get("inputSchema"))
602        .or_else(|| {
603            tool_schema
604                .get("function")
605                .and_then(|f| f.get("parameters"))
606        })
607        .or_else(|| tool_schema.get("parameters"));
608    let Some(schema) = input_schema else {
609        return serde_json::Value::Object(args);
610    };
611    let required: std::collections::BTreeSet<String> = schema
612        .get("required")
613        .and_then(|r| r.as_array())
614        .map(|arr| {
615            arr.iter()
616                .filter_map(|v| v.as_str().map(|s| s.to_string()))
617                .collect()
618        })
619        .unwrap_or_default();
620    if let Some(props) = schema.get("properties").and_then(|p| p.as_object()) {
621        for (name, prop) in props {
622            if !required.contains(name) {
623                continue;
624            }
625            let ty = prop
626                .get("type")
627                .and_then(|t| t.as_str())
628                .unwrap_or("string");
629            let placeholder = match ty {
630                "integer" => serde_json::json!(0),
631                "number" => serde_json::json!(0.0),
632                "boolean" => serde_json::json!(false),
633                "array" => serde_json::json!([]),
634                "object" => serde_json::json!({}),
635                _ => serde_json::json!(""),
636            };
637            args.insert(name.clone(), placeholder);
638        }
639    }
640    serde_json::Value::Object(args)
641}
642
/// Mock LLM provider -- deterministic responses for testing without API keys.
/// When configurable mocks have been registered via `llm_mock()`, those are
/// checked first (FIFO queue, then pattern matching). Falls through to the
/// default deterministic behavior when no mocks match.
pub(crate) fn mock_llm_response(
    messages: &[serde_json::Value],
    system: Option<&str>,
    native_tools: Option<&[serde_json::Value]>,
    tool_choice: Option<&serde_json::Value>,
    thinking: &super::api::ThinkingConfig,
    model: &str,
    cache: bool,
) -> Result<LlmResult, VmError> {
    // Record every request up front so tests can assert on the request
    // surface regardless of which branch below produces the response.
    record_llm_mock_call(messages, system, native_tools, tool_choice, thinking);

    let match_text = mock_match_text(messages);
    let prompt_text = mock_last_prompt_text(messages);
    let cache_key = mock_prompt_cache_key(model, messages, system);

    // Precedence: CLI-installed mocks first, then builtin mocks.
    if let Some(matched) = try_match_cli_mock(&match_text) {
        return matched.map(|mut result| {
            if cache {
                apply_mock_prompt_cache(&mut result, &cache_key);
            }
            result
        });
    }

    if let Some(matched) = try_match_builtin_mock(&match_text) {
        return matched.map(|mut result| {
            if cache {
                apply_mock_prompt_cache(&mut result, &cache_key);
            }
            result
        });
    }

    // In CLI replay mode an unmatched prompt is a hard error rather than
    // a silent fall-through to the deterministic default below.
    if cli_llm_mock_replay_active() {
        return Err(unmatched_cli_prompt_error(&match_text));
    }

    // Generate a mock tool call for the first tool, filling required
    // params with placeholders so the call passes schema validation.
    // NOTE(review): the default paths size input_tokens from the last
    // prompt only (`prompt_text`), while matched mocks use the whole
    // conversation (`match_text`) — confirm this asymmetry is intended.
    if let Some(tools) = native_tools {
        if let Some(first_tool) = tools.first() {
            let tool_name = first_tool
                .get("name")
                .or_else(|| first_tool.get("function").and_then(|f| f.get("name")))
                .and_then(|n| n.as_str())
                .unwrap_or("unknown");
            let mock_args = mock_required_args(first_tool);
            let mut result = LlmResult {
                text: String::new(),
                tool_calls: vec![serde_json::json!({
                        "id": "mock_call_1",
                        "type": "tool_call",
                        "name": tool_name,
                "arguments": mock_args
                })],
                input_tokens: prompt_text.len() as i64,
                output_tokens: 20,
                cache_read_tokens: 0,
                cache_write_tokens: 0,
                model: model.to_string(),
                provider: "mock".to_string(),
                thinking: None,
                thinking_summary: None,
                stop_reason: None,
                blocks: vec![serde_json::json!({
                    "type": "tool_call",
                    "id": "mock_call_1",
                    "name": tool_name,
                    "arguments": mock_args,
                    "visibility": "internal",
                })],
                logprobs: Vec::new(),
                telemetry: ProviderTelemetry::default(),
            };
            if cache {
                apply_mock_prompt_cache(&mut result, &cache_key);
            }
            return Ok(result);
        }
    }

    // Preserve the historical auto-complete behavior for tagged text-tool
    // prompts only. Bare `##DONE##` in no-tool/native prompts changes
    // loop semantics by completing runs that used to exhaust budget unless
    // a fixture explicitly returned the sentinel.
    let tagged_done = system.is_some_and(|s| s.contains("<done>"));

    // Deterministic prose: echoes word count plus the first 100 chars of
    // the last prompt, so distinct prompts produce distinct responses.
    let prose_body = if prompt_text.is_empty() {
        "Mock LLM response".to_string()
    } else {
        let word_count = prompt_text.split_whitespace().count();
        format!(
            "Mock response to {word_count}-word prompt: {}",
            prompt_text.chars().take(100).collect::<String>()
        )
    };
    let response = if tagged_done {
        format!("<assistant_prose>{prose_body}</assistant_prose>\n<done>##DONE##</done>")
    } else {
        prose_body
    };

    let mut result = LlmResult {
        text: response.clone(),
        tool_calls: vec![],
        input_tokens: prompt_text.len() as i64,
        output_tokens: 30,
        cache_read_tokens: 0,
        cache_write_tokens: 0,
        model: model.to_string(),
        provider: "mock".to_string(),
        thinking: None,
        thinking_summary: None,
        stop_reason: None,
        blocks: vec![serde_json::json!({
            "type": "output_text",
            "text": response,
            "visibility": "public",
        })],
        logprobs: Vec::new(),
        telemetry: ProviderTelemetry::default(),
    };
    if cache {
        apply_mock_prompt_cache(&mut result, &cache_key);
    }
    Ok(result)
}
774
775/// Take all recorded tool calls, leaving the buffer empty.
776pub fn drain_tool_recordings() -> Vec<ToolCallRecord> {
777    TOOL_RECORDINGS.with(|v| std::mem::take(&mut *v.borrow_mut()))
778}