1use std::cell::RefCell;
2use std::collections::{BTreeMap, BTreeSet};
3
4use super::api::{LlmResult, ProviderTelemetry};
5use crate::orchestration::ToolCallRecord;
6use crate::value::{ErrorCategory, VmError, VmValue};
7
/// Fixture replay mode stored per thread by `set_replay_mode` and read back
/// with `get_replay_mode`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LlmReplayMode {
    /// Initial value of the thread-local mode.
    Off,
    /// Presumably: results are written to fixtures — confirm against callers.
    Record,
    /// Presumably: results are read from fixtures — confirm against callers.
    Replay,
}
15
/// State of the CLI-driven mock layer, kept in the `CLI_LLM_MOCK_MODE`
/// thread-local.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum CliLlmMockMode {
    /// No CLI mocks installed and nothing is being recorded.
    Off,
    /// Set by `install_cli_llm_mocks`; unmatched prompts become errors.
    Replay,
    /// Set by `enable_cli_llm_mock_recording`; results are captured as mocks.
    Record,
}
22
23#[derive(Clone)]
28pub struct MockError {
29 pub category: ErrorCategory,
30 pub message: String,
31 pub status: Option<u16>,
32 pub kind: Option<String>,
33 pub reason: Option<String>,
34 pub retry_after_ms: Option<u64>,
39}
40
41impl MockError {
42 fn has_provider_envelope(&self) -> bool {
43 self.status.is_some() || self.kind.is_some() || self.reason.is_some()
44 }
45}
46
47pub(crate) fn build_mock_error(
48 category: Option<String>,
49 message: Option<String>,
50 status: Option<u16>,
51 kind: Option<String>,
52 reason: Option<String>,
53 retry_after_ms: Option<u64>,
54) -> Result<MockError, String> {
55 if retry_after_ms.is_some_and(|ms| ms > i64::MAX as u64) {
56 return Err("error.retry_after_ms must fit in a signed 64-bit integer".to_string());
57 }
58 let kind = match kind {
59 Some(value) if value.trim().is_empty() => None,
60 Some(value) => {
61 let normalized = value.trim().to_ascii_lowercase();
62 if super::api::LlmErrorKind::parse(&normalized).is_none() {
63 return Err(format!("unknown error kind `{value}`"));
64 }
65 Some(normalized)
66 }
67 None => None,
68 };
69 let reason = reason.and_then(|value| {
70 let trimmed = value.trim();
71 if trimmed.is_empty() {
72 None
73 } else {
74 Some(trimmed.to_string())
75 }
76 });
77 let category_was_provided = category.is_some();
78 let category = match category {
79 Some(value) if value.trim().is_empty() => {
80 return Err("error.category must not be empty".to_string());
81 }
82 Some(value) => {
83 let normalized = value.trim().to_ascii_lowercase();
84 let category = ErrorCategory::parse(&normalized);
85 if category.as_str() != normalized {
86 return Err(format!("unknown error category `{value}`"));
87 }
88 category
89 }
90 None => infer_mock_error_category(status, kind.as_deref(), reason.as_deref()),
91 };
92 if !category_was_provided && kind.is_none() && status.is_none() && reason.is_none() {
93 return Err(
94 "error.category is required unless error.status, error.kind, or error.reason is set"
95 .to_string(),
96 );
97 }
98 Ok(MockError {
99 category,
100 message: message.unwrap_or_else(|| {
101 default_mock_error_message(status, kind.as_deref(), reason.as_deref())
102 }),
103 status,
104 kind,
105 reason,
106 retry_after_ms,
107 })
108}
109
110pub(crate) fn validate_mock_error_status(status: i64) -> Result<u16, String> {
111 let status = u16::try_from(status)
112 .map_err(|_| "error.status must be an HTTP status code".to_string())?;
113 reqwest::StatusCode::from_u16(status)
114 .map_err(|_| "error.status must be an HTTP status code".to_string())?;
115 Ok(status)
116}
117
118fn infer_mock_error_category(
119 status: Option<u16>,
120 kind: Option<&str>,
121 reason: Option<&str>,
122) -> ErrorCategory {
123 if let Some(status) = status {
124 match status {
125 401 | 403 => return ErrorCategory::Auth,
126 404 | 410 => return ErrorCategory::NotFound,
127 408 | 504 | 522 | 524 => return ErrorCategory::Timeout,
128 429 => return ErrorCategory::RateLimit,
129 503 | 529 => return ErrorCategory::Overloaded,
130 500 | 502 => return ErrorCategory::ServerError,
131 _ => {}
132 }
133 }
134 if let Some(reason) = reason {
135 match reason {
136 "rate_limit" => return ErrorCategory::RateLimit,
137 "timeout" => return ErrorCategory::Timeout,
138 "network_error" | "transient_network" => return ErrorCategory::TransientNetwork,
139 "server_error" | "provider_error" | "provider_5xx" | "upstream_unavailable" => {
140 return ErrorCategory::ServerError;
141 }
142 "auth_failure" => return ErrorCategory::Auth,
143 "model_unavailable" => return ErrorCategory::NotFound,
144 _ => {}
145 }
146 }
147 if kind == Some("transient") {
148 return ErrorCategory::ServerError;
149 }
150 ErrorCategory::Generic
151}
152
/// Builds the fallback message for a mock error that has no explicit text.
///
/// The message is `"[HTTP <status> ]mock LLM error[ (<detail>)]"`. The detail
/// is `kind/reason` when both are set, or the reason alone. A kind without a
/// reason is only shown when there is no status. With nothing set the result
/// is the empty string.
fn default_mock_error_message(
    status: Option<u16>,
    kind: Option<&str>,
    reason: Option<&str>,
) -> String {
    let detail = match (kind, reason) {
        (Some(kind), Some(reason)) => Some(format!("{kind}/{reason}")),
        (None, Some(reason)) => Some(reason.to_string()),
        // A lone kind is dropped when a status is present.
        (Some(kind), None) if status.is_none() => Some(kind.to_string()),
        _ => None,
    };
    let headline = match status {
        Some(status) => format!("HTTP {status} mock LLM error"),
        None if detail.is_some() => "mock LLM error".to_string(),
        None => return String::new(),
    };
    match detail {
        Some(detail) => format!("{headline} ({detail})"),
        None => headline,
    }
}
170
171#[derive(Clone)]
172pub struct LlmMock {
173 pub text: String,
174 pub tool_calls: Vec<serde_json::Value>,
175 pub match_pattern: Option<String>, pub consume_on_match: bool,
177 pub input_tokens: Option<i64>,
178 pub output_tokens: Option<i64>,
179 pub cache_read_tokens: Option<i64>,
180 pub cache_write_tokens: Option<i64>,
181 pub thinking: Option<String>,
182 pub thinking_summary: Option<String>,
183 pub stop_reason: Option<String>,
184 pub model: String,
185 pub provider: Option<String>,
186 pub blocks: Option<Vec<serde_json::Value>>,
187 pub logprobs: Vec<serde_json::Value>,
188 pub error: Option<MockError>,
191}
192
193#[derive(Clone)]
194pub(crate) struct LlmMockCall {
195 pub api_mode: String,
196 pub messages: Vec<serde_json::Value>,
197 pub system: Option<String>,
198 pub tools: Option<Vec<serde_json::Value>>,
199 pub provider_tools: Option<Vec<serde_json::Value>>,
200 pub tool_choice: Option<serde_json::Value>,
201 pub output_format: serde_json::Value,
202 pub thinking: serde_json::Value,
203 pub previous_response_id: Option<String>,
204 pub store: Option<bool>,
205 pub background: Option<bool>,
206 pub truncation: Option<String>,
207 pub compact: Option<bool>,
208 pub include: Option<Vec<String>>,
209 pub max_tool_calls: Option<i64>,
210}
211
212type LlmMockScope = (Vec<LlmMock>, Vec<LlmMockCall>, BTreeSet<String>);
213
214thread_local! {
215 static LLM_REPLAY_MODE: RefCell<LlmReplayMode> = const { RefCell::new(LlmReplayMode::Off) };
216 static LLM_FIXTURE_DIR: RefCell<String> = const { RefCell::new(String::new()) };
217 static TOOL_RECORDINGS: RefCell<Vec<ToolCallRecord>> = const { RefCell::new(Vec::new()) };
218 static LLM_MOCKS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
219 static CLI_LLM_MOCK_MODE: RefCell<CliLlmMockMode> = const { RefCell::new(CliLlmMockMode::Off) };
220 static CLI_LLM_MOCKS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
221 static CLI_LLM_RECORDINGS: RefCell<Vec<LlmMock>> = const { RefCell::new(Vec::new()) };
222 static LLM_MOCK_CALLS: RefCell<Vec<LlmMockCall>> = const { RefCell::new(Vec::new()) };
223 static LLM_PROMPT_CACHE: RefCell<BTreeSet<String>> = const { RefCell::new(BTreeSet::new()) };
224 static LLM_MOCK_SCOPES: RefCell<Vec<LlmMockScope>> = const { RefCell::new(Vec::new()) };
225}
226
227pub(crate) fn push_llm_mock(mock: LlmMock) {
228 LLM_MOCKS.with(|v| v.borrow_mut().push(mock));
229}
230
231pub(crate) fn get_llm_mock_calls() -> Vec<LlmMockCall> {
232 LLM_MOCK_CALLS.with(|v| v.borrow().clone())
233}
234
235pub(crate) fn builtin_llm_mock_active() -> bool {
236 LLM_MOCKS.with(|v| !v.borrow().is_empty())
237}
238
239pub(crate) fn reset_llm_mock_state() {
240 LLM_MOCKS.with(|v| v.borrow_mut().clear());
241 CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Off);
242 CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
243 CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
244 LLM_MOCK_CALLS.with(|v| v.borrow_mut().clear());
245 LLM_PROMPT_CACHE.with(|v| v.borrow_mut().clear());
246 LLM_MOCK_SCOPES.with(|v| v.borrow_mut().clear());
247}
248
249pub(crate) fn push_llm_mock_scope() {
254 let mocks = LLM_MOCKS.with(|v| std::mem::take(&mut *v.borrow_mut()));
255 let calls = LLM_MOCK_CALLS.with(|v| std::mem::take(&mut *v.borrow_mut()));
256 let cache = LLM_PROMPT_CACHE.with(|v| std::mem::take(&mut *v.borrow_mut()));
257 LLM_MOCK_SCOPES.with(|v| v.borrow_mut().push((mocks, calls, cache)));
258}
259
260pub(crate) fn pop_llm_mock_scope() -> bool {
266 let entry = LLM_MOCK_SCOPES.with(|v| v.borrow_mut().pop());
267 match entry {
268 Some((mocks, calls, cache)) => {
269 LLM_MOCKS.with(|v| *v.borrow_mut() = mocks);
270 LLM_MOCK_CALLS.with(|v| *v.borrow_mut() = calls);
271 LLM_PROMPT_CACHE.with(|v| *v.borrow_mut() = cache);
272 true
273 }
274 None => false,
275 }
276}
277
278pub fn clear_cli_llm_mock_mode() {
279 CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Off);
280 CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
281 CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
282}
283
284pub fn install_cli_llm_mocks(mocks: Vec<LlmMock>) {
285 CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Replay);
286 CLI_LLM_MOCKS.with(|v| *v.borrow_mut() = mocks);
287 CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
288}
289
290pub fn enable_cli_llm_mock_recording() {
291 CLI_LLM_MOCK_MODE.with(|v| *v.borrow_mut() = CliLlmMockMode::Record);
292 CLI_LLM_MOCKS.with(|v| v.borrow_mut().clear());
293 CLI_LLM_RECORDINGS.with(|v| v.borrow_mut().clear());
294}
295
296pub fn take_cli_llm_recordings() -> Vec<LlmMock> {
297 CLI_LLM_RECORDINGS.with(|v| std::mem::take(&mut *v.borrow_mut()))
298}
299
300pub(crate) fn cli_llm_mock_replay_active() -> bool {
301 CLI_LLM_MOCK_MODE.with(|v| *v.borrow() == CliLlmMockMode::Replay)
302}
303
304fn record_llm_mock_call(request: &super::api::LlmRequestPayload) {
305 LLM_MOCK_CALLS.with(|v| {
306 v.borrow_mut().push(LlmMockCall {
307 api_mode: request.api_mode.as_str().to_string(),
308 messages: request.messages.clone(),
309 system: request.system.clone(),
310 tools: request.native_tools.clone(),
311 provider_tools: if request.provider_tools.is_empty() {
312 None
313 } else {
314 Some(request.provider_tools.clone())
315 },
316 tool_choice: request.tool_choice.clone(),
317 output_format: serde_json::to_value(&request.output_format).unwrap_or_else(|_| {
318 serde_json::json!({
319 "kind": "text"
320 })
321 }),
322 thinking: serde_json::to_value(&request.thinking).unwrap_or_else(|_| {
323 serde_json::json!({
324 "mode": "disabled"
325 })
326 }),
327 previous_response_id: request.previous_response_id.clone(),
328 store: request.store,
329 background: request.background,
330 truncation: request.truncation.clone(),
331 compact: request.compact,
332 include: request.include.clone(),
333 max_tool_calls: request.max_tool_calls,
334 });
335 });
336}
337
338fn build_mock_result(mock: &LlmMock, last_msg_len: usize) -> LlmResult {
340 let (tool_calls, blocks) = if let Some(blocks) = &mock.blocks {
341 (mock.tool_calls.clone(), blocks.clone())
342 } else {
343 let mut blocks = Vec::new();
344
345 if !mock.text.is_empty() {
346 blocks.push(serde_json::json!({
347 "type": "output_text",
348 "text": mock.text,
349 "visibility": "public",
350 }));
351 }
352
353 let mut tool_calls = Vec::new();
354 for (i, tc) in mock.tool_calls.iter().enumerate() {
355 let id = format!("mock_call_{}", i + 1);
356 let name = tc.get("name").and_then(|n| n.as_str()).unwrap_or("unknown");
357 let arguments = tc
358 .get("arguments")
359 .cloned()
360 .unwrap_or(serde_json::json!({}));
361 tool_calls.push(serde_json::json!({
362 "id": id,
363 "type": "tool_call",
364 "name": name,
365 "arguments": arguments,
366 }));
367 blocks.push(serde_json::json!({
368 "type": "tool_call",
369 "id": id,
370 "name": name,
371 "arguments": arguments,
372 "visibility": "internal",
373 }));
374 }
375
376 (tool_calls, blocks)
377 };
378
379 LlmResult {
380 served_fast: false,
381 text: mock.text.clone(),
382 tool_calls,
383 input_tokens: mock.input_tokens.unwrap_or(last_msg_len as i64),
384 output_tokens: mock.output_tokens.unwrap_or(30),
385 cache_read_tokens: mock.cache_read_tokens.unwrap_or(0),
386 cache_write_tokens: mock.cache_write_tokens.unwrap_or(0),
387 cache_supported: true,
388 model: mock.model.clone(),
389 provider: mock.provider.clone().unwrap_or_else(|| "mock".to_string()),
390 thinking: mock.thinking.clone(),
391 thinking_summary: mock.thinking_summary.clone(),
392 stop_reason: mock.stop_reason.clone(),
393 blocks,
394 logprobs: mock.logprobs.clone(),
395 telemetry: ProviderTelemetry::default(),
396 }
397}
398
/// Matches `text` against a glob where `*` stands for any run of characters
/// (including none). A pattern without `*` must equal `text` exactly.
///
/// The pattern is treated as `head*middle*tail`: `head` must be a prefix,
/// each non-empty middle segment is located left to right after what has
/// already been consumed, and `tail` must be a suffix of whatever remains.
fn mock_glob_match(pattern: &str, text: &str) -> bool {
    let Some((head, rest)) = pattern.split_once('*') else {
        return pattern == text;
    };
    let Some(mut unmatched) = text.strip_prefix(head) else {
        return false;
    };
    // With a single `*` there are no middle segments and `rest` is the tail.
    let (middle, tail) = rest.rsplit_once('*').unwrap_or(("", rest));
    for segment in middle.split('*').filter(|segment| !segment.is_empty()) {
        let Some(at) = unmatched.find(segment) else {
            return false;
        };
        unmatched = &unmatched[at + segment.len()..];
    }
    unmatched.ends_with(tail)
}
433
434fn collect_mock_match_strings(value: &serde_json::Value, out: &mut Vec<String>) {
435 match value {
436 serde_json::Value::String(text) if !text.is_empty() => out.push(text.clone()),
437 serde_json::Value::String(_) => {}
438 serde_json::Value::Array(items) => {
439 for item in items {
440 collect_mock_match_strings(item, out);
441 }
442 }
443 serde_json::Value::Object(map) => {
444 for value in map.values() {
445 collect_mock_match_strings(value, out);
446 }
447 }
448 _ => {}
449 }
450}
451
452fn mock_match_text(messages: &[serde_json::Value]) -> String {
453 let mut parts = Vec::new();
454 for message in messages {
455 collect_mock_match_strings(message, &mut parts);
456 }
457 parts.join("\n")
458}
459
460fn mock_last_prompt_text(messages: &[serde_json::Value]) -> String {
461 for message in messages.iter().rev() {
462 let Some(content) = message.get("content") else {
463 continue;
464 };
465 let mut parts = Vec::new();
466 collect_mock_match_strings(content, &mut parts);
467 let text = parts.join("\n");
468 if !text.trim().is_empty() {
469 return text;
470 }
471 }
472 String::new()
473}
474
475fn mock_prompt_cache_key(
476 model: &str,
477 messages: &[serde_json::Value],
478 system: Option<&str>,
479) -> String {
480 serde_json::to_string(&serde_json::json!({
481 "model": model,
482 "system": system,
483 "messages": messages,
484 }))
485 .unwrap_or_default()
486}
487
488fn apply_mock_prompt_cache(result: &mut LlmResult, cache_key: &str) {
489 if result.cache_read_tokens > 0 || result.cache_write_tokens > 0 {
490 return;
491 }
492 let cache_tokens = result.input_tokens.max(0);
493 if cache_tokens == 0 {
494 return;
495 }
496 let cache_hit = LLM_PROMPT_CACHE.with(|cache| {
497 let mut cache = cache.borrow_mut();
498 if cache.contains(cache_key) {
499 true
500 } else {
501 cache.insert(cache_key.to_string());
502 false
503 }
504 });
505 if cache_hit {
506 result.cache_read_tokens = cache_tokens;
507 } else {
508 result.cache_write_tokens = cache_tokens;
509 }
510}
511
512fn mock_error_to_vm_error(err: &MockError) -> VmError {
516 let message = mock_error_message(err);
517 if err.has_provider_envelope() {
518 let classified = super::api::classify_llm_error(err.category.clone(), &message);
519 let mut dict = BTreeMap::new();
520 dict.insert(
521 "category".to_string(),
522 VmValue::String(std::sync::Arc::from(err.category.as_str())),
523 );
524 dict.insert(
525 "kind".to_string(),
526 VmValue::String(std::sync::Arc::from(
527 err.kind
528 .as_deref()
529 .unwrap_or_else(|| classified.kind.as_str()),
530 )),
531 );
532 dict.insert(
533 "reason".to_string(),
534 VmValue::String(std::sync::Arc::from(
535 err.reason
536 .as_deref()
537 .unwrap_or_else(|| classified.reason.as_str()),
538 )),
539 );
540 dict.insert(
541 "message".to_string(),
542 VmValue::String(std::sync::Arc::from(message)),
543 );
544 if let Some(status) = err.status {
545 dict.insert("status".to_string(), VmValue::Int(i64::from(status)));
546 }
547 if let Some(retry_after_ms) = err.retry_after_ms {
548 dict.insert(
549 "retry_after_ms".to_string(),
550 VmValue::Int(retry_after_ms as i64),
551 );
552 }
553 return VmError::Thrown(VmValue::Dict(std::sync::Arc::new(dict)));
554 }
555
556 VmError::CategorizedError {
557 message,
558 category: err.category.clone(),
559 }
560}
561
562fn mock_error_message(err: &MockError) -> String {
563 let Some(ms) = err.retry_after_ms else {
567 return err.message.clone();
568 };
569 if err.has_provider_envelope() {
570 return err.message.clone();
571 }
572 let secs = (ms as f64 / 1000.0).max(0.0);
573 let sep = if err.message.is_empty() || err.message.ends_with('\n') {
574 ""
575 } else {
576 "\n"
577 };
578 format!("{}{sep}retry-after: {secs}\n", err.message)
579}
580
581fn try_match_mock_queue(
585 mocks: &mut Vec<LlmMock>,
586 match_text: &str,
587) -> Option<Result<LlmResult, VmError>> {
588 if let Some(idx) = mocks.iter().position(|m| m.match_pattern.is_none()) {
589 let mock = mocks.remove(idx);
590 return Some(match &mock.error {
591 Some(err) => Err(mock_error_to_vm_error(err)),
592 None => Ok(build_mock_result(&mock, match_text.len())),
593 });
594 }
595
596 for idx in 0..mocks.len() {
597 let mock = &mocks[idx];
598 if let Some(ref pattern) = mock.match_pattern {
599 if mock_glob_match(pattern, match_text) {
600 if mock.consume_on_match {
601 let mock = mocks.remove(idx);
602 return Some(match &mock.error {
603 Some(err) => Err(mock_error_to_vm_error(err)),
604 None => Ok(build_mock_result(&mock, match_text.len())),
605 });
606 }
607 return Some(match &mock.error {
608 Some(err) => Err(mock_error_to_vm_error(err)),
609 None => Ok(build_mock_result(mock, match_text.len())),
610 });
611 }
612 }
613 }
614
615 None
616}
617
618fn try_match_builtin_mock(match_text: &str) -> Option<Result<LlmResult, VmError>> {
619 LLM_MOCKS.with(|mocks| try_match_mock_queue(&mut mocks.borrow_mut(), match_text))
620}
621
622fn try_match_cli_mock(match_text: &str) -> Option<Result<LlmResult, VmError>> {
623 CLI_LLM_MOCKS.with(|mocks| try_match_mock_queue(&mut mocks.borrow_mut(), match_text))
624}
625
626pub(crate) fn record_cli_llm_result(result: &LlmResult) {
627 record_unified_tape_llm_call(result);
628 if !CLI_LLM_MOCK_MODE.with(|mode| *mode.borrow() == CliLlmMockMode::Record) {
629 return;
630 }
631 CLI_LLM_RECORDINGS.with(|recordings| {
632 recordings.borrow_mut().push(LlmMock {
633 text: result.text.clone(),
634 tool_calls: result.tool_calls.clone(),
635 match_pattern: None,
636 consume_on_match: false,
637 input_tokens: Some(result.input_tokens),
638 output_tokens: Some(result.output_tokens),
639 cache_read_tokens: Some(result.cache_read_tokens),
640 cache_write_tokens: Some(result.cache_write_tokens),
641 thinking: result.thinking.clone(),
642 thinking_summary: result.thinking_summary.clone(),
643 stop_reason: result.stop_reason.clone(),
644 model: result.model.clone(),
645 provider: Some(result.provider.clone()),
646 blocks: Some(result.blocks.clone()),
647 logprobs: result.logprobs.clone(),
648 error: None,
649 });
650 });
651}
652
653fn record_unified_tape_llm_call(result: &LlmResult) {
660 if crate::testbench::tape::active_recorder().is_none() {
661 return;
662 }
663 let response_json = serde_json::to_vec(result).unwrap_or_else(|_| Vec::new());
664 let request_digest = LLM_MOCK_CALLS
665 .with(|calls| calls.borrow().last().cloned())
666 .map(|call| {
667 let mut request = serde_json::Map::new();
668 request.insert("messages".to_string(), serde_json::json!(call.messages));
669 request.insert("system".to_string(), serde_json::json!(call.system));
670 request.insert("tools".to_string(), serde_json::json!(call.tools));
671 request.insert(
672 "tool_choice".to_string(),
673 serde_json::json!(call.tool_choice),
674 );
675 request.insert("thinking".to_string(), serde_json::json!(call.thinking));
676 request.insert("model".to_string(), serde_json::json!(result.model));
677 if call.api_mode != "chat_completions" {
678 request.insert("api_mode".to_string(), serde_json::json!(call.api_mode));
679 }
680 if call.provider_tools.is_some() {
681 request.insert(
682 "provider_tools".to_string(),
683 serde_json::json!(call.provider_tools),
684 );
685 }
686 if call
687 .output_format
688 .get("kind")
689 .and_then(|value| value.as_str())
690 != Some("text")
691 {
692 request.insert(
693 "output_format".to_string(),
694 serde_json::json!(call.output_format),
695 );
696 }
697 if call.previous_response_id.is_some() {
698 request.insert(
699 "previous_response_id".to_string(),
700 serde_json::json!(call.previous_response_id),
701 );
702 }
703 if call.store.is_some() {
704 request.insert("store".to_string(), serde_json::json!(call.store));
705 }
706 if call.background.is_some() {
707 request.insert("background".to_string(), serde_json::json!(call.background));
708 }
709 if call.truncation.is_some() {
710 request.insert("truncation".to_string(), serde_json::json!(call.truncation));
711 }
712 if call.compact.is_some() {
713 request.insert("compact".to_string(), serde_json::json!(call.compact));
714 }
715 if call.include.is_some() {
716 request.insert("include".to_string(), serde_json::json!(call.include));
717 }
718 if call.max_tool_calls.is_some() {
719 request.insert(
720 "max_tool_calls".to_string(),
721 serde_json::json!(call.max_tool_calls),
722 );
723 }
724 let serialized =
725 serde_json::to_vec(&serde_json::Value::Object(request)).unwrap_or_default();
726 crate::testbench::tape::content_hash(&serialized)
727 })
728 .unwrap_or_else(|| {
729 crate::testbench::tape::content_hash(result.text.as_bytes())
732 });
733 crate::testbench::tape::with_active_recorder(|recorder| {
734 let response = recorder.payload_from_bytes(response_json);
735 Some(crate::testbench::tape::TapeRecordKind::LlmCall {
736 request_digest,
737 response,
738 })
739 });
740}
741
742fn unmatched_cli_prompt_error(match_text: &str) -> VmError {
743 let mut snippet: String = match_text.chars().take(200).collect();
744 if match_text.chars().count() > 200 {
745 snippet.push_str("...");
746 }
747 VmError::Runtime(format!("No --llm-mock fixture matched prompt: {snippet:?}"))
748}
749
750pub fn set_replay_mode(mode: LlmReplayMode, fixture_dir: &str) {
752 LLM_REPLAY_MODE.with(|v| *v.borrow_mut() = mode);
753 LLM_FIXTURE_DIR.with(|v| *v.borrow_mut() = fixture_dir.to_string());
754}
755
756pub(crate) fn get_replay_mode() -> LlmReplayMode {
757 LLM_REPLAY_MODE.with(|v| *v.borrow())
758}
759
760pub(crate) fn get_fixture_dir() -> String {
761 LLM_FIXTURE_DIR.with(|v| v.borrow().clone())
762}
763
764pub(crate) fn fixture_hash(
766 model: &str,
767 messages: &[serde_json::Value],
768 system: Option<&str>,
769) -> String {
770 use std::hash::{Hash, Hasher};
771 let mut hasher = std::collections::hash_map::DefaultHasher::new();
772 model.hash(&mut hasher);
773 serde_json::to_string(messages)
775 .unwrap_or_default()
776 .hash(&mut hasher);
777 system.hash(&mut hasher);
778 format!("{:016x}", hasher.finish())
779}
780
781pub(crate) fn save_fixture(hash: &str, result: &LlmResult) {
782 let dir = get_fixture_dir();
783 if dir.is_empty() {
784 return;
785 }
786 let _ = std::fs::create_dir_all(&dir);
787 let path = format!("{dir}/{hash}.json");
788 let json = serde_json::json!({
789 "text": result.text,
790 "tool_calls": result.tool_calls,
791 "input_tokens": result.input_tokens,
792 "output_tokens": result.output_tokens,
793 "cache_read_tokens": result.cache_read_tokens,
794 "cache_write_tokens": result.cache_write_tokens,
795 "cache_creation_input_tokens": result.cache_write_tokens,
796 "model": result.model,
797 "provider": result.provider,
798 "thinking": result.thinking,
799 "thinking_summary": result.thinking_summary,
800 "stop_reason": result.stop_reason,
801 "blocks": result.blocks,
802 "logprobs": result.logprobs,
803 });
804 let _ = std::fs::write(
805 &path,
806 serde_json::to_string_pretty(&json).unwrap_or_default(),
807 );
808}
809
810pub(crate) fn load_fixture(hash: &str) -> Option<LlmResult> {
811 let dir = get_fixture_dir();
812 if dir.is_empty() {
813 return None;
814 }
815 let path = format!("{dir}/{hash}.json");
816 let content = std::fs::read_to_string(&path).ok()?;
817 let json: serde_json::Value = serde_json::from_str(&content).ok()?;
818 Some(LlmResult {
819 served_fast: false,
820 text: json["text"].as_str().unwrap_or("").to_string(),
821 tool_calls: json["tool_calls"].as_array().cloned().unwrap_or_default(),
822 input_tokens: json["input_tokens"].as_i64().unwrap_or(0),
823 output_tokens: json["output_tokens"].as_i64().unwrap_or(0),
824 cache_read_tokens: json["cache_read_tokens"].as_i64().unwrap_or(0),
825 cache_write_tokens: json["cache_write_tokens"]
826 .as_i64()
827 .or_else(|| json["cache_creation_input_tokens"].as_i64())
828 .unwrap_or(0),
829 cache_supported: json["cache_supported"].as_bool().unwrap_or(true),
830 model: json["model"].as_str().unwrap_or("").to_string(),
831 provider: json["provider"].as_str().unwrap_or("mock").to_string(),
832 thinking: json["thinking"].as_str().map(|s| s.to_string()),
833 thinking_summary: json["thinking_summary"].as_str().map(|s| s.to_string()),
834 stop_reason: json["stop_reason"].as_str().map(|s| s.to_string()),
835 blocks: json["blocks"].as_array().cloned().unwrap_or_default(),
836 logprobs: json["logprobs"].as_array().cloned().unwrap_or_default(),
837 telemetry: serde_json::from_value(json["telemetry"].clone()).unwrap_or_default(),
838 })
839}
840
841fn mock_required_args(tool_schema: &serde_json::Value) -> serde_json::Value {
845 let mut args = serde_json::Map::new();
846 let input_schema = tool_schema
850 .get("input_schema")
851 .or_else(|| tool_schema.get("inputSchema"))
852 .or_else(|| {
853 tool_schema
854 .get("function")
855 .and_then(|f| f.get("parameters"))
856 })
857 .or_else(|| tool_schema.get("parameters"));
858 let Some(schema) = input_schema else {
859 return serde_json::Value::Object(args);
860 };
861 let required: std::collections::BTreeSet<String> = schema
862 .get("required")
863 .and_then(|r| r.as_array())
864 .map(|arr| {
865 arr.iter()
866 .filter_map(|v| v.as_str().map(|s| s.to_string()))
867 .collect()
868 })
869 .unwrap_or_default();
870 if let Some(props) = schema.get("properties").and_then(|p| p.as_object()) {
871 for (name, prop) in props {
872 if !required.contains(name) {
873 continue;
874 }
875 let ty = prop
876 .get("type")
877 .and_then(|t| t.as_str())
878 .unwrap_or("string");
879 let placeholder = match ty {
880 "integer" => serde_json::json!(0),
881 "number" => serde_json::json!(0.0),
882 "boolean" => serde_json::json!(false),
883 "array" => serde_json::json!([]),
884 "object" => serde_json::json!({}),
885 _ => serde_json::json!(""),
886 };
887 args.insert(name.clone(), placeholder);
888 }
889 }
890 serde_json::Value::Object(args)
891}
892
893fn mock_tool_name(tool: &serde_json::Value) -> Option<&str> {
894 tool.get("name")
895 .or_else(|| {
896 tool.get("function")
897 .and_then(|function| function.get("name"))
898 })
899 .and_then(|name| name.as_str())
900}
901
902fn mock_auto_tool_candidate(tools: &[serde_json::Value]) -> Option<&serde_json::Value> {
903 tools
904 .iter()
905 .find(|tool| mock_tool_name(tool) != Some("agent_await_resumption"))
906}
907
908pub(crate) fn mock_llm_response(
913 request: &super::api::LlmRequestPayload,
914) -> Result<LlmResult, VmError> {
915 record_llm_mock_call(request);
916
917 let messages = &request.messages;
918 let system = request.system.as_deref();
919 let match_text = mock_match_text(messages);
920 let prompt_text = mock_last_prompt_text(messages);
921 let cache_key = mock_prompt_cache_key(&request.model, messages, system);
922
923 if let Some(matched) = try_match_cli_mock(&match_text) {
924 return matched.map(|mut result| {
925 if request.cache {
926 apply_mock_prompt_cache(&mut result, &cache_key);
927 }
928 result
929 });
930 }
931
932 if let Some(matched) = try_match_builtin_mock(&match_text) {
933 return matched.map(|mut result| {
934 if request.cache {
935 apply_mock_prompt_cache(&mut result, &cache_key);
936 }
937 result
938 });
939 }
940
941 if cli_llm_mock_replay_active() {
942 return Err(unmatched_cli_prompt_error(&match_text));
943 }
944
945 if let Some(tools) = request.native_tools.as_deref() {
948 if let Some(first_tool) = mock_auto_tool_candidate(tools) {
949 let tool_name = mock_tool_name(first_tool).unwrap_or("unknown");
950 let mock_args = mock_required_args(first_tool);
951 let mut result = LlmResult {
952 served_fast: false,
953 text: String::new(),
954 tool_calls: vec![serde_json::json!({
955 "id": "mock_call_1",
956 "type": "tool_call",
957 "name": tool_name,
958 "arguments": mock_args
959 })],
960 input_tokens: prompt_text.len() as i64,
961 output_tokens: 20,
962 cache_read_tokens: 0,
963 cache_write_tokens: 0,
964 cache_supported: true,
965 model: request.model.clone(),
966 provider: "mock".to_string(),
967 thinking: None,
968 thinking_summary: None,
969 stop_reason: None,
970 blocks: vec![serde_json::json!({
971 "type": "tool_call",
972 "id": "mock_call_1",
973 "name": tool_name,
974 "arguments": mock_args,
975 "visibility": "internal",
976 })],
977 logprobs: Vec::new(),
978 telemetry: ProviderTelemetry::default(),
979 };
980 if request.cache {
981 apply_mock_prompt_cache(&mut result, &cache_key);
982 }
983 return Ok(result);
984 }
985 }
986
987 let tagged_done = system.is_some_and(|s| s.contains("<done>"));
992
993 let prose_body = if prompt_text.is_empty() {
994 "Mock LLM response".to_string()
995 } else {
996 let word_count = prompt_text.split_whitespace().count();
997 format!(
998 "Mock response to {word_count}-word prompt: {}",
999 prompt_text.chars().take(100).collect::<String>()
1000 )
1001 };
1002 let response = if tagged_done {
1003 format!("<assistant_prose>{prose_body}</assistant_prose>\n<done>##DONE##</done>")
1004 } else {
1005 prose_body
1006 };
1007
1008 let mut result = LlmResult {
1009 served_fast: false,
1010 text: response.clone(),
1011 tool_calls: vec![],
1012 input_tokens: prompt_text.len() as i64,
1013 output_tokens: 30,
1014 cache_read_tokens: 0,
1015 cache_write_tokens: 0,
1016 cache_supported: true,
1017 model: request.model.clone(),
1018 provider: "mock".to_string(),
1019 thinking: None,
1020 thinking_summary: None,
1021 stop_reason: None,
1022 blocks: vec![serde_json::json!({
1023 "type": "output_text",
1024 "text": response,
1025 "visibility": "public",
1026 })],
1027 logprobs: Vec::new(),
1028 telemetry: ProviderTelemetry::default(),
1029 };
1030 if request.cache {
1031 apply_mock_prompt_cache(&mut result, &cache_key);
1032 }
1033 Ok(result)
1034}
1035
1036pub fn drain_tool_recordings() -> Vec<ToolCallRecord> {
1038 TOOL_RECORDINGS.with(|v| std::mem::take(&mut *v.borrow_mut()))
1039}