1use axum::{
29 extract::State,
30 response::{
31 sse::{Event, KeepAlive, Sse},
32 IntoResponse, Response,
33 },
34 routing::{get, post},
35 Json, Router,
36};
37use futures::stream::{self, Stream};
38use serde_json::{json, Value};
39use std::collections::HashMap;
40use std::convert::Infallible;
41use std::path::PathBuf;
42use std::sync::{Arc, Mutex};
43use std::time::Duration;
44
/// How the mock LLM endpoints obtain the reply they serve.
///
/// Parsed from text through `FromStr` and rendered back through `Display`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
pub enum LlmMockMode {
    /// Serve the locally built canned reply; this is the default mode.
    #[default]
    Mock,
    /// Forward every request to the configured upstream; a failed call falls
    /// back to the canned reply.
    Proxy,
    /// Serve from the cassette when the request was seen before, otherwise call
    /// the upstream and store its reply in the cassette.
    Record,
    /// Serve from the cassette only; a miss falls back to the canned reply.
    Replay,
}
58
59impl std::str::FromStr for LlmMockMode {
60 type Err = String;
61 fn from_str(s: &str) -> Result<Self, Self::Err> {
62 match s.trim().to_lowercase().as_str() {
63 "mock" | "canned" | "off" => Ok(Self::Mock),
64 "proxy" | "passthrough" => Ok(Self::Proxy),
65 "record" => Ok(Self::Record),
66 "replay" => Ok(Self::Replay),
67 _ => Err(format!("unknown --llm-mock-mode '{s}' (mock|proxy|record|replay)")),
68 }
69 }
70}
71
72impl std::fmt::Display for LlmMockMode {
73 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74 f.write_str(match self {
75 Self::Mock => "mock",
76 Self::Proxy => "proxy",
77 Self::Record => "record",
78 Self::Replay => "replay",
79 })
80 }
81}
82
/// Settings for the mock LLM endpoints.
#[derive(Clone, Debug)]
pub struct LlmMockConfig {
    /// Text served as the canned reply (optionally followed by an echo of the prompt).
    pub canned_reply: String,
    /// Model id used when a request carries no string `model` field; also the
    /// single entry listed by the models endpoint.
    pub default_model: String,
    /// When true, the canned reply is suffixed with up to 120 characters of the
    /// last user message.
    pub echo_prompt: bool,
    /// Pause, in milliseconds, inserted before each streamed SSE event; `0`
    /// disables the pause.
    pub stream_chunk_delay_ms: u64,
    /// Where replies come from (canned, upstream, cassette).
    pub mode: LlmMockMode,
    /// Base URL of the real API used by proxy/record modes; trailing slashes are
    /// trimmed and the `/v1/...` path is appended. Upstream calls fail when unset.
    pub upstream_base_url: Option<String>,
    /// Credential sent upstream: as a `Bearer` authorization header for the
    /// OpenAI dialect, as `x-api-key` for the Anthropic dialect.
    pub upstream_api_key: Option<String>,
    /// JSON file the cassette is loaded from at start-up and written back to
    /// after each newly recorded reply.
    pub cassette_path: Option<PathBuf>,
}
108
109impl Default for LlmMockConfig {
110 fn default() -> Self {
111 Self {
112 canned_reply: "This is a mock response from MockForge's LLM endpoint.".to_string(),
113 default_model: "mockforge-mock-1".to_string(),
114 echo_prompt: true,
115 stream_chunk_delay_ms: 0,
116 mode: LlmMockMode::Mock,
117 upstream_base_url: None,
118 upstream_api_key: None,
119 cassette_path: None,
120 }
121 }
122}
123
/// One recorded reply as stored in the cassette file.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
struct CassetteEntry {
    /// Reply text to serve.
    text: String,
    /// Prompt-side token count stored with the reply.
    prompt_tokens: u32,
    /// Completion-side token count stored with the reply.
    completion_tokens: u32,
}
131
/// In-memory set of recorded replies, optionally backed by a JSON file.
#[derive(Default)]
struct Cassette {
    /// Recorded replies keyed by the request key computed in `cassette_key`.
    entries: HashMap<String, CassetteEntry>,
    /// File the entries were loaded from and are saved to; `None` keeps the
    /// cassette in memory only.
    path: Option<PathBuf>,
}
138
139impl Cassette {
140 fn load(path: Option<PathBuf>) -> Self {
141 let entries = path
142 .as_ref()
143 .and_then(|p| std::fs::read(p).ok())
144 .and_then(|b| serde_json::from_slice::<HashMap<String, CassetteEntry>>(&b).ok())
145 .unwrap_or_default();
146 Self { entries, path }
147 }
148
149 fn save(&self) {
150 if let Some(ref p) = self.path {
151 if let Ok(json) = serde_json::to_string_pretty(&self.entries) {
152 let _ = std::fs::write(p, json);
153 }
154 }
155 }
156}
157
/// State handed to every route handler of the mock LLM router.
#[derive(Clone)]
pub struct LlmMockState {
    /// Settings the handlers read on each request.
    config: LlmMockConfig,
    /// Recorded replies, shared between clones of the state behind a mutex.
    cassette: Arc<Mutex<Cassette>>,
    /// HTTP client used for upstream calls.
    http: reqwest::Client,
}
166
167impl LlmMockState {
168 pub fn new(config: LlmMockConfig) -> Self {
170 let cassette = Cassette::load(config.cassette_path.clone());
171 Self {
172 config,
173 cassette: Arc::new(Mutex::new(cassette)),
174 http: reqwest::Client::new(),
175 }
176 }
177}
178
179pub fn router(config: LlmMockConfig) -> Router {
181 Router::new()
182 .route("/v1/chat/completions", post(chat_completions))
183 .route("/v1/models", get(list_models))
184 .route("/v1/messages", post(anthropic_messages))
185 .with_state(LlmMockState::new(config))
186}
187
/// Wire format a request arrived in; selects the upstream path, auth header
/// and response fields, and is part of the cassette key.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Dialect {
    /// Requests handled by the `/v1/chat/completions` route.
    OpenAi,
    /// Requests handled by the `/v1/messages` route.
    Anthropic,
}
194
/// A reply ready to be rendered in either dialect, whatever its origin
/// (canned, cassette or upstream).
struct Resolved {
    /// Reply text.
    text: String,
    /// Token count reported for the prompt.
    prompt_tokens: u32,
    /// Token count reported for the reply.
    completion_tokens: u32,
}
201
/// Estimates a token count as the number of whitespace-separated words,
/// never reporting less than one.
fn approx_tokens(text: &str) -> u32 {
    match text.split_whitespace().count() {
        0 => 1,
        words => words as u32,
    }
}
212
213fn last_user_text(messages: &[Value]) -> String {
217 for m in messages.iter().rev() {
218 if m.get("role").and_then(|r| r.as_str()) == Some("user") {
219 return content_to_text(m.get("content"));
220 }
221 }
222 messages.last().map(|m| content_to_text(m.get("content"))).unwrap_or_default()
224}
225
226fn content_to_text(content: Option<&Value>) -> String {
227 match content {
228 Some(Value::String(s)) => s.clone(),
229 Some(Value::Array(parts)) => parts
230 .iter()
231 .filter_map(|p| p.get("text").and_then(|t| t.as_str()))
232 .collect::<Vec<_>>()
233 .join(" "),
234 _ => String::new(),
235 }
236}
237
238fn build_reply(config: &LlmMockConfig, messages: &[Value]) -> String {
240 if config.echo_prompt {
241 let prompt = last_user_text(messages);
242 if !prompt.is_empty() {
243 let trimmed: String = prompt.chars().take(120).collect();
244 return format!("{} (you said: \"{}\")", config.canned_reply, trimmed);
245 }
246 }
247 config.canned_reply.clone()
248}
249
/// Derives a deterministic id: `prefix` followed by 16 lowercase hex digits
/// of a 64-bit hash of `seed`.
///
/// The hash xors each byte into the state and multiplies by 1099511628211
/// with wrap-around (the FNV-1a step).
/// NOTE(review): the start value is not the standard 64-bit FNV offset basis
/// (14695981039346656037). Changing it would change every cassette key that
/// is derived through this function, so it is kept as is.
fn stable_id(prefix: &str, seed: &str) -> String {
    const START: u64 = 1469598103934665603;
    const MULTIPLIER: u64 = 1099511628211;
    let hash = seed
        .bytes()
        .fold(START, |state, byte| (state ^ u64::from(byte)).wrapping_mul(MULTIPLIER));
    format!("{prefix}{hash:016x}")
}
261
/// Splits `reply` into word-sized pieces for streaming, without losing text.
///
/// Each piece is one word together with the whitespace that precedes it;
/// whitespace after the final word is appended to the last piece. Joining
/// the pieces therefore reproduces `reply` exactly, so a streamed reply
/// matches the non-streamed one even when it contains newlines, tabs or
/// repeated spaces. An empty or whitespace-only reply yields a single piece
/// holding `reply` unchanged.
fn stream_chunks(reply: &str) -> Vec<String> {
    let mut chunks: Vec<String> = Vec::new();
    let mut current = String::new();
    // True once `current` holds at least one non-whitespace character.
    let mut has_word = false;

    for ch in reply.chars() {
        if ch.is_whitespace() {
            if has_word {
                // Whitespace after a word belongs to the next piece.
                chunks.push(std::mem::take(&mut current));
                has_word = false;
            }
        } else {
            has_word = true;
        }
        current.push(ch);
    }

    match chunks.last_mut() {
        // Only trailing whitespace is left: keep it on the last word.
        Some(last) if !has_word => last.push_str(&current),
        _ => chunks.push(current),
    }
    chunks
}
278
279async fn list_models(State(state): State<LlmMockState>) -> Json<Value> {
284 Json(json!({
285 "object": "list",
286 "data": [{
287 "id": state.config.default_model,
288 "object": "model",
289 "created": 0,
290 "owned_by": "mockforge",
291 }],
292 }))
293}
294
295async fn chat_completions(State(state): State<LlmMockState>, Json(body): Json<Value>) -> Response {
300 let model = body
301 .get("model")
302 .and_then(|m| m.as_str())
303 .unwrap_or(&state.config.default_model)
304 .to_string();
305 let messages: Vec<Value> =
306 body.get("messages").and_then(|m| m.as_array()).cloned().unwrap_or_default();
307 let stream = body.get("stream").and_then(|s| s.as_bool()).unwrap_or(false);
308
309 let r = resolve_reply(&state, Dialect::OpenAi, &model, &messages).await;
310 let id = stable_id("chatcmpl-", &r.text);
311
312 if stream {
313 return openai_stream(state.config.stream_chunk_delay_ms, id, model, r.text)
314 .into_response();
315 }
316
317 Json(json!({
318 "id": id,
319 "object": "chat.completion",
320 "created": 0,
321 "model": model,
322 "choices": [{
323 "index": 0,
324 "message": { "role": "assistant", "content": r.text },
325 "finish_reason": "stop",
326 }],
327 "usage": {
328 "prompt_tokens": r.prompt_tokens,
329 "completion_tokens": r.completion_tokens,
330 "total_tokens": r.prompt_tokens + r.completion_tokens,
331 },
332 }))
333 .into_response()
334}
335
336fn openai_stream(
337 delay_ms: u64,
338 id: String,
339 model: String,
340 reply: String,
341) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
342 let mut events: Vec<Event> = Vec::new();
343 events.push(sse_json(&json!({
345 "id": id, "object": "chat.completion.chunk", "created": 0, "model": model,
346 "choices": [{ "index": 0, "delta": { "role": "assistant" }, "finish_reason": Value::Null }],
347 })));
348 for chunk in stream_chunks(&reply) {
350 events.push(sse_json(&json!({
351 "id": id, "object": "chat.completion.chunk", "created": 0, "model": model,
352 "choices": [{ "index": 0, "delta": { "content": chunk }, "finish_reason": Value::Null }],
353 })));
354 }
355 events.push(sse_json(&json!({
357 "id": id, "object": "chat.completion.chunk", "created": 0, "model": model,
358 "choices": [{ "index": 0, "delta": {}, "finish_reason": "stop" }],
359 })));
360 events.push(Event::default().data("[DONE]"));
361
362 sse_response(events, delay_ms)
363}
364
365async fn anthropic_messages(
370 State(state): State<LlmMockState>,
371 Json(body): Json<Value>,
372) -> Response {
373 let model = body
374 .get("model")
375 .and_then(|m| m.as_str())
376 .unwrap_or(&state.config.default_model)
377 .to_string();
378 let messages: Vec<Value> =
379 body.get("messages").and_then(|m| m.as_array()).cloned().unwrap_or_default();
380 let stream = body.get("stream").and_then(|s| s.as_bool()).unwrap_or(false);
381
382 let r = resolve_reply(&state, Dialect::Anthropic, &model, &messages).await;
383 let id = stable_id("msg_", &r.text);
384
385 if stream {
386 return anthropic_stream(
387 state.config.stream_chunk_delay_ms,
388 id,
389 model,
390 r.text,
391 r.prompt_tokens,
392 r.completion_tokens,
393 )
394 .into_response();
395 }
396
397 Json(json!({
398 "id": id,
399 "type": "message",
400 "role": "assistant",
401 "model": model,
402 "content": [{ "type": "text", "text": r.text }],
403 "stop_reason": "end_turn",
404 "stop_sequence": Value::Null,
405 "usage": { "input_tokens": r.prompt_tokens, "output_tokens": r.completion_tokens },
406 }))
407 .into_response()
408}
409
410fn anthropic_stream(
411 delay_ms: u64,
412 id: String,
413 model: String,
414 reply: String,
415 input_tokens: u32,
416 output_tokens: u32,
417) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
418 let mut events: Vec<Event> = Vec::new();
419 events.push(sse_named(
420 "message_start",
421 &json!({
422 "type": "message_start",
423 "message": {
424 "id": id, "type": "message", "role": "assistant", "model": model,
425 "content": [], "stop_reason": Value::Null, "stop_sequence": Value::Null,
426 "usage": { "input_tokens": input_tokens, "output_tokens": 0 },
427 },
428 }),
429 ));
430 events.push(sse_named(
431 "content_block_start",
432 &json!({ "type": "content_block_start", "index": 0, "content_block": { "type": "text", "text": "" } }),
433 ));
434 for chunk in stream_chunks(&reply) {
435 events.push(sse_named(
436 "content_block_delta",
437 &json!({ "type": "content_block_delta", "index": 0, "delta": { "type": "text_delta", "text": chunk } }),
438 ));
439 }
440 events.push(sse_named(
441 "content_block_stop",
442 &json!({ "type": "content_block_stop", "index": 0 }),
443 ));
444 events.push(sse_named(
445 "message_delta",
446 &json!({ "type": "message_delta", "delta": { "stop_reason": "end_turn", "stop_sequence": Value::Null }, "usage": { "output_tokens": output_tokens } }),
447 ));
448 events.push(sse_named("message_stop", &json!({ "type": "message_stop" })));
449
450 sse_response(events, delay_ms)
451}
452
453fn cassette_key(dialect: Dialect, model: &str, messages: &[Value]) -> String {
461 let d = match dialect {
462 Dialect::OpenAi => "openai",
463 Dialect::Anthropic => "anthropic",
464 };
465 let msgs = serde_json::to_string(messages).unwrap_or_default();
466 stable_id("", &format!("{d}\n{model}\n{msgs}"))
467}
468
469fn canned_resolved(cfg: &LlmMockConfig, messages: &[Value]) -> Resolved {
472 let text = build_reply(cfg, messages);
473 let prompt_text = messages
474 .iter()
475 .map(|m| content_to_text(m.get("content")))
476 .collect::<Vec<_>>()
477 .join(" ");
478 Resolved {
479 prompt_tokens: approx_tokens(&prompt_text),
480 completion_tokens: approx_tokens(&text),
481 text,
482 }
483}
484
485fn cassette_lookup(state: &LlmMockState, key: &str) -> Option<Resolved> {
486 let e = state.cassette.lock().ok()?.entries.get(key).cloned()?;
487 Some(Resolved {
488 text: e.text,
489 prompt_tokens: e.prompt_tokens,
490 completion_tokens: e.completion_tokens,
491 })
492}
493
494async fn resolve_reply(
498 state: &LlmMockState,
499 dialect: Dialect,
500 model: &str,
501 messages: &[Value],
502) -> Resolved {
503 let cfg = &state.config;
504 match cfg.mode {
505 LlmMockMode::Mock => canned_resolved(cfg, messages),
506 LlmMockMode::Replay => {
507 let key = cassette_key(dialect, model, messages);
508 cassette_lookup(state, &key).unwrap_or_else(|| {
509 tracing::warn!(target: "mockforge::llm_mock", "replay cassette miss (key {key}); serving canned reply");
510 canned_resolved(cfg, messages)
511 })
512 }
513 LlmMockMode::Record => {
514 let key = cassette_key(dialect, model, messages);
515 if let Some(hit) = cassette_lookup(state, &key) {
516 return hit;
517 }
518 match call_upstream(state, dialect, model, messages).await {
519 Ok(r) => {
520 if let Ok(mut c) = state.cassette.lock() {
521 c.entries.insert(
522 key,
523 CassetteEntry {
524 text: r.text.clone(),
525 prompt_tokens: r.prompt_tokens,
526 completion_tokens: r.completion_tokens,
527 },
528 );
529 c.save();
530 }
531 r
532 }
533 Err(e) => {
534 tracing::error!(target: "mockforge::llm_mock", "record: upstream call failed ({e}); serving canned reply");
535 canned_resolved(cfg, messages)
536 }
537 }
538 }
539 LlmMockMode::Proxy => match call_upstream(state, dialect, model, messages).await {
540 Ok(r) => r,
541 Err(e) => {
542 tracing::error!(target: "mockforge::llm_mock", "proxy: upstream call failed ({e}); serving canned reply");
543 canned_resolved(cfg, messages)
544 }
545 },
546 }
547}
548
549async fn call_upstream(
552 state: &LlmMockState,
553 dialect: Dialect,
554 model: &str,
555 messages: &[Value],
556) -> Result<Resolved, String> {
557 let base = state
558 .config
559 .upstream_base_url
560 .as_deref()
561 .ok_or("no upstream configured (set --llm-mock-upstream)")?
562 .trim_end_matches('/');
563
564 let (url, body) = match dialect {
565 Dialect::OpenAi => (
566 format!("{base}/v1/chat/completions"),
567 json!({ "model": model, "messages": messages, "stream": false }),
568 ),
569 Dialect::Anthropic => (
571 format!("{base}/v1/messages"),
572 json!({ "model": model, "messages": messages, "max_tokens": 1024, "stream": false }),
573 ),
574 };
575
576 let mut req = state.http.post(&url).json(&body);
577 if let Some(ref k) = state.config.upstream_api_key {
578 req = match dialect {
579 Dialect::OpenAi => req.header("authorization", format!("Bearer {k}")),
580 Dialect::Anthropic => {
581 req.header("x-api-key", k).header("anthropic-version", "2023-06-01")
582 }
583 };
584 }
585
586 let resp = req.send().await.map_err(|e| e.to_string())?;
587 let status = resp.status();
588 if !status.is_success() {
589 return Err(format!("{url} returned HTTP {status}"));
590 }
591 let v: Value = resp.json().await.map_err(|e| e.to_string())?;
592
593 let (text, pt, ct) = match dialect {
594 Dialect::OpenAi => (
595 v.pointer("/choices/0/message/content")
596 .and_then(|x| x.as_str())
597 .unwrap_or_default(),
598 v.pointer("/usage/prompt_tokens").and_then(|x| x.as_u64()).unwrap_or(0) as u32,
599 v.pointer("/usage/completion_tokens").and_then(|x| x.as_u64()).unwrap_or(0) as u32,
600 ),
601 Dialect::Anthropic => (
602 v.pointer("/content/0/text").and_then(|x| x.as_str()).unwrap_or_default(),
603 v.pointer("/usage/input_tokens").and_then(|x| x.as_u64()).unwrap_or(0) as u32,
604 v.pointer("/usage/output_tokens").and_then(|x| x.as_u64()).unwrap_or(0) as u32,
605 ),
606 };
607 let text = text.to_string();
608 let prompt_tokens = if pt > 0 {
610 pt
611 } else {
612 let pj = messages
613 .iter()
614 .map(|m| content_to_text(m.get("content")))
615 .collect::<Vec<_>>()
616 .join(" ");
617 approx_tokens(&pj)
618 };
619 let completion_tokens = if ct > 0 { ct } else { approx_tokens(&text) };
620 Ok(Resolved {
621 text,
622 prompt_tokens,
623 completion_tokens,
624 })
625}
626
627fn sse_json(value: &Value) -> Event {
632 Event::default().data(value.to_string())
633}
634
635fn sse_named(name: &str, value: &Value) -> Event {
636 Event::default().event(name).data(value.to_string())
637}
638
639fn sse_response(
642 events: Vec<Event>,
643 delay_ms: u64,
644) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
645 let s = stream::unfold(events.into_iter(), move |mut it| async move {
646 let next = it.next()?;
647 if delay_ms > 0 {
648 tokio::time::sleep(Duration::from_millis(delay_ms)).await;
649 }
650 Some((Ok::<Event, Infallible>(next), it))
651 });
652 Sse::new(s).keep_alive(KeepAlive::new().interval(Duration::from_secs(15)))
653}
654
// Unit tests for the helpers, the handlers' response shapes, and mode
// resolution. None of them contacts a real upstream.
#[cfg(test)]
mod tests {
    use super::*;

    /// Default config with prompt echoing turned off, so replies are exactly
    /// the canned text.
    fn cfg() -> LlmMockConfig {
        LlmMockConfig {
            echo_prompt: false,
            ..Default::default()
        }
    }

    /// Handler state built from `cfg()`.
    fn st() -> LlmMockState {
        LlmMockState::new(cfg())
    }

    /// A message list holding a single user message with the given text.
    fn user(text: &str) -> Vec<Value> {
        vec![json!({"role":"user","content":text})]
    }

    #[test]
    fn approx_tokens_counts_words() {
        assert_eq!(approx_tokens("one two three"), 3);
        // Empty text still counts as one token.
        assert_eq!(approx_tokens(""), 1);
    }

    #[test]
    fn last_user_text_handles_string_and_array_content() {
        let msgs = vec![
            json!({"role":"system","content":"be brief"}),
            json!({"role":"user","content":"hello world"}),
        ];
        assert_eq!(last_user_text(&msgs), "hello world");
        // Array content: the text parts are joined with a space.
        let arr = vec![
            json!({"role":"user","content":[{"type":"text","text":"a"},{"type":"text","text":"b"}]}),
        ];
        assert_eq!(last_user_text(&arr), "a b");
    }

    #[test]
    fn echo_prompt_reflects_user_message() {
        let c = LlmMockConfig {
            echo_prompt: true,
            ..Default::default()
        };
        let msgs = vec![json!({"role":"user","content":"ping"})];
        let reply = build_reply(&c, &msgs);
        assert!(reply.contains("ping"), "reply should echo the prompt: {reply}");
    }

    #[test]
    fn stable_id_is_deterministic_and_prefixed() {
        let a = stable_id("chatcmpl-", "same");
        let b = stable_id("chatcmpl-", "same");
        assert_eq!(a, b);
        assert!(a.starts_with("chatcmpl-"));
        assert_ne!(stable_id("chatcmpl-", "x"), stable_id("chatcmpl-", "y"));
    }

    #[test]
    fn stream_chunks_preserve_leading_space_after_first() {
        let chunks = stream_chunks("alpha beta gamma");
        assert_eq!(chunks, vec!["alpha", " beta", " gamma"]);
        // Concatenating the chunks gives back the original text.
        assert_eq!(chunks.concat(), "alpha beta gamma");
    }

    #[tokio::test]
    async fn chat_completions_non_stream_shape() {
        let body = json!({"model":"gpt-x","messages":[{"role":"user","content":"hi there"}]});
        let resp = chat_completions(State(st()), Json(body)).await;
        let bytes = axum::body::to_bytes(resp.into_body(), usize::MAX).await.unwrap();
        let v: Value = serde_json::from_slice(&bytes).unwrap();
        assert_eq!(v["object"], "chat.completion");
        assert_eq!(v["choices"][0]["message"]["role"], "assistant");
        assert_eq!(v["choices"][0]["finish_reason"], "stop");
        assert!(v["usage"]["total_tokens"].as_u64().unwrap() >= 2);
        assert!(v["id"].as_str().unwrap().starts_with("chatcmpl-"));
    }

    #[tokio::test]
    async fn anthropic_non_stream_shape() {
        let body = json!({"model":"claude-x","messages":[{"role":"user","content":"hi"}]});
        let resp = anthropic_messages(State(st()), Json(body)).await;
        let bytes = axum::body::to_bytes(resp.into_body(), usize::MAX).await.unwrap();
        let v: Value = serde_json::from_slice(&bytes).unwrap();
        assert_eq!(v["type"], "message");
        assert_eq!(v["content"][0]["type"], "text");
        assert_eq!(v["stop_reason"], "end_turn");
        assert!(v["usage"]["output_tokens"].as_u64().unwrap() >= 1);
        assert!(v["id"].as_str().unwrap().starts_with("msg_"));
    }

    #[tokio::test]
    async fn models_list_shape() {
        let Json(v) = list_models(State(st())).await;
        assert_eq!(v["object"], "list");
        assert_eq!(v["data"][0]["owned_by"], "mockforge");
    }

    #[test]
    fn mode_parses_and_displays() {
        for (s, m) in [
            ("mock", LlmMockMode::Mock),
            ("off", LlmMockMode::Mock),
            ("proxy", LlmMockMode::Proxy),
            ("record", LlmMockMode::Record),
            ("replay", LlmMockMode::Replay),
        ] {
            assert_eq!(s.parse::<LlmMockMode>().unwrap(), m);
        }
        assert!("bogus".parse::<LlmMockMode>().is_err());
        assert_eq!(LlmMockMode::Record.to_string(), "record");
    }

    #[test]
    fn cassette_key_is_stable_and_input_sensitive() {
        let m = user("hello");
        let k1 = cassette_key(Dialect::OpenAi, "gpt-4o", &m);
        let k2 = cassette_key(Dialect::OpenAi, "gpt-4o", &m);
        assert_eq!(k1, k2, "same input -> same key");
        assert_ne!(k1, cassette_key(Dialect::OpenAi, "gpt-4o", &user("world")));
        assert_ne!(k1, cassette_key(Dialect::OpenAi, "gpt-5", &m), "model is part of the key");
        assert_ne!(
            k1,
            cassette_key(Dialect::Anthropic, "gpt-4o", &m),
            "dialect is part of the key"
        );
    }

    #[test]
    fn cassette_roundtrips_through_disk() {
        // Use a per-process directory under the system temp dir.
        let dir = std::env::temp_dir().join(format!("mf-cassette-{}", std::process::id()));
        std::fs::create_dir_all(&dir).unwrap();
        let path = dir.join("c.json");
        // The file does not exist yet, so this starts as an empty cassette.
        let mut c = Cassette::load(Some(path.clone()));
        c.entries.insert(
            "k".into(),
            CassetteEntry {
                text: "recorded".into(),
                prompt_tokens: 3,
                completion_tokens: 1,
            },
        );
        c.save();
        let reloaded = Cassette::load(Some(path.clone()));
        assert_eq!(reloaded.entries.get("k").unwrap().text, "recorded");
        // Best-effort cleanup; a failure here does not fail the test.
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[tokio::test]
    async fn mock_mode_serves_canned_without_upstream() {
        let state = LlmMockState::new(cfg());
        let r = resolve_reply(&state, Dialect::OpenAi, "gpt-4o", &user("hi")).await;
        assert!(r.text.contains("mock response"));
    }

    #[tokio::test]
    async fn replay_hit_serves_cassette_miss_serves_canned() {
        let state = LlmMockState::new(LlmMockConfig {
            mode: LlmMockMode::Replay,
            echo_prompt: false,
            ..Default::default()
        });
        let msgs = user("what is 2+2");
        // Seed the in-memory cassette under the key this request hashes to.
        let key = cassette_key(Dialect::OpenAi, "gpt-4o", &msgs);
        state.cassette.lock().unwrap().entries.insert(
            key,
            CassetteEntry {
                text: "four".into(),
                prompt_tokens: 4,
                completion_tokens: 1,
            },
        );
        // Hit: the seeded entry is served as is.
        let hit = resolve_reply(&state, Dialect::OpenAi, "gpt-4o", &msgs).await;
        assert_eq!(hit.text, "four");
        assert_eq!(hit.completion_tokens, 1);
        // Miss: an unseen prompt falls back to the canned reply.
        let miss = resolve_reply(&state, Dialect::OpenAi, "gpt-4o", &user("unseen")).await;
        assert!(miss.text.contains("mock response"));
    }

    #[tokio::test]
    async fn proxy_without_upstream_degrades_to_canned() {
        // No upstream base URL is configured, so the upstream call errors out.
        let state = LlmMockState::new(LlmMockConfig {
            mode: LlmMockMode::Proxy,
            echo_prompt: false,
            ..Default::default()
        });
        let r = resolve_reply(&state, Dialect::OpenAi, "gpt-4o", &user("hi")).await;
        assert!(r.text.contains("mock response"), "should fall back to canned, got: {}", r.text);
    }
}