1use serde::{Deserialize, Serialize};
21use sha2::{Digest, Sha256};
22use std::collections::HashMap;
23use std::env;
24use std::fs;
25use std::io::Write;
26use std::path::{Path, PathBuf};
27
/// Operating mode of the VCR (record/replay) HTTP layer.
///
/// The per-mode behavior described on each variant is what
/// `VcrClient::post_json` implements.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum VcrMode {
    /// Serve from loaded cassettes only; a cache miss is an error and no
    /// request is sent. This is the `Default`, and also what
    /// `VcrMode::from_env` falls back to for unset or unrecognized values.
    #[default]
    ReplayStrict,
    /// Serve from loaded cassettes; on a miss, log a warning and send the
    /// request over the network without recording the response.
    Replay,
    /// Always send the request over the network and record the response;
    /// the cache is not consulted.
    Record,
    /// Serve from loaded cassettes; on a miss, send the request and record
    /// the response.
    Auto,
    /// Always send the request; cassettes are neither loaded, consulted nor
    /// written.
    Off,
}
43
44impl VcrMode {
45 pub fn from_env() -> Self {
47 match env::var("ASSAY_VCR_MODE")
48 .unwrap_or_default()
49 .to_lowercase()
50 .as_str()
51 {
52 "record" => VcrMode::Record,
53 "auto" => VcrMode::Auto,
54 "replay" => VcrMode::Replay,
55 "off" => VcrMode::Off,
56 _ => VcrMode::ReplayStrict,
58 }
59 }
60
61 pub fn allows_network(&self) -> bool {
63 matches!(
64 self,
65 VcrMode::Record | VcrMode::Auto | VcrMode::Replay | VcrMode::Off
66 )
67 }
68
69 pub fn fails_on_miss(&self) -> bool {
71 matches!(self, VcrMode::ReplayStrict)
72 }
73}
74
/// Names of headers and body paths designated for scrubbing.
///
/// NOTE(review): in the code visible in this file the config is only stored
/// on `VcrClient`; nothing here reads these lists when a cassette is written.
/// Confirm where (or whether) scrubbing is actually applied.
#[derive(Debug, Clone, Default)]
pub struct ScrubConfig {
    /// Request header names to scrub.
    pub request_headers: Vec<String>,
    /// Response header names to scrub.
    pub response_headers: Vec<String>,
    /// Request body paths to scrub.
    /// NOTE(review): the path syntax is not defined in this file — confirm.
    pub request_body_paths: Vec<String>,
    /// Response body paths to scrub.
    /// NOTE(review): the path syntax is not defined in this file — confirm.
    pub response_body_paths: Vec<String>,
}
87
88impl ScrubConfig {
89 pub fn default_secure() -> Self {
91 Self {
92 request_headers: vec![
93 "authorization".to_string(),
94 "x-api-key".to_string(),
95 "openai-organization".to_string(),
96 "api-key".to_string(),
97 ],
98 response_headers: vec![
99 "set-cookie".to_string(),
100 "x-request-id".to_string(),
101 "cf-ray".to_string(),
102 ],
103 request_body_paths: vec![],
104 response_body_paths: vec![],
105 }
106 }
107}
108
/// One recorded request/response pair, stored as a single JSON cassette file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CassetteEntry {
    /// Cassette format version; entries recorded by this file are written
    /// with `2`.
    pub schema_version: u32,
    /// Request fingerprint. Used as the in-memory cache key when cassettes
    /// are loaded; its first 16 characters name the cassette file on save.
    pub fingerprint: String,
    /// HTTP method (`"POST"` for everything recorded by this file).
    pub method: String,
    /// Request URL exactly as passed by the caller.
    pub url: String,
    /// JSON request body, if any.
    pub request_body: Option<serde_json::Value>,
    /// HTTP status code of the recorded response.
    pub status: u16,
    /// JSON body of the recorded response.
    pub response_body: serde_json::Value,
    /// Descriptive metadata about the recording.
    pub meta: CassetteMeta,
}
129
/// Descriptive metadata attached to a [`CassetteEntry`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CassetteMeta {
    /// Time of recording; written as an RFC 3339 UTC timestamp when this file
    /// records an entry.
    pub recorded_at: String,
    /// Value of the request body's `"model"` field when it is a string;
    /// omitted from the serialized JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,
    /// Provider label derived from the URL (`"openai"`, `"anthropic"` or
    /// `"unknown"` when recorded by this file).
    pub provider: String,
    /// Endpoint kind derived from the URL (`"embeddings"`, `"judge"`,
    /// `"completions"` or `"other"` when recorded by this file).
    pub kind: String,
}
143
/// HTTP client wrapper that can replay recorded responses from cassette
/// files and/or record new ones, depending on its [`VcrMode`].
pub struct VcrClient {
    /// Mode selecting the replay/record behavior of `post_json`.
    mode: VcrMode,
    /// Root directory under which cassette files are read and written.
    cassette_dir: PathBuf,
    /// Scrubbing configuration.
    /// NOTE(review): stored but not read by any code visible in this file.
    scrub_config: ScrubConfig,
    /// Loaded and newly recorded cassettes, keyed by request fingerprint.
    cache: HashMap<String, CassetteEntry>,
    /// Underlying HTTP client used for real network requests.
    inner: reqwest::Client,
}
153
154impl VcrClient {
155 pub fn from_env() -> Self {
157 let mode = VcrMode::from_env();
158 let cassette_dir = env::var("ASSAY_VCR_DIR")
159 .map(PathBuf::from)
160 .unwrap_or_else(|_| PathBuf::from("tests/fixtures/perf/semantic_vcr/cassettes"));
161
162 Self::new(mode, cassette_dir)
163 }
164
165 pub fn new(mode: VcrMode, cassette_dir: PathBuf) -> Self {
167 let mut client = Self {
168 mode,
169 cassette_dir,
170 scrub_config: ScrubConfig::default_secure(),
171 cache: HashMap::new(),
172 inner: reqwest::Client::new(),
173 };
174
175 if matches!(
177 mode,
178 VcrMode::ReplayStrict | VcrMode::Replay | VcrMode::Auto
179 ) {
180 client.load_cassettes();
181 }
182
183 client
184 }
185
186 pub fn with_scrub_config(mut self, config: ScrubConfig) -> Self {
188 self.scrub_config = config;
189 self
190 }
191
192 pub fn fingerprint(method: &str, url: &str, body: Option<&serde_json::Value>) -> String {
196 let mut hasher = Sha256::new();
197 hasher.update(method.as_bytes());
198 hasher.update(b"|");
199
200 let normalized_url = url.trim_end_matches('/').to_lowercase();
202 hasher.update(normalized_url.as_bytes());
203 hasher.update(b"|");
204
205 if let Some(b) = body {
206 let canonical = serde_jcs::to_string(b).unwrap_or_else(|_| b.to_string());
208 hasher.update(canonical.as_bytes());
209 }
210
211 format!("{:x}", hasher.finalize())
212 }
213
214 fn provider_from_url(url: &str) -> &'static str {
216 if url.contains("openai.com") {
217 "openai"
218 } else if url.contains("anthropic.com") {
219 "anthropic"
220 } else {
221 "unknown"
222 }
223 }
224
225 fn kind_from_url(url: &str) -> &'static str {
227 if url.contains("/embeddings") {
228 "embeddings"
229 } else if url.contains("/chat/completions") {
230 "judge"
231 } else if url.contains("/completions") {
232 "completions"
233 } else {
234 "other"
235 }
236 }
237
238 fn extract_model(body: Option<&serde_json::Value>) -> Option<String> {
240 body.and_then(|b| b.get("model"))
241 .and_then(|m| m.as_str())
242 .map(|s| s.to_string())
243 }
244
245 fn load_cassettes(&mut self) {
247 let cassette_dir = self.cassette_dir.clone();
248 if !cassette_dir.exists() {
249 return;
250 }
251
252 for provider in &["openai", "anthropic", "unknown"] {
254 for kind in &["embeddings", "judge", "completions", "other"] {
255 let dir = cassette_dir.join(provider).join(kind);
256 if dir.exists() {
257 self.load_cassettes_from_dir(&dir);
258 }
259 }
260 }
261
262 for subdir in &["embeddings", "judge"] {
264 let dir = cassette_dir.join(subdir);
265 if dir.exists() {
266 self.load_cassettes_from_dir(&dir);
267 }
268 }
269
270 self.load_cassettes_from_dir(&cassette_dir);
272 }
273
274 fn load_cassettes_from_dir(&mut self, dir: &Path) {
275 let Ok(entries) = fs::read_dir(dir) else {
276 return;
277 };
278
279 for entry in entries.flatten() {
280 let path = entry.path();
281 if path.extension().map(|e| e == "json").unwrap_or(false) {
282 if let Ok(content) = fs::read_to_string(&path) {
283 if let Ok(cassette) = serde_json::from_str::<CassetteEntry>(&content) {
284 self.cache.insert(cassette.fingerprint.clone(), cassette);
285 }
286 }
287 }
288 }
289 }
290
291 fn save_cassette(&self, entry: &CassetteEntry) -> anyhow::Result<()> {
293 let provider = Self::provider_from_url(&entry.url);
294 let kind = Self::kind_from_url(&entry.url);
295
296 let dir = self.cassette_dir.join(provider).join(kind);
298 fs::create_dir_all(&dir)?;
299
300 let fp_prefix = if entry.fingerprint.len() >= 16 {
302 &entry.fingerprint[..16]
303 } else {
304 &entry.fingerprint
305 };
306 let filename = format!("{}.json", fp_prefix);
307 let final_path = dir.join(&filename);
308
309 let temp_path = dir.join(format!(".{}.tmp", fp_prefix));
311 let content = serde_json::to_string_pretty(entry)?;
312
313 {
314 let mut file = fs::File::create(&temp_path)?;
315 file.write_all(content.as_bytes())?;
316 file.sync_all()?;
317 }
318
319 fs::rename(&temp_path, &final_path)?;
320
321 Ok(())
322 }
323
324 pub async fn post_json(
326 &mut self,
327 url: &str,
328 body: &serde_json::Value,
329 auth_header: Option<&str>,
330 ) -> anyhow::Result<VcrResponse> {
331 let fingerprint = Self::fingerprint("POST", url, Some(body));
332
333 match self.mode {
334 VcrMode::ReplayStrict => {
335 if let Some(entry) = self.cache.get(&fingerprint) {
337 Ok(VcrResponse {
338 status: entry.status,
339 body: entry.response_body.clone(),
340 from_cache: true,
341 })
342 } else {
343 anyhow::bail!(
344 "VCR replay_strict: no cassette found for POST {} (fingerprint: {}).\n\
345 Run with ASSAY_VCR_MODE=record to record responses.\n\
346 Cassette dir: {}",
347 url,
348 &fingerprint[..16.min(fingerprint.len())],
349 self.cassette_dir.display()
350 )
351 }
352 }
353 VcrMode::Replay => {
354 if let Some(entry) = self.cache.get(&fingerprint) {
356 Ok(VcrResponse {
357 status: entry.status,
358 body: entry.response_body.clone(),
359 from_cache: true,
360 })
361 } else {
362 tracing::warn!(
364 "VCR replay: cache miss for POST {}, passing through to network",
365 url
366 );
367 self.make_request_and_record(url, body, auth_header, &fingerprint, false)
368 .await
369 }
370 }
371 VcrMode::Auto => {
372 if let Some(entry) = self.cache.get(&fingerprint) {
374 Ok(VcrResponse {
375 status: entry.status,
376 body: entry.response_body.clone(),
377 from_cache: true,
378 })
379 } else {
380 self.make_request_and_record(url, body, auth_header, &fingerprint, true)
381 .await
382 }
383 }
384 VcrMode::Record => {
385 self.make_request_and_record(url, body, auth_header, &fingerprint, true)
387 .await
388 }
389 VcrMode::Off => {
390 crate::providers::network::check_outbound(url)?;
392 let mut req = self.inner.post(url).json(body);
393 if let Some(auth) = auth_header {
394 req = req.header("Authorization", auth);
395 }
396 let resp = req.send().await?;
397 let status = resp.status().as_u16();
398 let response_body: serde_json::Value = resp.json().await?;
399
400 Ok(VcrResponse {
401 status,
402 body: response_body,
403 from_cache: false,
404 })
405 }
406 }
407 }
408
409 async fn make_request_and_record(
411 &mut self,
412 url: &str,
413 body: &serde_json::Value,
414 auth_header: Option<&str>,
415 fingerprint: &str,
416 should_record: bool,
417 ) -> anyhow::Result<VcrResponse> {
418 crate::providers::network::check_outbound(url)?;
419 let mut req = self.inner.post(url).json(body);
420 if let Some(auth) = auth_header {
421 req = req.header("Authorization", auth);
422 }
423 let resp = req.send().await?;
424
425 let status = resp.status().as_u16();
426 let response_body: serde_json::Value = resp.json().await?;
427
428 if should_record {
429 let entry = CassetteEntry {
430 schema_version: 2,
431 fingerprint: fingerprint.to_string(),
432 method: "POST".to_string(),
433 url: url.to_string(),
434 request_body: Some(body.clone()),
435 status,
436 response_body: response_body.clone(),
437 meta: CassetteMeta {
438 recorded_at: chrono::Utc::now().to_rfc3339(),
439 model: Self::extract_model(Some(body)),
440 provider: Self::provider_from_url(url).to_string(),
441 kind: Self::kind_from_url(url).to_string(),
442 },
443 };
444
445 if let Err(e) = self.save_cassette(&entry) {
446 tracing::warn!("VCR: failed to save cassette: {}", e);
447 }
448
449 self.cache.insert(fingerprint.to_string(), entry);
451 }
452
453 Ok(VcrResponse {
454 status,
455 body: response_body,
456 from_cache: false,
457 })
458 }
459
460 pub fn mode(&self) -> VcrMode {
462 self.mode
463 }
464
465 pub fn cassette_count(&self) -> usize {
467 self.cache.len()
468 }
469}
470
/// Result of a request made through `VcrClient`, whether replayed or live.
#[derive(Debug)]
pub struct VcrResponse {
    /// HTTP status code (recorded or live).
    pub status: u16,
    /// JSON response body (recorded or live).
    pub body: serde_json::Value,
    /// `true` when the response was served from a cassette rather than the
    /// network.
    pub from_cache: bool,
}
479
480impl VcrResponse {
481 pub fn is_success(&self) -> bool {
482 (200..300).contains(&self.status)
483 }
484}
485
// Unit tests for the VCR layer. None of them sends a real network request:
// they cover pure helpers, cassette file round-trips in a temp directory, and
// the outbound-policy check.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // The secure default must list the expected header names
    // (case-insensitively) and must not list any body paths.
    #[test]
    fn test_default_secure_scrub_paths() {
        let cfg = ScrubConfig::default_secure();
        assert!(
            cfg.request_headers
                .iter()
                .any(|h| h.eq_ignore_ascii_case("authorization")),
            "Must scrub Authorization"
        );
        assert!(
            cfg.request_headers
                .iter()
                .any(|h| h.eq_ignore_ascii_case("x-api-key")),
            "Must scrub x-api-key"
        );
        assert!(
            cfg.request_headers
                .iter()
                .any(|h| h.eq_ignore_ascii_case("api-key")),
            "Must scrub api-key"
        );
        assert!(
            cfg.response_headers
                .iter()
                .any(|h| h.eq_ignore_ascii_case("set-cookie")),
            "Must scrub set-cookie"
        );
        assert!(
            cfg.request_body_paths.is_empty(),
            "Default: no body paths (audit: explicit if needed)"
        );
        assert!(
            cfg.response_body_paths.is_empty(),
            "Default: no response body paths"
        );
    }

    // Same request -> same fingerprint; a different body -> a different one.
    #[test]
    fn test_fingerprint_stability() {
        let body = serde_json::json!({"input": "hello", "model": "text-embedding-3-small"});
        let fp1 =
            VcrClient::fingerprint("POST", "https://api.openai.com/v1/embeddings", Some(&body));
        let fp2 =
            VcrClient::fingerprint("POST", "https://api.openai.com/v1/embeddings", Some(&body));
        assert_eq!(fp1, fp2);

        let body2 = serde_json::json!({"input": "world", "model": "text-embedding-3-small"});
        let fp3 =
            VcrClient::fingerprint("POST", "https://api.openai.com/v1/embeddings", Some(&body2));
        assert_ne!(fp1, fp3);
    }

    // Two bodies that differ only in key order must fingerprint identically.
    #[test]
    fn test_fingerprint_key_order_invariant() {
        let body1 = serde_json::json!({"model": "gpt-4", "input": "hello"});
        let body2 = serde_json::json!({"input": "hello", "model": "gpt-4"});
        let fp1 =
            VcrClient::fingerprint("POST", "https://api.openai.com/v1/embeddings", Some(&body1));
        let fp2 =
            VcrClient::fingerprint("POST", "https://api.openai.com/v1/embeddings", Some(&body2));
        assert_eq!(fp1, fp2, "JCS should normalize key order");
    }

    // Walks ASSAY_VCR_MODE through every recognized value. The steps are
    // order-dependent and mutate the process-wide environment, so any other
    // test that reads this variable concurrently could observe an
    // intermediate value.
    #[test]
    fn test_vcr_mode_from_env() {
        // Unset -> strict default.
        env::remove_var("ASSAY_VCR_MODE");
        assert_eq!(VcrMode::from_env(), VcrMode::ReplayStrict);

        env::set_var("ASSAY_VCR_MODE", "record");
        assert_eq!(VcrMode::from_env(), VcrMode::Record);

        env::set_var("ASSAY_VCR_MODE", "auto");
        assert_eq!(VcrMode::from_env(), VcrMode::Auto);

        env::set_var("ASSAY_VCR_MODE", "replay");
        assert_eq!(VcrMode::from_env(), VcrMode::Replay);

        env::set_var("ASSAY_VCR_MODE", "off");
        assert_eq!(VcrMode::from_env(), VcrMode::Off);

        // "replay_strict" is not matched explicitly; it reaches ReplayStrict
        // through the fallback arm.
        env::set_var("ASSAY_VCR_MODE", "replay_strict");
        assert_eq!(VcrMode::from_env(), VcrMode::ReplayStrict);

        // Leave the environment clean for other tests.
        env::remove_var("ASSAY_VCR_MODE");
    }

    // Round-trip: save a cassette, check where it lands on disk, then load it
    // back through a second client.
    #[test]
    fn test_cassette_save_load_atomic() {
        let tmp = TempDir::new().unwrap();
        let client = VcrClient::new(VcrMode::Record, tmp.path().to_path_buf());

        let body = serde_json::json!({"input": "test", "model": "text-embedding-3-small"});
        let fingerprint =
            VcrClient::fingerprint("POST", "https://api.openai.com/v1/embeddings", Some(&body));

        let entry = CassetteEntry {
            schema_version: 2,
            fingerprint: fingerprint.clone(),
            method: "POST".to_string(),
            url: "https://api.openai.com/v1/embeddings".to_string(),
            request_body: Some(body),
            status: 200,
            response_body: serde_json::json!({"data": [{"embedding": [0.1, 0.2]}]}),
            meta: CassetteMeta {
                recorded_at: "2026-01-30T12:00:00Z".to_string(),
                model: Some("text-embedding-3-small".to_string()),
                provider: "openai".to_string(),
                kind: "embeddings".to_string(),
            },
        };

        client.save_cassette(&entry).unwrap();

        // Expected layout: <dir>/<provider>/<kind>/<first 16 chars>.json
        let expected_path = tmp
            .path()
            .join("openai")
            .join("embeddings")
            .join(format!("{}.json", &fingerprint[..16]));
        assert!(expected_path.exists(), "Cassette file should exist");

        // `new` already loads cassettes in ReplayStrict mode; the explicit
        // call below loads them a second time.
        let mut client2 = VcrClient::new(VcrMode::ReplayStrict, tmp.path().to_path_buf());
        client2.load_cassettes();

        assert!(client2.cache.contains_key(&fingerprint));
        assert_eq!(client2.cache.get(&fingerprint).unwrap().status, 200);
    }

    // URL classification used for the cassette directory layout.
    #[test]
    fn test_provider_and_kind_detection() {
        assert_eq!(
            VcrClient::provider_from_url("https://api.openai.com/v1/embeddings"),
            "openai"
        );
        assert_eq!(
            VcrClient::kind_from_url("https://api.openai.com/v1/embeddings"),
            "embeddings"
        );
        assert_eq!(
            VcrClient::kind_from_url("https://api.openai.com/v1/chat/completions"),
            "judge"
        );
    }

    // With a deny policy installed, a pass-through request in Off mode must
    // fail with the policy error.
    // NOTE(review): `lock_test_serial_async` and `NetworkPolicyGuard` live in
    // `crate::providers::network`, outside this file; presumably the lock
    // serializes tests that touch the network policy and the guard restores
    // the previous policy on drop — confirm.
    #[tokio::test]
    async fn test_network_policy_blocks_passthrough_modes() {
        let _serial = crate::providers::network::lock_test_serial_async().await;
        let tmp = TempDir::new().unwrap();
        let mut client = VcrClient::new(VcrMode::Off, tmp.path().to_path_buf());
        let _guard = crate::providers::network::NetworkPolicyGuard::deny("unit test");
        let body = serde_json::json!({"input": "test", "model": "gpt-4o-mini"});
        let err = client
            .post_json("https://api.openai.com/v1/chat/completions", &body, None)
            .await
            .expect_err("deny policy must block passthrough network");
        assert!(err
            .to_string()
            .contains("outbound network blocked by policy"));
    }
}