// assay_core/vcr/mod.rs

//! VCR (Video Cassette Recording) middleware for HTTP request/response recording and replay.
//!
//! State-of-the-art implementation (Jan 2026) with:
//! - Provider-level interception (typed requests, not raw HTTP)
//! - JCS fingerprinting (RFC 8785 canonical JSON)
//! - Scrubbers for security hygiene (no secrets in cassettes)
//! - Atomic writes (temp + rename) for parallel safety
//! - Strict replay mode for hermetic CI
//!
//! # Environment Variables
//!
//! - `ASSAY_VCR_MODE`: `replay_strict` (CI default), `replay`, `record`, `auto`, `off`
//! - `ASSAY_VCR_DIR`: Path to cassette directory (default: `tests/fixtures/perf/semantic_vcr/cassettes`)
//!
//! # Matching
//!
//! Requests are matched by fingerprint: method + URL + canonical body (JCS).
//! Authorization headers and transient metadata are excluded.
19
20use serde::{Deserialize, Serialize};
21use sha2::{Digest, Sha256};
22use std::collections::HashMap;
23use std::env;
24use std::fs;
25use std::io::Write;
26use std::path::{Path, PathBuf};
27
/// VCR mode: how outgoing HTTP requests are handled.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum VcrMode {
    /// Replay from cassettes; fail if no match (CI default, hermetic)
    #[default]
    ReplayStrict,
    /// Replay from cassettes; passthrough if no match (dangerous in CI)
    Replay,
    /// Record to cassettes; make real requests (local only)
    Record,
    /// Auto: replay if hit, record if miss (convenient for local dev)
    Auto,
    /// Pass through to live network; no recording
    Off,
}

impl VcrMode {
    /// Parse the mode from the environment variable `ASSAY_VCR_MODE`.
    ///
    /// Matching is case-insensitive and ignores surrounding whitespace. An unset,
    /// non-Unicode, empty, or unrecognised value yields [`VcrMode::ReplayStrict`] so
    /// that a misconfigured CI job can never reach the network.
    pub fn from_env() -> Self {
        env::var("ASSAY_VCR_MODE")
            .map(|raw| Self::parse_lossy(&raw))
            .unwrap_or_default()
    }

    /// Map a raw mode string onto a variant, falling back to `ReplayStrict`.
    fn parse_lossy(raw: &str) -> Self {
        // Trim first: values copied from shell scripts or CI YAML often carry a
        // trailing newline or padding, which must not change the selected mode.
        match raw.trim().to_lowercase().as_str() {
            "record" => VcrMode::Record,
            "auto" => VcrMode::Auto,
            "replay" => VcrMode::Replay,
            "off" => VcrMode::Off,
            // Includes "replay_strict" itself: default to strict replay for CI safety.
            _ => VcrMode::ReplayStrict,
        }
    }

    /// Is this mode allowed to make network requests?
    ///
    /// Returns `false` only for [`VcrMode::ReplayStrict`].
    pub fn allows_network(&self) -> bool {
        // Exhaustive on purpose: adding a variant must force an explicit decision here.
        match self {
            VcrMode::ReplayStrict => false,
            VcrMode::Replay | VcrMode::Record | VcrMode::Auto | VcrMode::Off => true,
        }
    }

    /// Does this mode fail on cassette miss?
    ///
    /// Returns `true` only for [`VcrMode::ReplayStrict`].
    pub fn fails_on_miss(&self) -> bool {
        // Exhaustive on purpose, for the same reason as `allows_network`.
        match self {
            VcrMode::ReplayStrict => true,
            VcrMode::Replay | VcrMode::Record | VcrMode::Auto | VcrMode::Off => false,
        }
    }
}
74
/// Lists of headers and JSON body paths that should be scrubbed from cassettes.
#[derive(Debug, Clone, Default)]
pub struct ScrubConfig {
    /// Request header names to strip (intended to be compared case-insensitively)
    pub request_headers: Vec<String>,
    /// Response header names to strip
    pub response_headers: Vec<String>,
    /// JSON paths to redact inside the request body (e.g., "$.api_key")
    pub request_body_paths: Vec<String>,
    /// JSON paths to redact inside the response body
    pub response_body_paths: Vec<String>,
}

impl ScrubConfig {
    /// Secure baseline: lists auth-bearing request headers and identifying response
    /// headers, and leaves both body-path lists empty (body redaction is opt-in).
    pub fn default_secure() -> Self {
        /// Turn a list of static header names into owned strings.
        fn owned(names: &[&str]) -> Vec<String> {
            names.iter().map(|name| (*name).to_owned()).collect()
        }

        Self {
            request_headers: owned(&[
                "authorization",
                "x-api-key",
                "openai-organization",
                "api-key",
            ]),
            response_headers: owned(&["set-cookie", "x-request-id", "cf-ray"]),
            request_body_paths: Vec::new(),
            response_body_paths: Vec::new(),
        }
    }
}
108
109/// A recorded HTTP request/response pair (cassette entry)
110#[derive(Debug, Clone, Serialize, Deserialize)]
111pub struct CassetteEntry {
112    /// Schema version for forward compatibility
113    pub schema_version: u32,
114    /// Fingerprint used for matching
115    pub fingerprint: String,
116    /// HTTP method
117    pub method: String,
118    /// Request URL (without query params that vary)
119    pub url: String,
120    /// Request body (canonical JSON)
121    pub request_body: Option<serde_json::Value>,
122    /// Response status code
123    pub status: u16,
124    /// Response body
125    pub response_body: serde_json::Value,
126    /// Metadata
127    pub meta: CassetteMeta,
128}
129
130/// Cassette metadata for debugging and versioning
131#[derive(Debug, Clone, Serialize, Deserialize)]
132pub struct CassetteMeta {
133    /// When this cassette was recorded
134    pub recorded_at: String,
135    /// Model used (if applicable)
136    #[serde(skip_serializing_if = "Option::is_none")]
137    pub model: Option<String>,
138    /// Provider (openai, anthropic, etc.)
139    pub provider: String,
140    /// Kind (embeddings, judge, chat)
141    pub kind: String,
142}
143
144/// VCR client for HTTP request interception
145pub struct VcrClient {
146    mode: VcrMode,
147    cassette_dir: PathBuf,
148    scrub_config: ScrubConfig,
149    /// In-memory cassette cache (fingerprint -> entry)
150    cache: HashMap<String, CassetteEntry>,
151    inner: reqwest::Client,
152}
153
154impl VcrClient {
155    /// Create a new VCR client with mode and directory from environment
156    pub fn from_env() -> Self {
157        let mode = VcrMode::from_env();
158        let cassette_dir = env::var("ASSAY_VCR_DIR")
159            .map(PathBuf::from)
160            .unwrap_or_else(|_| PathBuf::from("tests/fixtures/perf/semantic_vcr/cassettes"));
161
162        Self::new(mode, cassette_dir)
163    }
164
165    /// Create a new VCR client with explicit mode and directory
166    pub fn new(mode: VcrMode, cassette_dir: PathBuf) -> Self {
167        let mut client = Self {
168            mode,
169            cassette_dir,
170            scrub_config: ScrubConfig::default_secure(),
171            cache: HashMap::new(),
172            inner: reqwest::Client::new(),
173        };
174
175        // Load existing cassettes in replay modes
176        if matches!(
177            mode,
178            VcrMode::ReplayStrict | VcrMode::Replay | VcrMode::Auto
179        ) {
180            client.load_cassettes();
181        }
182
183        client
184    }
185
186    /// Set custom scrub configuration
187    pub fn with_scrub_config(mut self, config: ScrubConfig) -> Self {
188        self.scrub_config = config;
189        self
190    }
191
192    /// Compute fingerprint for request matching using JCS (RFC 8785)
193    ///
194    /// Fingerprint includes: method + URL + canonical body (excluding auth)
195    pub fn fingerprint(method: &str, url: &str, body: Option<&serde_json::Value>) -> String {
196        let mut hasher = Sha256::new();
197        hasher.update(method.as_bytes());
198        hasher.update(b"|");
199
200        // Normalize URL (remove trailing slashes, lowercase)
201        let normalized_url = url.trim_end_matches('/').to_lowercase();
202        hasher.update(normalized_url.as_bytes());
203        hasher.update(b"|");
204
205        if let Some(b) = body {
206            // Use JCS for canonical JSON (RFC 8785)
207            let canonical = serde_jcs::to_string(b).unwrap_or_else(|_| b.to_string());
208            hasher.update(canonical.as_bytes());
209        }
210
211        format!("{:x}", hasher.finalize())
212    }
213
214    /// Determine provider from URL
215    fn provider_from_url(url: &str) -> &'static str {
216        if url.contains("openai.com") {
217            "openai"
218        } else if url.contains("anthropic.com") {
219            "anthropic"
220        } else {
221            "unknown"
222        }
223    }
224
225    /// Determine kind (embeddings/judge/chat) from URL
226    fn kind_from_url(url: &str) -> &'static str {
227        if url.contains("/embeddings") {
228            "embeddings"
229        } else if url.contains("/chat/completions") {
230            "judge"
231        } else if url.contains("/completions") {
232            "completions"
233        } else {
234            "other"
235        }
236    }
237
238    /// Extract model from request body if present
239    fn extract_model(body: Option<&serde_json::Value>) -> Option<String> {
240        body.and_then(|b| b.get("model"))
241            .and_then(|m| m.as_str())
242            .map(|s| s.to_string())
243    }
244
245    /// Load all cassettes from disk into memory
246    fn load_cassettes(&mut self) {
247        let cassette_dir = self.cassette_dir.clone();
248        if !cassette_dir.exists() {
249            return;
250        }
251
252        // Load from provider/kind subdirs
253        for provider in &["openai", "anthropic", "unknown"] {
254            for kind in &["embeddings", "judge", "completions", "other"] {
255                let dir = cassette_dir.join(provider).join(kind);
256                if dir.exists() {
257                    self.load_cassettes_from_dir(&dir);
258                }
259            }
260        }
261
262        // Also load from legacy structure (embeddings/, judge/ at root)
263        for subdir in &["embeddings", "judge"] {
264            let dir = cassette_dir.join(subdir);
265            if dir.exists() {
266                self.load_cassettes_from_dir(&dir);
267            }
268        }
269
270        // Load from root cassette dir
271        self.load_cassettes_from_dir(&cassette_dir);
272    }
273
274    fn load_cassettes_from_dir(&mut self, dir: &Path) {
275        let Ok(entries) = fs::read_dir(dir) else {
276            return;
277        };
278
279        for entry in entries.flatten() {
280            let path = entry.path();
281            if path.extension().map(|e| e == "json").unwrap_or(false) {
282                if let Ok(content) = fs::read_to_string(&path) {
283                    if let Ok(cassette) = serde_json::from_str::<CassetteEntry>(&content) {
284                        self.cache.insert(cassette.fingerprint.clone(), cassette);
285                    }
286                }
287            }
288        }
289    }
290
291    /// Save a cassette entry to disk (atomic: temp + rename)
292    fn save_cassette(&self, entry: &CassetteEntry) -> anyhow::Result<()> {
293        let provider = Self::provider_from_url(&entry.url);
294        let kind = Self::kind_from_url(&entry.url);
295
296        // Create directory: cassettes/<provider>/<kind>/
297        let dir = self.cassette_dir.join(provider).join(kind);
298        fs::create_dir_all(&dir)?;
299
300        // Use first 16 chars of fingerprint for filename
301        let fp_prefix = if entry.fingerprint.len() >= 16 {
302            &entry.fingerprint[..16]
303        } else {
304            &entry.fingerprint
305        };
306        let filename = format!("{}.json", fp_prefix);
307        let final_path = dir.join(&filename);
308
309        // Atomic write: temp file + rename
310        let temp_path = dir.join(format!(".{}.tmp", fp_prefix));
311        let content = serde_json::to_string_pretty(entry)?;
312
313        {
314            let mut file = fs::File::create(&temp_path)?;
315            file.write_all(content.as_bytes())?;
316            file.sync_all()?;
317        }
318
319        fs::rename(&temp_path, &final_path)?;
320
321        Ok(())
322    }
323
324    /// Make a POST request with VCR handling
325    pub async fn post_json(
326        &mut self,
327        url: &str,
328        body: &serde_json::Value,
329        auth_header: Option<&str>,
330    ) -> anyhow::Result<VcrResponse> {
331        let fingerprint = Self::fingerprint("POST", url, Some(body));
332
333        match self.mode {
334            VcrMode::ReplayStrict => {
335                // Strict replay: must find cassette
336                if let Some(entry) = self.cache.get(&fingerprint) {
337                    Ok(VcrResponse {
338                        status: entry.status,
339                        body: entry.response_body.clone(),
340                        from_cache: true,
341                    })
342                } else {
343                    anyhow::bail!(
344                        "VCR replay_strict: no cassette found for POST {} (fingerprint: {}).\n\
345                        Run with ASSAY_VCR_MODE=record to record responses.\n\
346                        Cassette dir: {}",
347                        url,
348                        &fingerprint[..16.min(fingerprint.len())],
349                        self.cassette_dir.display()
350                    )
351                }
352            }
353            VcrMode::Replay => {
354                // Soft replay: try cache, passthrough on miss
355                if let Some(entry) = self.cache.get(&fingerprint) {
356                    Ok(VcrResponse {
357                        status: entry.status,
358                        body: entry.response_body.clone(),
359                        from_cache: true,
360                    })
361                } else {
362                    // Passthrough (dangerous in CI!)
363                    tracing::warn!(
364                        "VCR replay: cache miss for POST {}, passing through to network",
365                        url
366                    );
367                    self.make_request_and_record(url, body, auth_header, &fingerprint, false)
368                        .await
369                }
370            }
371            VcrMode::Auto => {
372                // Auto: replay if hit, record if miss
373                if let Some(entry) = self.cache.get(&fingerprint) {
374                    Ok(VcrResponse {
375                        status: entry.status,
376                        body: entry.response_body.clone(),
377                        from_cache: true,
378                    })
379                } else {
380                    self.make_request_and_record(url, body, auth_header, &fingerprint, true)
381                        .await
382                }
383            }
384            VcrMode::Record => {
385                // Always record (overwrite existing)
386                self.make_request_and_record(url, body, auth_header, &fingerprint, true)
387                    .await
388            }
389            VcrMode::Off => {
390                // Pass through, no recording
391                crate::providers::network::check_outbound(url)?;
392                let mut req = self.inner.post(url).json(body);
393                if let Some(auth) = auth_header {
394                    req = req.header("Authorization", auth);
395                }
396                let resp = req.send().await?;
397                let status = resp.status().as_u16();
398                let response_body: serde_json::Value = resp.json().await?;
399
400                Ok(VcrResponse {
401                    status,
402                    body: response_body,
403                    from_cache: false,
404                })
405            }
406        }
407    }
408
409    /// Make real HTTP request and optionally record to cassette
410    async fn make_request_and_record(
411        &mut self,
412        url: &str,
413        body: &serde_json::Value,
414        auth_header: Option<&str>,
415        fingerprint: &str,
416        should_record: bool,
417    ) -> anyhow::Result<VcrResponse> {
418        crate::providers::network::check_outbound(url)?;
419        let mut req = self.inner.post(url).json(body);
420        if let Some(auth) = auth_header {
421            req = req.header("Authorization", auth);
422        }
423        let resp = req.send().await?;
424
425        let status = resp.status().as_u16();
426        let response_body: serde_json::Value = resp.json().await?;
427
428        if should_record {
429            let entry = CassetteEntry {
430                schema_version: 2,
431                fingerprint: fingerprint.to_string(),
432                method: "POST".to_string(),
433                url: url.to_string(),
434                request_body: Some(body.clone()),
435                status,
436                response_body: response_body.clone(),
437                meta: CassetteMeta {
438                    recorded_at: chrono::Utc::now().to_rfc3339(),
439                    model: Self::extract_model(Some(body)),
440                    provider: Self::provider_from_url(url).to_string(),
441                    kind: Self::kind_from_url(url).to_string(),
442                },
443            };
444
445            if let Err(e) = self.save_cassette(&entry) {
446                tracing::warn!("VCR: failed to save cassette: {}", e);
447            }
448
449            // Add to cache
450            self.cache.insert(fingerprint.to_string(), entry);
451        }
452
453        Ok(VcrResponse {
454            status,
455            body: response_body,
456            from_cache: false,
457        })
458    }
459
460    /// Get the current VCR mode
461    pub fn mode(&self) -> VcrMode {
462        self.mode
463    }
464
465    /// Get cassette count (for diagnostics)
466    pub fn cassette_count(&self) -> usize {
467        self.cache.len()
468    }
469}
470
471/// Response from VCR client
472#[derive(Debug)]
473pub struct VcrResponse {
474    pub status: u16,
475    pub body: serde_json::Value,
476    /// True if response came from cache (replay)
477    pub from_cache: bool,
478}
479
480impl VcrResponse {
481    pub fn is_success(&self) -> bool {
482        (200..300).contains(&self.status)
483    }
484}
485
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Endpoint used by the fingerprint and cassette tests.
    const EMBEDDINGS_URL: &str = "https://api.openai.com/v1/embeddings";
    /// Endpoint used by the kind-detection and network-policy tests.
    const CHAT_URL: &str = "https://api.openai.com/v1/chat/completions";

    /// Case-insensitive membership check on a header list.
    fn lists_header(headers: &[String], wanted: &str) -> bool {
        headers.iter().any(|h| h.eq_ignore_ascii_case(wanted))
    }

    /// Sign-off: default_secure() must list auth and common secret headers so cassettes don't leak.
    #[test]
    fn test_default_secure_scrub_paths() {
        let cfg = ScrubConfig::default_secure();

        for (header, message) in [
            ("authorization", "Must scrub Authorization"),
            ("x-api-key", "Must scrub x-api-key"),
            ("api-key", "Must scrub api-key"),
        ] {
            assert!(lists_header(&cfg.request_headers, header), "{}", message);
        }
        assert!(
            lists_header(&cfg.response_headers, "set-cookie"),
            "Must scrub set-cookie"
        );

        assert!(
            cfg.request_body_paths.is_empty(),
            "Default: no body paths (audit: explicit if needed)"
        );
        assert!(
            cfg.response_body_paths.is_empty(),
            "Default: no response body paths"
        );
    }

    /// Same input gives the same fingerprint; a different body gives a different one.
    #[test]
    fn test_fingerprint_stability() {
        let body = serde_json::json!({"input": "hello", "model": "text-embedding-3-small"});
        let first = VcrClient::fingerprint("POST", EMBEDDINGS_URL, Some(&body));
        let second = VcrClient::fingerprint("POST", EMBEDDINGS_URL, Some(&body));
        assert_eq!(first, second);

        // Different body = different fingerprint
        let other_body = serde_json::json!({"input": "world", "model": "text-embedding-3-small"});
        let third = VcrClient::fingerprint("POST", EMBEDDINGS_URL, Some(&other_body));
        assert_ne!(first, third);
    }

    /// JCS canonicalization makes the fingerprint independent of JSON key order.
    #[test]
    fn test_fingerprint_key_order_invariant() {
        let model_first = serde_json::json!({"model": "gpt-4", "input": "hello"});
        let input_first = serde_json::json!({"input": "hello", "model": "gpt-4"});

        let fp_a = VcrClient::fingerprint("POST", EMBEDDINGS_URL, Some(&model_first));
        let fp_b = VcrClient::fingerprint("POST", EMBEDDINGS_URL, Some(&input_first));
        assert_eq!(fp_a, fp_b, "JCS should normalize key order");
    }

    /// Every documented `ASSAY_VCR_MODE` value maps to its variant; unset means strict replay.
    #[test]
    fn test_vcr_mode_from_env() {
        env::remove_var("ASSAY_VCR_MODE");
        assert_eq!(VcrMode::from_env(), VcrMode::ReplayStrict);

        for (raw, expected) in [
            ("record", VcrMode::Record),
            ("auto", VcrMode::Auto),
            ("replay", VcrMode::Replay),
            ("off", VcrMode::Off),
            ("replay_strict", VcrMode::ReplayStrict),
        ] {
            env::set_var("ASSAY_VCR_MODE", raw);
            assert_eq!(VcrMode::from_env(), expected);
        }

        env::remove_var("ASSAY_VCR_MODE");
    }

    /// A saved cassette lands in `<provider>/<kind>/<fp16>.json` and is picked up on reload.
    #[test]
    fn test_cassette_save_load_atomic() {
        let tmp = TempDir::new().unwrap();
        let recorder = VcrClient::new(VcrMode::Record, tmp.path().to_path_buf());

        let body = serde_json::json!({"input": "test", "model": "text-embedding-3-small"});
        let fingerprint = VcrClient::fingerprint("POST", EMBEDDINGS_URL, Some(&body));

        let meta = CassetteMeta {
            recorded_at: "2026-01-30T12:00:00Z".to_string(),
            model: Some("text-embedding-3-small".to_string()),
            provider: "openai".to_string(),
            kind: "embeddings".to_string(),
        };
        let entry = CassetteEntry {
            schema_version: 2,
            fingerprint: fingerprint.clone(),
            method: "POST".to_string(),
            url: EMBEDDINGS_URL.to_string(),
            request_body: Some(body),
            status: 200,
            response_body: serde_json::json!({"data": [{"embedding": [0.1, 0.2]}]}),
            meta,
        };

        recorder.save_cassette(&entry).unwrap();

        // Verify file exists in correct location
        let expected_path = tmp
            .path()
            .join("openai")
            .join("embeddings")
            .join(format!("{}.json", &fingerprint[..16]));
        assert!(expected_path.exists(), "Cassette file should exist");

        // Reload and verify
        let mut replayer = VcrClient::new(VcrMode::ReplayStrict, tmp.path().to_path_buf());
        replayer.load_cassettes();

        assert!(replayer.cache.contains_key(&fingerprint));
        assert_eq!(replayer.cache.get(&fingerprint).unwrap().status, 200);
    }

    /// Provider and kind labels are derived from the URL.
    #[test]
    fn test_provider_and_kind_detection() {
        assert_eq!(VcrClient::provider_from_url(EMBEDDINGS_URL), "openai");
        assert_eq!(VcrClient::kind_from_url(EMBEDDINGS_URL), "embeddings");
        assert_eq!(VcrClient::kind_from_url(CHAT_URL), "judge");
    }

    /// With a deny policy installed, passthrough mode must fail before touching the network.
    #[tokio::test]
    async fn test_network_policy_blocks_passthrough_modes() {
        let _serial = crate::providers::network::lock_test_serial_async().await;
        let tmp = TempDir::new().unwrap();
        let mut client = VcrClient::new(VcrMode::Off, tmp.path().to_path_buf());
        let _guard = crate::providers::network::NetworkPolicyGuard::deny("unit test");

        let body = serde_json::json!({"input": "test", "model": "gpt-4o-mini"});
        let err = client
            .post_json(CHAT_URL, &body, None)
            .await
            .expect_err("deny policy must block passthrough network");

        let rendered = err.to_string();
        assert!(rendered.contains("outbound network blocked by policy"));
    }
}