http_client_vcr/
lib.rs

1use async_trait::async_trait;
2use http_client::{Error, HttpClient, Request, Response};
3use std::path::PathBuf;
4use std::sync::Arc;
5use tokio::sync::Mutex;
6
7mod cassette;
8mod filter;
9mod form_data;
10mod matcher;
11mod noop_client;
12mod serializable;
13mod utils;
14
15pub use cassette::{Cassette, CassetteFormat, Interaction};
16pub use filter::{
17    BodyFilter, CustomFilter, Filter, FilterChain, HeaderFilter, SmartFormFilter, UrlFilter,
18};
19pub use form_data::{
20    analyze_form_data, filter_form_data, find_credential_fields, parse_form_data, FormDataAnalysis,
21};
22pub use matcher::{DefaultMatcher, ExactMatcher, RequestMatcher};
23pub use noop_client::{NoOpClient, PanickingNoOpClient};
24pub use serializable::{SerializableRequest, SerializableResponse};
25pub use utils::CassetteAnalysis;
26
/// Controls how a [`VcrClient`] treats each outgoing request.
///
/// The enum is fieldless, so it is `Copy` and can be compared with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VcrMode {
    /// Always send the request through the inner client and record the exchange.
    /// Existing interactions are never matched; the cassette is cleared when the
    /// first interaction of the session is recorded.
    Record,
    /// Serve responses from the cassette only; an unmatched request is an error.
    Replay,
    /// Replay a matching interaction when one exists. Without a match, the real
    /// request is performed and recorded only if the cassette reports itself
    /// empty; otherwise the request fails.
    Once,
    /// Bypass the cassette entirely and forward every request to the inner client.
    None,
    /// Serve matching interactions from the cassette; new requests are not allowed.
    Filter,
}
35
/// An `HttpClient` wrapper that records interactions to, and replays them from,
/// a [`Cassette`] according to the configured [`VcrMode`].
#[derive(Debug)]
pub struct VcrClient {
    /// Client that real requests are forwarded to.
    inner: Box<dyn HttpClient>,
    /// Recorded interactions, shared behind an async mutex.
    cassette: Arc<Mutex<Cassette>>,
    /// Current operating mode; selects the handler used by `send`.
    mode: VcrMode,
    /// Decides whether an incoming request matches a recorded one.
    matcher: Box<dyn RequestMatcher>,
    /// Filters applied to requests/responses before they are stored, and to
    /// incoming requests before they are matched.
    filter_chain: FilterChain,
    /// Set once the cassette has been cleared for the first interaction
    /// recorded in `Record` mode.
    recording_started: Arc<Mutex<bool>>,
    /// Indices of the interactions that have already been replayed, so each
    /// recorded interaction is matched at most once.
    used_interactions: Arc<Mutex<std::collections::HashSet<usize>>>,
}
47
48/// Duplicate a request while preserving the body.
49///
50/// Since Request::clone() sets the body to empty, this function properly
51/// duplicates a request by reading the body into memory and restoring it
52/// on both the original and cloned request.
53///
54/// Returns (request_for_sending, request_for_recording)
55async fn duplicate_request_with_body(mut req: Request) -> Result<(Request, Request), Error> {
56    // Read the body into bytes
57    let body_bytes = req
58        .take_body()
59        .into_bytes()
60        .await
61        .map_err(|e| Error::from_str(500, format!("Failed to read request body: {e}")))?;
62
63    // Clone the request (this gets everything except the body)
64    let mut req_for_recording = req.clone();
65
66    // Set the body on both requests (this creates two independent Body instances)
67    req.set_body(body_bytes.clone());
68    req_for_recording.set_body(body_bytes);
69
70    Ok((req, req_for_recording))
71}
72
73impl VcrClient {
74    pub fn new(inner: Box<dyn HttpClient>, mode: VcrMode, cassette: Cassette) -> Self {
75        Self {
76            inner,
77            cassette: Arc::new(Mutex::new(cassette)),
78            mode,
79            matcher: Box::new(DefaultMatcher::new()),
80            filter_chain: FilterChain::new(),
81            recording_started: Arc::new(Mutex::new(false)),
82            used_interactions: Arc::new(Mutex::new(std::collections::HashSet::new())),
83        }
84    }
85
86    /// Synchronous version of directory save for use in Drop
87    fn save_cassette_as_directory_sync(
88        cassette: &Cassette,
89        path: &PathBuf,
90    ) -> Result<(), std::io::Error> {
91        use serde::Serialize;
92
93        // Create the cassette directory and bodies subdirectory
94        std::fs::create_dir_all(path)?;
95        let bodies_dir = path.join("bodies");
96        std::fs::create_dir_all(&bodies_dir)?;
97
98        // Create directory format structures for serialization
99        #[derive(Serialize)]
100        struct DirectoryInteraction {
101            request: DirectorySerializableRequest,
102            response: DirectorySerializableResponse,
103        }
104
105        #[derive(Serialize)]
106        struct DirectorySerializableRequest {
107            method: String,
108            url: String,
109            headers: std::collections::HashMap<String, Vec<String>>,
110            #[serde(skip_serializing_if = "Option::is_none")]
111            body_file: Option<String>,
112            version: String,
113        }
114
115        #[derive(Serialize)]
116        struct DirectorySerializableResponse {
117            status: u16,
118            headers: std::collections::HashMap<String, Vec<String>>,
119            #[serde(skip_serializing_if = "Option::is_none")]
120            body_file: Option<String>,
121            version: String,
122        }
123
124        let mut dir_interactions = Vec::new();
125
126        for (i, interaction) in cassette.interactions.iter().enumerate() {
127            let interaction_num = format!("{:03}", i + 1);
128
129            // Handle request body
130            let request_body_file = if let Some(ref body) = interaction.request.body {
131                if !body.is_empty() {
132                    let filename = format!("req_{interaction_num}.txt");
133                    let body_path = bodies_dir.join(&filename);
134                    std::fs::write(&body_path, body)?;
135                    Some(filename)
136                } else {
137                    None
138                }
139            } else if let Some(ref body_base64) = interaction.request.body_base64 {
140                if !body_base64.is_empty() {
141                    let filename = format!("req_{interaction_num}.b64");
142                    let body_path = bodies_dir.join(&filename);
143                    std::fs::write(&body_path, body_base64)?;
144                    Some(filename)
145                } else {
146                    None
147                }
148            } else {
149                None
150            };
151
152            // Handle response body
153            let response_body_file = if let Some(ref body) = interaction.response.body {
154                if !body.is_empty() {
155                    let filename = format!("resp_{interaction_num}.txt");
156                    let body_path = bodies_dir.join(&filename);
157                    std::fs::write(&body_path, body)?;
158                    Some(filename)
159                } else {
160                    None
161                }
162            } else if let Some(ref body_base64) = interaction.response.body_base64 {
163                if !body_base64.is_empty() {
164                    let filename = format!("resp_{interaction_num}.b64");
165                    let body_path = bodies_dir.join(&filename);
166                    std::fs::write(&body_path, body_base64)?;
167                    Some(filename)
168                } else {
169                    None
170                }
171            } else {
172                None
173            };
174
175            let dir_interaction = DirectoryInteraction {
176                request: DirectorySerializableRequest {
177                    method: interaction.request.method.clone(),
178                    url: interaction.request.url.clone(),
179                    headers: interaction.request.headers.clone(),
180                    body_file: request_body_file,
181                    version: interaction.request.version.clone(),
182                },
183                response: DirectorySerializableResponse {
184                    status: interaction.response.status,
185                    headers: interaction.response.headers.clone(),
186                    body_file: response_body_file,
187                    version: interaction.response.version.clone(),
188                },
189            };
190
191            dir_interactions.push(dir_interaction);
192        }
193
194        // Write the interactions.yaml file
195        let interactions_yaml = serde_yaml::to_string(&dir_interactions)
196            .map_err(|e| std::io::Error::other(format!("Failed to serialize interactions: {e}")))?;
197
198        let interactions_file = path.join("interactions.yaml");
199        std::fs::write(&interactions_file, interactions_yaml)?;
200
201        Ok(())
202    }
203
204    /// Create a pristine response from extracted data, completely independent of VCR processing
205    fn create_pristine_response(
206        status: http_types::StatusCode,
207        headers: &std::collections::HashMap<String, Vec<String>>,
208        body_content: Option<&str>,
209    ) -> Response {
210        let mut return_response = http_types::Response::new(status);
211
212        // Copy all headers from the extracted header map
213        for (name, values) in headers {
214            for value in values {
215                let _ = return_response.append_header(name.as_str(), value.as_str());
216            }
217        }
218
219        // Set the body if we have content
220        if let Some(body) = body_content {
221            return_response.set_body(body);
222        }
223
224        return_response
225    }
226
227    pub fn set_mode(&mut self, mode: VcrMode) {
228        self.mode = mode;
229    }
230
231    pub fn set_matcher(&mut self, matcher: Box<dyn RequestMatcher>) {
232        self.matcher = matcher;
233    }
234
235    pub fn set_filter_chain(&mut self, filter_chain: FilterChain) {
236        self.filter_chain = filter_chain;
237    }
238
239    pub fn add_filter(&mut self, filter: Box<dyn Filter>) {
240        self.filter_chain = std::mem::take(&mut self.filter_chain).add_filter(filter);
241    }
242
243    async fn find_match<'a>(
244        &self,
245        request: &Request,
246        cassette: &'a Cassette,
247    ) -> Option<(usize, &'a Interaction)> {
248        let used_interactions = self.used_interactions.lock().await;
249
250        // Create a filtered copy of the request for matching against stored filtered interactions
251        if let Ok(mut filtered_request) = SerializableRequest::from_request(request.clone()).await {
252            self.filter_chain.filter_request(&mut filtered_request);
253
254            cassette
255                .interactions
256                .iter()
257                .enumerate()
258                .find(|(index, interaction)| {
259                    !used_interactions.contains(index)
260                        && self
261                            .matcher
262                            .matches_serializable(&filtered_request, &interaction.request)
263                })
264        } else {
265            // Fallback to matching against stored interactions directly
266            cassette
267                .interactions
268                .iter()
269                .enumerate()
270                .find(|(index, interaction)| {
271                    !used_interactions.contains(index)
272                        && self.matcher.matches(request, &interaction.request)
273                })
274        }
275    }
276
277    /// Find similar URLs using Levenshtein distance when exact match fails
278    async fn find_similar_urls(
279        &self,
280        request: &Request,
281        cassette: &Cassette,
282    ) -> Vec<(String, usize)> {
283        let request_url = request.url().to_string();
284        let mut similarities = Vec::new();
285
286        for interaction in &cassette.interactions {
287            let recorded_url = &interaction.request.url;
288            let distance = levenshtein::levenshtein(&request_url, recorded_url);
289            similarities.push((recorded_url.clone(), distance));
290        }
291
292        // Sort by distance (smaller distance = more similar)
293        similarities.sort_by_key(|(_, distance)| *distance);
294
295        // Return only the top 5 most similar URLs
296        similarities.into_iter().take(5).collect()
297    }
298
299    /// Generate enhanced error message with URL similarity information
300    async fn generate_no_match_error(&self, request: &Request, mode_description: &str) -> Error {
301        let cassette = self.cassette.lock().await;
302        let request_url = request.url().to_string();
303        let request_method = request.method().to_string();
304
305        let error_msg = {
306            let mut msg = format!(
307                "No matching interaction found in cassette ({mode_description})\n\nRequest details:\n  Method: {request_method}\n  URL: {request_url}"
308            );
309
310            if cassette.interactions.is_empty() {
311                msg.push_str("\n\nCassette is empty - no recorded interactions available.");
312            } else {
313                msg.push_str(&format!(
314                    "\n\nCassette contains {} recorded interactions.",
315                    cassette.interactions.len()
316                ));
317
318                // Find similar URLs
319                let similar_urls = self.find_similar_urls(request, &cassette).await;
320
321                if !similar_urls.is_empty() {
322                    msg.push_str("\n\nMost similar recorded URLs (by Levenshtein distance):");
323                    for (i, (url, distance)) in similar_urls.iter().enumerate() {
324                        msg.push_str(&format!("\n  {}. {} (distance: {})", i + 1, url, distance));
325                    }
326                }
327
328                // Show unique methods in cassette
329                let mut methods: Vec<String> = cassette
330                    .interactions
331                    .iter()
332                    .map(|i| i.request.method.clone())
333                    .collect();
334                methods.sort();
335                methods.dedup();
336
337                msg.push_str(&format!("\n\nRecorded methods: {}", methods.join(", ")));
338            }
339
340            msg
341        };
342
343        // Convert to a static string by leaking memory (acceptable for error cases)
344        Error::from_str(404, Box::leak(error_msg.into_boxed_str()))
345    }
346
347    pub async fn save_cassette(&self) -> Result<(), Error> {
348        let cassette = self.cassette.lock().await;
349        cassette.save_to_file().await
350    }
351
352    /// Apply filters to all interactions in the cassette
353    /// This modifies the cassette in-place by applying the configured filter chain to all interactions
354    pub async fn apply_filters_to_cassette(&self) -> Result<(), Error> {
355        let mut cassette = self.cassette.lock().await;
356
357        // Apply filters to each interaction
358        for interaction in &mut cassette.interactions {
359            self.filter_chain.filter_request(&mut interaction.request);
360            self.filter_chain.filter_response(&mut interaction.response);
361        }
362
363        log::debug!(
364            "Applied filters to {} interactions",
365            cassette.interactions.len()
366        );
367        Ok(())
368    }
369
370    /// Apply filters to all interactions in the cassette and save the filtered version
371    pub async fn filter_and_save_cassette(&self) -> Result<(), Error> {
372        self.apply_filters_to_cassette().await?;
373        self.save_cassette().await
374    }
375
376    pub fn builder<P: Into<PathBuf>>(cassette_path: P) -> VcrClientBuilder {
377        VcrClientBuilder::new(cassette_path)
378    }
379
380    // Helper methods for each VCR mode
381
382    /// Common logic for recording a request/response and returning the pristine response
383    async fn record_and_return_response(
384        &self,
385        req_for_recording: Request,
386        response: &mut Response,
387    ) -> Result<Response, Error> {
388        // IMMEDIATELY create a pristine copy for the caller before any VCR processing
389        let status = response.status();
390        let version = format!("{:?}", response.version());
391
392        let mut headers = std::collections::HashMap::new();
393        for (name, values) in response.iter() {
394            let header_values: Vec<String> =
395                values.iter().map(|v| v.as_str().to_string()).collect();
396            headers.insert(name.as_str().to_string(), header_values);
397        }
398
399        // Read the body once - this consumes it from the original response
400        let body_string = match response.body_string().await {
401            Ok(body) if !body.is_empty() => Some(body),
402            Ok(_) => None, // Empty body
403            Err(e) => {
404                // If we can't read the body, log it but don't fail the whole request
405                eprintln!("Warning: Failed to read response body for VCR: {e}");
406                None
407            }
408        };
409
410        // Create the pristine return response immediately, before any VCR processing
411        let return_response =
412            Self::create_pristine_response(status, &headers, body_string.as_deref());
413
414        // Now do VCR processing with the data we already extracted
415        let mut serializable_request = SerializableRequest::from_request(req_for_recording).await?;
416        let mut serializable_response = crate::SerializableResponse {
417            status: status.into(),
418            headers,
419            body: body_string.clone(),
420            body_base64: None,
421            version,
422        };
423
424        // Apply filters ONLY to what gets stored
425        self.filter_chain.filter_request(&mut serializable_request);
426        self.filter_chain
427            .filter_response(&mut serializable_response);
428
429        let mut cassette = self.cassette.lock().await;
430
431        // In Record mode, clear cassette on first interaction to fully replace
432        if matches!(self.mode, VcrMode::Record) {
433            let mut recording_started = self.recording_started.lock().await;
434            if !*recording_started {
435                cassette.clear();
436                *recording_started = true;
437            }
438        }
439
440        cassette
441            .record_interaction(serializable_request, serializable_response)
442            .await?;
443
444        // Return the pristine response we created before any VCR processing
445        Ok(return_response)
446    }
447
448    async fn handle_none_mode(&self, req: Request) -> Result<Response, Error> {
449        self.inner.send(req).await
450    }
451
452    async fn handle_replay_mode(&self, req: Request) -> Result<Response, Error> {
453        let cassette = self.cassette.lock().await;
454        if let Some((index, _interaction)) = self.find_match(&req, &cassette).await {
455            // Mark this interaction as used
456            drop(cassette); // Release cassette lock before acquiring used_interactions lock
457            let mut used_interactions = self.used_interactions.lock().await;
458            used_interactions.insert(index);
459            drop(used_interactions); // Release used_interactions lock
460
461            // Re-acquire cassette lock to access the interaction
462            let cassette = self.cassette.lock().await;
463            let interaction = &cassette.interactions[index];
464            Ok(interaction.response.to_response().await)
465        } else {
466            drop(cassette); // Release the lock before calling generate_no_match_error
467            Err(self.generate_no_match_error(&req, "Replay mode").await)
468        }
469    }
470
471    async fn handle_record_mode(&self, req: Request) -> Result<Response, Error> {
472        // Duplicate the request to preserve the body for both sending and recording
473        let (req_for_sending, req_for_recording) = duplicate_request_with_body(req).await?;
474
475        // Make the real request with original sensitive data - never match existing interactions
476        let mut response = self.inner.send(req_for_sending).await?;
477        self.record_and_return_response(req_for_recording, &mut response)
478            .await
479    }
480
481    async fn handle_once_mode(&self, req: Request) -> Result<Response, Error> {
482        let cassette = self.cassette.lock().await;
483        if let Some((index, _interaction)) = self.find_match(&req, &cassette).await {
484            // Mark this interaction as used
485            drop(cassette); // Release cassette lock before acquiring used_interactions lock
486            let mut used_interactions = self.used_interactions.lock().await;
487            used_interactions.insert(index);
488            drop(used_interactions); // Release used_interactions lock
489
490            // Re-acquire cassette lock to access the interaction
491            let cassette = self.cassette.lock().await;
492            let interaction = &cassette.interactions[index];
493            return Ok(interaction.response.to_response().await);
494        }
495
496        if !cassette.is_empty() {
497            drop(cassette); // Release the lock before calling generate_no_match_error
498            return Err(self.generate_no_match_error(&req, "Once mode").await);
499        }
500        drop(cassette); // Release the lock before making the request
501
502        // Duplicate the request to preserve the body for both sending and recording
503        let (req_for_sending, req_for_recording) = duplicate_request_with_body(req).await?;
504
505        // Make the real request with original sensitive data
506        let mut response = self.inner.send(req_for_sending).await?;
507        self.record_and_return_response(req_for_recording, &mut response)
508            .await
509    }
510
511    async fn handle_filter_mode(&self, req: Request) -> Result<Response, Error> {
512        let cassette = self.cassette.lock().await;
513        if let Some((index, _interaction)) = self.find_match(&req, &cassette).await {
514            // Mark this interaction as used
515            drop(cassette); // Release cassette lock before acquiring used_interactions lock
516            let mut used_interactions = self.used_interactions.lock().await;
517            used_interactions.insert(index);
518            drop(used_interactions); // Release used_interactions lock
519
520            // Re-acquire cassette lock to access the interaction
521            let cassette = self.cassette.lock().await;
522            let interaction = &cassette.interactions[index];
523            // Return the filtered response (filters are already applied when loading)
524            Ok(interaction.response.to_response().await)
525        } else {
526            drop(cassette); // Release the lock before calling generate_no_match_error
527            Err(self
528                .generate_no_match_error(&req, "Filter mode - no new requests allowed")
529                .await)
530        }
531    }
532}
533
534// Re-export utility functions from the utils module
535pub use utils::*;
536
537#[derive(Debug)]
538pub struct VcrClientBuilder {
539    inner: Option<Box<dyn HttpClient>>,
540    mode: VcrMode,
541    cassette_path: PathBuf,
542    matcher: Option<Box<dyn RequestMatcher>>,
543    filter_chain: FilterChain,
544    format: Option<CassetteFormat>,
545}
546
547impl VcrClientBuilder {
548    pub fn new<P: Into<PathBuf>>(cassette_path: P) -> Self {
549        Self {
550            inner: None,
551            mode: VcrMode::Once,
552            cassette_path: cassette_path.into(),
553            matcher: None,
554            filter_chain: FilterChain::new(),
555            format: None,
556        }
557    }
558
559    pub fn inner_client(mut self, client: Box<dyn HttpClient>) -> Self {
560        self.inner = Some(client);
561        self
562    }
563
564    pub fn mode(mut self, mode: VcrMode) -> Self {
565        self.mode = mode;
566        self
567    }
568
569    pub fn matcher(mut self, matcher: Box<dyn RequestMatcher>) -> Self {
570        self.matcher = Some(matcher);
571        self
572    }
573
574    pub fn filter_chain(mut self, filter_chain: FilterChain) -> Self {
575        self.filter_chain = filter_chain;
576        self
577    }
578
579    pub fn add_filter(mut self, filter: Box<dyn Filter>) -> Self {
580        self.filter_chain = self.filter_chain.add_filter(filter);
581        self
582    }
583
584    pub fn format(mut self, format: CassetteFormat) -> Self {
585        self.format = Some(format);
586        self
587    }
588
589    pub async fn build(self) -> Result<VcrClient, Error> {
590        let inner = self
591            .inner
592            .ok_or_else(|| Error::from_str(400, "Inner HttpClient is required"))?;
593
594        let cassette = if self.cassette_path.exists() {
595            Cassette::load_from_file(self.cassette_path.clone()).await?
596        } else {
597            let mut cassette = Cassette::new().with_path(self.cassette_path);
598            if let Some(format) = self.format {
599                cassette = cassette.with_format(format);
600            }
601            cassette
602        };
603
604        let mut vcr_client = VcrClient::new(inner, self.mode, cassette);
605
606        if let Some(matcher) = self.matcher {
607            vcr_client.set_matcher(matcher);
608        }
609
610        vcr_client.set_filter_chain(self.filter_chain);
611
612        Ok(vcr_client)
613    }
614}
615
616impl Drop for VcrClient {
617    fn drop(&mut self) {
618        if let Ok(cassette) = self.cassette.try_lock() {
619            // Only save if:
620            // 1. We're in a mode that should persist changes (Record or Once)
621            // 2. The cassette was actually modified since loading
622            let should_save = matches!(self.mode, VcrMode::Record | VcrMode::Once)
623                && cassette.modified_since_load;
624
625            if should_save {
626                log::debug!(
627                    "VcrClient dropped - saving modified cassette with {} interactions",
628                    cassette.interactions.len()
629                );
630                // Save respecting the format setting
631                if let Some(path) = &cassette.path {
632                    let result = match cassette.format {
633                        CassetteFormat::File => {
634                            // Save as single YAML file
635                            if let Ok(yaml) = serde_yaml::to_string(&*cassette) {
636                                std::fs::write(path, yaml)
637                            } else {
638                                Err(std::io::Error::other("Failed to serialize cassette"))
639                            }
640                        }
641                        CassetteFormat::Directory => {
642                            // Save as directory format (synchronous version)
643                            Self::save_cassette_as_directory_sync(&cassette, path)
644                        }
645                    };
646
647                    if let Err(e) = result {
648                        eprintln!("Failed to save cassette on drop: {e}");
649                    } else {
650                        log::debug!("Successfully saved cassette to {path:?}");
651                    }
652                }
653            } else if cassette.modified_since_load {
654                log::debug!(
655                    "VcrClient dropped - not saving cassette (mode: {:?} doesn't persist changes)",
656                    self.mode
657                );
658            }
659        }
660    }
661}
662
663#[async_trait]
664impl HttpClient for VcrClient {
665    async fn send(&self, req: Request) -> Result<Response, Error> {
666        match &self.mode {
667            VcrMode::None => self.handle_none_mode(req).await,
668            VcrMode::Replay => self.handle_replay_mode(req).await,
669            VcrMode::Record => self.handle_record_mode(req).await,
670            VcrMode::Once => self.handle_once_mode(req).await,
671            VcrMode::Filter => self.handle_filter_mode(req).await,
672        }
673    }
674
675    fn set_config(&mut self, config: http_client::Config) -> Result<(), Error> {
676        self.inner
677            .set_config(config)
678            .map_err(|e| Error::from_str(500, format!("Config error: {e}")))
679    }
680
681    fn config(&self) -> &http_client::Config {
682        self.inner.config()
683    }
684}