Skip to main content

mockforge_recorder/
scrubbing.rs

1//! Capture Scrubbing & Deterministic Replay
2//!
3//! Provides functionality to scrub sensitive data from recorded requests/responses
4//! and normalize non-deterministic values for reproducible diffs and replays.
5//!
6//! # Features
7//!
8//! - **Regex-based scrubbing**: Remove or replace sensitive data patterns
9//! - **Deterministic normalization**: Replace timestamps, UUIDs, and random IDs
10//! - **Field-level scrubbing**: Target specific JSON fields or headers
11//! - **Capture filtering**: Selectively record based on status codes or patterns
12//!
13//! # Environment Variables
14//!
15//! - `MOCKFORGE_CAPTURE_SCRUB`: JSON configuration for scrubbing rules
16//! - `MOCKFORGE_CAPTURE_FILTER`: JSON configuration for capture filtering
17//! - `MOCKFORGE_CAPTURE_DETERMINISTIC`: Enable deterministic mode (default: false)
18//!
19//! # Example
20//!
21//! ```bash
//! export MOCKFORGE_CAPTURE_SCRUB='{"rules": [
//!   {"type": "field", "field": "email", "replacement": "user@example.com"},
//!   {"type": "regex", "pattern": "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "replacement": "00000000-0000-0000-0000-000000000000"}
//! ]}'
26//!
27//! export MOCKFORGE_CAPTURE_FILTER='{"status_codes": [500, 502, 503, 504]}'
28//! export MOCKFORGE_CAPTURE_DETERMINISTIC=true
29//! ```
30
31use crate::{RecordedRequest, RecordedResponse, RecorderError, Result};
32use chrono::{DateTime, Utc};
33use once_cell::sync::Lazy;
34use regex::Regex;
35use serde::{Deserialize, Serialize};
36use std::collections::HashMap;
37use std::sync::Arc;
38use tracing::{debug, warn};
39
40/// Global scrubber instance loaded from environment
41static GLOBAL_SCRUBBER: Lazy<Arc<Scrubber>> = Lazy::new(|| {
42    Arc::new(Scrubber::from_env().unwrap_or_else(|e| {
43        warn!("Failed to load scrubber from environment: {}", e);
44        Scrubber::default()
45    }))
46});
47
48/// Global filter instance loaded from environment
49static GLOBAL_FILTER: Lazy<Arc<CaptureFilter>> = Lazy::new(|| {
50    Arc::new(CaptureFilter::from_env().unwrap_or_else(|e| {
51        warn!("Failed to load capture filter from environment: {}", e);
52        CaptureFilter::default()
53    }))
54});
55
/// Regex pattern for matching UUIDs
///
/// Matches the hyphenated 8-4-4-4-12 hexadecimal form, case-insensitively (`(?i)`).
/// Version/variant bits are not checked, and the pattern has no word-boundary
/// anchors, so it also matches when embedded in a longer hex run.
static UUID_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?i)[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}").unwrap()
});
60
61/// Regex pattern for matching email addresses
62static EMAIL_REGEX: Lazy<Regex> =
63    Lazy::new(|| Regex::new(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b").unwrap());
64
/// Regex pattern for matching IPv4 addresses
///
/// Matches four dot-separated groups of one to three digits. Octet ranges are not
/// validated, so out-of-range values such as `999.999.999.999` also match.
static IPV4_REGEX: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b").unwrap());
68
/// Regex pattern for matching credit card numbers
///
/// Matches exactly 16 digits written as four groups of four, each group optionally
/// followed by a single `-` or whitespace character. No checksum validation is done.
static CREDIT_CARD_REGEX: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\b(?:\d{4}[-\s]?){3}\d{4}\b").unwrap());
72
/// Configuration for scrubbing sensitive data
///
/// Deserializable from JSON. Every field carries `#[serde(default)]`, so any field
/// may be omitted from the input.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ScrubConfig {
    /// Rules for scrubbing data
    #[serde(default)]
    pub rules: Vec<ScrubRule>,

    /// Enable deterministic mode (normalize timestamps, IDs, etc.)
    ///
    /// In this file the flag gates timestamp normalization in
    /// `Scrubber::scrub_request` / `Scrubber::scrub_response`.
    #[serde(default)]
    pub deterministic: bool,

    /// Counter seed for deterministic IDs (used in deterministic mode)
    ///
    /// `Scrubber::new` uses this as the initial value of the counter that is
    /// substituted into UUID replacements.
    #[serde(default)]
    pub counter_seed: u64,
}
88
/// A single scrubbing rule
///
/// Serialized as an internally tagged enum: each rule is a JSON object whose `type`
/// field holds the lowercased variant name (`regex`, `field`, `header`, `uuid`,
/// `email`, `ipaddress`, `creditcard`), e.g.
/// `{"type": "header", "name": "Authorization", "replacement": "***"}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ScrubRule {
    /// Scrub by regex pattern
    Regex {
        /// Regex pattern to match
        pattern: String,
        /// Replacement string (can include capture groups like $1, $2)
        replacement: String,
        /// Target location (headers, body, all)
        // Defaults to `ScrubTarget::All` when omitted.
        #[serde(default = "default_target")]
        target: ScrubTarget,
    },

    /// Scrub specific JSON field
    Field {
        /// JSON field path (e.g., "user.email", "data.id")
        field: String,
        /// Replacement value
        replacement: String,
        /// Target location
        // Defaults to `ScrubTarget::All` when omitted.
        #[serde(default = "default_target")]
        target: ScrubTarget,
    },

    /// Scrub specific header
    Header {
        /// Header name (case-insensitive)
        name: String,
        /// Replacement value
        replacement: String,
    },

    /// Scrub all UUIDs (replace with deterministic counter)
    Uuid {
        /// Replacement pattern (use {{counter}} for deterministic counter)
        // `{{counter:012}}` is also recognised and expands to the counter
        // zero-padded to 12 digits; it is what the default replacement uses.
        #[serde(default = "default_uuid_replacement")]
        replacement: String,
    },

    /// Scrub email addresses
    Email {
        /// Replacement value
        #[serde(default = "default_email_replacement")]
        replacement: String,
    },

    /// Scrub IP addresses
    // Only IPv4 addresses are matched by the pattern used in this file.
    IpAddress {
        /// Replacement value
        #[serde(default = "default_ip_replacement")]
        replacement: String,
    },

    /// Scrub credit card numbers
    CreditCard {
        /// Replacement value
        #[serde(default = "default_creditcard_replacement")]
        replacement: String,
    },
}
151
/// Target location for scrubbing
///
/// Serialized in lowercase: `"headers"`, `"body"`, `"all"`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum ScrubTarget {
    /// Scrub in headers only
    Headers,
    /// Scrub in body only
    Body,
    /// Scrub in both headers and body
    All,
}
163
164fn default_target() -> ScrubTarget {
165    ScrubTarget::All
166}
167
/// Serde default for `ScrubRule::Uuid`: a nil-prefixed UUID whose last group is the
/// zero-padded counter placeholder.
fn default_uuid_replacement() -> String {
    String::from("00000000-0000-0000-0000-{{counter:012}}")
}
171
/// Serde default for `ScrubRule::Email`.
fn default_email_replacement() -> String {
    String::from("user@example.com")
}
175
/// Serde default for `ScrubRule::IpAddress`.
fn default_ip_replacement() -> String {
    String::from("127.0.0.1")
}
179
/// Serde default for `ScrubRule::CreditCard`.
fn default_creditcard_replacement() -> String {
    String::from("XXXX-XXXX-XXXX-XXXX")
}
183
/// Configuration for filtering which requests to capture
///
/// Deserializable from JSON; every field may be omitted. As used by
/// `CaptureFilter::should_capture`, an empty list means "no restriction" for that
/// criterion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CaptureFilterConfig {
    /// Only capture requests with these status codes
    #[serde(default)]
    pub status_codes: Vec<u16>,

    /// Only capture requests matching these patterns (regex)
    #[serde(default)]
    pub path_patterns: Vec<String>,

    /// Only capture requests with these methods
    // Compared case-insensitively (ASCII) against the request method.
    #[serde(default)]
    pub methods: Vec<String>,

    /// Exclude requests matching these patterns
    // Each entry is compiled as a regex by `CaptureFilter::new`.
    #[serde(default)]
    pub exclude_paths: Vec<String>,

    /// Only capture errors (status >= 400)
    #[serde(default)]
    pub errors_only: bool,

    /// Capture sample rate (0.0 - 1.0, e.g., 0.1 = 10%)
    // Defaults to 1.0 (no sampling) when omitted.
    #[serde(default = "default_sample_rate")]
    pub sample_rate: f64,
}
211
/// Serde default for `sample_rate`: capture everything.
fn default_sample_rate() -> f64 {
    const CAPTURE_ALL: f64 = 1.0;
    CAPTURE_ALL
}
215
216impl Default for CaptureFilterConfig {
217    fn default() -> Self {
218        Self {
219            status_codes: Vec::new(),
220            path_patterns: Vec::new(),
221            methods: Vec::new(),
222            exclude_paths: Vec::new(),
223            errors_only: false,
224            sample_rate: default_sample_rate(),
225        }
226    }
227}
228
/// Handles scrubbing of sensitive data from recorded requests/responses
pub struct Scrubber {
    /// Rules and flags this scrubber was built from.
    config: ScrubConfig,
    /// Pre-compiled `ScrubRule::Regex` rules as `(regex, replacement, target)` tuples.
    compiled_regexes: Vec<(Regex, String, ScrubTarget)>,
    /// Counter substituted into UUID replacements; incremented once per replaced UUID.
    deterministic_counter: std::sync::atomic::AtomicU64,
}
235
236impl Default for Scrubber {
237    fn default() -> Self {
238        Self {
239            config: ScrubConfig::default(),
240            compiled_regexes: Vec::new(),
241            deterministic_counter: std::sync::atomic::AtomicU64::new(0),
242        }
243    }
244}
245
246impl Scrubber {
247    /// Create a new scrubber with the given configuration
248    pub fn new(config: ScrubConfig) -> Result<Self> {
249        let mut compiled_regexes = Vec::new();
250
251        // Compile regex patterns
252        for rule in &config.rules {
253            if let ScrubRule::Regex {
254                pattern,
255                replacement,
256                target,
257            } = rule
258            {
259                let regex = Regex::new(pattern).map_err(|e| {
260                    RecorderError::InvalidFilter(format!(
261                        "Invalid regex pattern '{}': {}",
262                        pattern, e
263                    ))
264                })?;
265                compiled_regexes.push((regex, replacement.clone(), *target));
266            }
267        }
268
269        Ok(Self {
270            deterministic_counter: std::sync::atomic::AtomicU64::new(config.counter_seed),
271            config,
272            compiled_regexes,
273        })
274    }
275
276    /// Load scrubber from MOCKFORGE_CAPTURE_SCRUB environment variable
277    pub fn from_env() -> Result<Self> {
278        let scrub_json = std::env::var("MOCKFORGE_CAPTURE_SCRUB").ok();
279        let deterministic = std::env::var("MOCKFORGE_CAPTURE_DETERMINISTIC")
280            .ok()
281            .and_then(|v| v.parse::<bool>().ok())
282            .unwrap_or(false);
283
284        let mut config = if let Some(json) = scrub_json {
285            serde_json::from_str::<ScrubConfig>(&json).map_err(|e| {
286                RecorderError::InvalidFilter(format!("Invalid MOCKFORGE_CAPTURE_SCRUB JSON: {}", e))
287            })?
288        } else {
289            ScrubConfig::default()
290        };
291
292        config.deterministic = deterministic;
293
294        Self::new(config)
295    }
296
297    /// Get the global scrubber instance
298    pub fn global() -> Arc<Self> {
299        Arc::clone(&GLOBAL_SCRUBBER)
300    }
301
302    /// Scrub a recorded request
303    pub fn scrub_request(&self, request: &mut RecordedRequest) {
304        // Scrub headers
305        if let Ok(mut headers) = serde_json::from_str::<HashMap<String, String>>(&request.headers) {
306            self.scrub_headers(&mut headers);
307            if let Ok(json) = serde_json::to_string(&headers) {
308                request.headers = json;
309            }
310        }
311
312        // Scrub body
313        if let Some(ref mut body) = request.body {
314            if request.body_encoding == "utf8" {
315                *body = self.scrub_string(body, ScrubTarget::Body);
316            }
317        }
318
319        // Scrub query params
320        if let Some(ref mut query) = request.query_params {
321            *query = self.scrub_string(query, ScrubTarget::Body);
322        }
323
324        // Normalize timestamp in deterministic mode
325        if self.config.deterministic {
326            request.timestamp = Self::normalize_timestamp(request.timestamp);
327        }
328
329        // Scrub sensitive fields
330        if let Some(ref mut trace_id) = request.trace_id {
331            *trace_id = self.scrub_string(trace_id, ScrubTarget::All);
332        }
333        if let Some(ref mut span_id) = request.span_id {
334            *span_id = self.scrub_string(span_id, ScrubTarget::All);
335        }
336        if let Some(ref mut client_ip) = request.client_ip {
337            *client_ip = self.scrub_string(client_ip, ScrubTarget::All);
338        }
339    }
340
341    /// Scrub a recorded response
342    pub fn scrub_response(&self, response: &mut RecordedResponse) {
343        // Scrub headers
344        if let Ok(mut headers) = serde_json::from_str::<HashMap<String, String>>(&response.headers)
345        {
346            self.scrub_headers(&mut headers);
347            if let Ok(json) = serde_json::to_string(&headers) {
348                response.headers = json;
349            }
350        }
351
352        // Scrub body
353        if let Some(ref mut body) = response.body {
354            if response.body_encoding == "utf8" {
355                *body = self.scrub_string(body, ScrubTarget::Body);
356            }
357        }
358
359        // Normalize timestamp in deterministic mode
360        if self.config.deterministic {
361            response.timestamp = Self::normalize_timestamp(response.timestamp);
362        }
363    }
364
365    /// Scrub headers map
366    fn scrub_headers(&self, headers: &mut HashMap<String, String>) {
367        for rule in &self.config.rules {
368            if let ScrubRule::Header { name, replacement } = rule {
369                // Case-insensitive header matching
370                let key = headers.keys().find(|k| k.eq_ignore_ascii_case(name)).cloned();
371                if let Some(key) = key {
372                    headers.insert(key, replacement.clone());
373                }
374            }
375        }
376
377        // Scrub header values with regex rules
378        for (key, value) in headers.iter_mut() {
379            *value = self.scrub_string(value, ScrubTarget::Headers);
380
381            // Also scrub by field name
382            for rule in &self.config.rules {
383                if let ScrubRule::Field {
384                    field,
385                    replacement,
386                    target,
387                } = rule
388                {
389                    if (*target == ScrubTarget::Headers || *target == ScrubTarget::All)
390                        && key.eq_ignore_ascii_case(field)
391                    {
392                        *value = replacement.clone();
393                    }
394                }
395            }
396        }
397    }
398
399    /// Scrub a string value
400    fn scrub_string(&self, input: &str, location: ScrubTarget) -> String {
401        let mut result = input.to_string();
402
403        // Apply built-in rules
404        for rule in &self.config.rules {
405            match rule {
406                ScrubRule::Uuid { replacement } => {
407                    if location == ScrubTarget::All || location == ScrubTarget::Body {
408                        result = self.scrub_uuids(&result, replacement);
409                    }
410                }
411                ScrubRule::Email { replacement } => {
412                    if location == ScrubTarget::All || location == ScrubTarget::Body {
413                        result = self.scrub_emails(&result, replacement);
414                    }
415                }
416                ScrubRule::IpAddress { replacement } => {
417                    if location == ScrubTarget::All || location == ScrubTarget::Body {
418                        result = self.scrub_ips(&result, replacement);
419                    }
420                }
421                ScrubRule::CreditCard { replacement } => {
422                    if location == ScrubTarget::All || location == ScrubTarget::Body {
423                        result = self.scrub_credit_cards(&result, replacement);
424                    }
425                }
426                ScrubRule::Field {
427                    field,
428                    replacement,
429                    target,
430                } => {
431                    if *target == location || *target == ScrubTarget::All {
432                        result = self.scrub_json_field(&result, field, replacement);
433                    }
434                }
435                _ => {}
436            }
437        }
438
439        // Apply regex rules
440        for (regex, replacement, target) in &self.compiled_regexes {
441            if *target == location || *target == ScrubTarget::All {
442                result = regex.replace_all(&result, replacement.as_str()).to_string();
443            }
444        }
445
446        result
447    }
448
449    /// Scrub UUIDs with deterministic counter
450    fn scrub_uuids(&self, input: &str, replacement: &str) -> String {
451        UUID_REGEX
452            .replace_all(input, |_: &regex::Captures| {
453                let counter =
454                    self.deterministic_counter.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
455                replacement
456                    .replace("{{counter}}", &counter.to_string())
457                    .replace("{{counter:012}}", &format!("{:012}", counter))
458            })
459            .to_string()
460    }
461
462    /// Scrub email addresses
463    fn scrub_emails(&self, input: &str, replacement: &str) -> String {
464        EMAIL_REGEX.replace_all(input, replacement).to_string()
465    }
466
467    /// Scrub IP addresses
468    fn scrub_ips(&self, input: &str, replacement: &str) -> String {
469        IPV4_REGEX.replace_all(input, replacement).to_string()
470    }
471
472    /// Scrub credit card numbers
473    fn scrub_credit_cards(&self, input: &str, replacement: &str) -> String {
474        CREDIT_CARD_REGEX.replace_all(input, replacement).to_string()
475    }
476
477    /// Scrub specific JSON field
478    fn scrub_json_field(&self, input: &str, field_path: &str, replacement: &str) -> String {
479        // Try to parse as JSON
480        if let Ok(mut json) = serde_json::from_str::<serde_json::Value>(input) {
481            if self.scrub_json_value(&mut json, field_path, replacement) {
482                if let Ok(result) = serde_json::to_string(&json) {
483                    return result;
484                }
485            }
486        }
487        input.to_string()
488    }
489
490    /// Recursively scrub JSON value
491    #[allow(clippy::only_used_in_recursion)]
492    fn scrub_json_value(
493        &self,
494        value: &mut serde_json::Value,
495        field_path: &str,
496        replacement: &str,
497    ) -> bool {
498        let parts: Vec<&str> = field_path.split('.').collect();
499        if parts.is_empty() {
500            return false;
501        }
502
503        if parts.len() == 1 {
504            // Base case: scrub this field
505            if let Some(obj) = value.as_object_mut() {
506                if obj.contains_key(parts[0]) {
507                    obj.insert(
508                        parts[0].to_string(),
509                        serde_json::Value::String(replacement.to_string()),
510                    );
511                    return true;
512                }
513            }
514        } else {
515            // Recursive case: navigate deeper
516            if let Some(obj) = value.as_object_mut() {
517                if let Some(child) = obj.get_mut(parts[0]) {
518                    let remaining = parts[1..].join(".");
519                    return self.scrub_json_value(child, &remaining, replacement);
520                }
521            }
522        }
523
524        false
525    }
526
527    /// Normalize timestamp to a deterministic value
528    fn normalize_timestamp(timestamp: DateTime<Utc>) -> DateTime<Utc> {
529        // Normalize to start of day
530        timestamp
531            .date_naive()
532            .and_hms_opt(0, 0, 0)
533            .expect("0 is valid for hours/minutes/seconds")
534            .and_utc()
535    }
536}
537
/// Handles filtering of which requests to capture
#[derive(Default)]
pub struct CaptureFilter {
    /// Configuration this filter was built from.
    config: CaptureFilterConfig,
    /// Compiled form of `config.path_patterns` (include list).
    path_patterns: Vec<Regex>,
    /// Compiled form of `config.exclude_paths` (exclude list).
    exclude_patterns: Vec<Regex>,
}
545
546impl CaptureFilter {
547    /// Create a new capture filter with the given configuration
548    pub fn new(config: CaptureFilterConfig) -> Result<Self> {
549        let mut path_patterns = Vec::new();
550        for pattern in &config.path_patterns {
551            let regex = Regex::new(pattern).map_err(|e| {
552                RecorderError::InvalidFilter(format!("Invalid path pattern '{}': {}", pattern, e))
553            })?;
554            path_patterns.push(regex);
555        }
556
557        let mut exclude_patterns = Vec::new();
558        for pattern in &config.exclude_paths {
559            let regex = Regex::new(pattern).map_err(|e| {
560                RecorderError::InvalidFilter(format!(
561                    "Invalid exclude pattern '{}': {}",
562                    pattern, e
563                ))
564            })?;
565            exclude_patterns.push(regex);
566        }
567
568        Ok(Self {
569            config,
570            path_patterns,
571            exclude_patterns,
572        })
573    }
574
575    /// Load filter from MOCKFORGE_CAPTURE_FILTER environment variable
576    pub fn from_env() -> Result<Self> {
577        let filter_json = std::env::var("MOCKFORGE_CAPTURE_FILTER").ok();
578
579        let config = if let Some(json) = filter_json {
580            serde_json::from_str::<CaptureFilterConfig>(&json).map_err(|e| {
581                RecorderError::InvalidFilter(format!(
582                    "Invalid MOCKFORGE_CAPTURE_FILTER JSON: {}",
583                    e
584                ))
585            })?
586        } else {
587            CaptureFilterConfig::default()
588        };
589
590        Self::new(config)
591    }
592
593    /// Get the global filter instance
594    pub fn global() -> Arc<Self> {
595        Arc::clone(&GLOBAL_FILTER)
596    }
597
598    /// Check if a request should be captured
599    pub fn should_capture(&self, method: &str, path: &str, status_code: Option<u16>) -> bool {
600        debug!(
601            "should_capture called: method={}, path={}, status_code={:?}",
602            method, path, status_code
603        );
604        debug!("  errors_only={}, status_codes={:?}, path_patterns count={}, exclude_patterns count={}",
605               self.config.errors_only, self.config.status_codes, self.path_patterns.len(), self.exclude_patterns.len());
606
607        // Check sample rate
608        if self.config.sample_rate < 1.0 {
609            use std::collections::hash_map::DefaultHasher;
610            use std::hash::{Hash, Hasher};
611
612            let mut hasher = DefaultHasher::new();
613            path.hash(&mut hasher);
614            let hash = hasher.finish();
615            let sample = (hash % 1000) as f64 / 1000.0;
616
617            if sample > self.config.sample_rate {
618                debug!(
619                    "Skipping capture due to sample rate: {} > {}",
620                    sample, self.config.sample_rate
621                );
622                return false;
623            }
624        }
625
626        // Check errors_only
627        if self.config.errors_only {
628            if let Some(code) = status_code {
629                if code < 400 {
630                    debug!("Skipping capture: not an error (status {})", code);
631                    return false;
632                }
633            } else {
634                // No status code yet, we can't determine if it's an error
635                // In this case, we should allow it and filter later when we have the status
636                debug!("errors_only is set but no status code provided, allowing for now");
637            }
638        }
639
640        // Check status code filter
641        if !self.config.status_codes.is_empty() {
642            if let Some(code) = status_code {
643                if !self.config.status_codes.contains(&code) {
644                    debug!("Skipping capture: status code {} not in filter", code);
645                    return false;
646                }
647            } else {
648                // No status code yet, allow it and filter later
649                debug!("status_codes filter set but no status code provided, allowing for now");
650            }
651        }
652
653        // Check method filter
654        if !self.config.methods.is_empty()
655            && !self.config.methods.iter().any(|m| m.eq_ignore_ascii_case(method))
656        {
657            debug!("Skipping capture: method {} not in filter", method);
658            return false;
659        }
660
661        // Check exclude patterns
662        for pattern in &self.exclude_patterns {
663            if pattern.is_match(path) {
664                debug!("Skipping capture: path {} matches exclude pattern", path);
665                return false;
666            }
667        }
668
669        // Check path patterns (if specified, path must match)
670        if !self.path_patterns.is_empty() {
671            let matches = self.path_patterns.iter().any(|p| p.is_match(path));
672            if !matches {
673                debug!("Skipping capture: path {} does not match any pattern", path);
674                return false;
675            }
676        }
677
678        true
679    }
680}
681
682#[cfg(test)]
683mod tests {
684    use super::*;
685
686    // ==================== ScrubConfig Tests ====================
687
688    #[test]
689    fn test_scrub_config_default() {
690        let config = ScrubConfig::default();
691        assert!(config.rules.is_empty());
692        assert!(!config.deterministic);
693        assert_eq!(config.counter_seed, 0);
694    }
695
696    #[test]
697    fn test_scrub_config_serialize() {
698        let config = ScrubConfig {
699            rules: vec![ScrubRule::Email {
700                replacement: "user@example.com".to_string(),
701            }],
702            deterministic: true,
703            counter_seed: 100,
704        };
705        let json = serde_json::to_string(&config).unwrap();
706        assert!(json.contains("email"));
707        assert!(json.contains("deterministic"));
708    }
709
710    // ==================== ScrubTarget Tests ====================
711
712    #[test]
713    fn test_scrub_target_equality() {
714        assert_eq!(ScrubTarget::All, ScrubTarget::All);
715        assert_ne!(ScrubTarget::Headers, ScrubTarget::Body);
716    }
717
718    #[test]
719    fn test_scrub_target_default() {
720        assert_eq!(default_target(), ScrubTarget::All);
721    }
722
723    // ==================== ScrubRule Tests ====================
724
725    #[test]
726    fn test_scrub_rule_regex_serialize() {
727        let rule = ScrubRule::Regex {
728            pattern: r"\d+".to_string(),
729            replacement: "XXX".to_string(),
730            target: ScrubTarget::Body,
731        };
732        let json = serde_json::to_string(&rule).unwrap();
733        assert!(json.contains("regex"));
734        assert!(json.contains("\\\\d+"));
735    }
736
737    #[test]
738    fn test_scrub_rule_header_serialize() {
739        let rule = ScrubRule::Header {
740            name: "Authorization".to_string(),
741            replacement: "Bearer ***".to_string(),
742        };
743        let json = serde_json::to_string(&rule).unwrap();
744        assert!(json.contains("header"));
745        assert!(json.contains("Authorization"));
746    }
747
748    // ==================== Scrubber Tests ====================
749
750    #[test]
751    fn test_scrubber_default() {
752        let scrubber = Scrubber::default();
753        assert!(scrubber.config.rules.is_empty());
754        assert!(scrubber.compiled_regexes.is_empty());
755    }
756
757    #[test]
758    fn test_scrub_email() {
759        let config = ScrubConfig {
760            rules: vec![ScrubRule::Email {
761                replacement: "user@example.com".to_string(),
762            }],
763            deterministic: false,
764            counter_seed: 0,
765        };
766
767        let scrubber = Scrubber::new(config).unwrap();
768        let input = r#"{"email": "john.doe@company.com", "name": "John"}"#;
769        let result = scrubber.scrub_string(input, ScrubTarget::All);
770
771        assert!(result.contains("user@example.com"));
772        assert!(!result.contains("john.doe@company.com"));
773    }
774
775    #[test]
776    fn test_scrub_multiple_emails() {
777        let config = ScrubConfig {
778            rules: vec![ScrubRule::Email {
779                replacement: "redacted@example.com".to_string(),
780            }],
781            deterministic: false,
782            counter_seed: 0,
783        };
784
785        let scrubber = Scrubber::new(config).unwrap();
786        let input = "Contact: john@test.com and jane@test.org";
787        let result = scrubber.scrub_string(input, ScrubTarget::All);
788
789        assert_eq!(result.matches("redacted@example.com").count(), 2);
790    }
791
792    #[test]
793    fn test_scrub_uuid() {
794        let config = ScrubConfig {
795            rules: vec![ScrubRule::Uuid {
796                replacement: "00000000-0000-0000-0000-{{counter:012}}".to_string(),
797            }],
798            deterministic: false,
799            counter_seed: 0,
800        };
801
802        let scrubber = Scrubber::new(config).unwrap();
803        let input = "Request ID: 123e4567-e89b-12d3-a456-426614174000";
804        let result = scrubber.scrub_string(input, ScrubTarget::All);
805
806        assert!(result.contains("00000000-0000-0000-0000-000000000000"));
807        assert!(!result.contains("123e4567-e89b-12d3-a456-426614174000"));
808    }
809
810    #[test]
811    fn test_scrub_uuid_counter_increments() {
812        let config = ScrubConfig {
813            rules: vec![ScrubRule::Uuid {
814                replacement: "00000000-0000-0000-0000-{{counter:012}}".to_string(),
815            }],
816            deterministic: false,
817            counter_seed: 0,
818        };
819
820        let scrubber = Scrubber::new(config).unwrap();
821        let input1 = "ID: 123e4567-e89b-12d3-a456-426614174000";
822        let input2 = "ID: abc12345-e89b-12d3-a456-426614174000";
823
824        let result1 = scrubber.scrub_string(input1, ScrubTarget::All);
825        let result2 = scrubber.scrub_string(input2, ScrubTarget::All);
826
827        assert!(result1.contains("000000000000"));
828        assert!(result2.contains("000000000001"));
829    }
830
831    #[test]
832    fn test_scrub_ip_address() {
833        let config = ScrubConfig {
834            rules: vec![ScrubRule::IpAddress {
835                replacement: "127.0.0.1".to_string(),
836            }],
837            deterministic: false,
838            counter_seed: 0,
839        };
840
841        let scrubber = Scrubber::new(config).unwrap();
842        let input = "Client IP: 192.168.1.100";
843        let result = scrubber.scrub_string(input, ScrubTarget::All);
844
845        assert!(result.contains("127.0.0.1"));
846        assert!(!result.contains("192.168.1.100"));
847    }
848
849    #[test]
850    fn test_scrub_credit_card() {
851        let config = ScrubConfig {
852            rules: vec![ScrubRule::CreditCard {
853                replacement: "XXXX-XXXX-XXXX-XXXX".to_string(),
854            }],
855            deterministic: false,
856            counter_seed: 0,
857        };
858
859        let scrubber = Scrubber::new(config).unwrap();
860        let input = "Card: 1234-5678-9012-3456";
861        let result = scrubber.scrub_string(input, ScrubTarget::All);
862
863        assert!(result.contains("XXXX-XXXX-XXXX-XXXX"));
864        assert!(!result.contains("1234-5678-9012-3456"));
865    }
866
867    #[test]
868    fn test_scrub_json_field() {
869        let config = ScrubConfig {
870            rules: vec![ScrubRule::Field {
871                field: "user.email".to_string(),
872                replacement: "redacted@example.com".to_string(),
873                target: ScrubTarget::All,
874            }],
875            deterministic: false,
876            counter_seed: 0,
877        };
878
879        let scrubber = Scrubber::new(config).unwrap();
880        let input = r#"{"user": {"email": "secret@company.com", "name": "John"}}"#;
881        let result = scrubber.scrub_string(input, ScrubTarget::Body);
882
883        assert!(result.contains("redacted@example.com"));
884        assert!(!result.contains("secret@company.com"));
885    }
886
887    #[test]
888    fn test_scrub_json_field_top_level() {
889        let config = ScrubConfig {
890            rules: vec![ScrubRule::Field {
891                field: "email".to_string(),
892                replacement: "redacted".to_string(),
893                target: ScrubTarget::Body,
894            }],
895            deterministic: false,
896            counter_seed: 0,
897        };
898
899        let scrubber = Scrubber::new(config).unwrap();
900        let input = r#"{"email": "secret@test.com"}"#;
901        let result = scrubber.scrub_string(input, ScrubTarget::Body);
902
903        assert!(result.contains("redacted"));
904    }
905
906    #[test]
907    fn test_scrub_regex_pattern() {
908        let config = ScrubConfig {
909            rules: vec![ScrubRule::Regex {
910                pattern: r"secret-\w+".to_string(),
911                replacement: "secret-REDACTED".to_string(),
912                target: ScrubTarget::All,
913            }],
914            deterministic: false,
915            counter_seed: 0,
916        };
917
918        let scrubber = Scrubber::new(config).unwrap();
919        let input = "Token: secret-abc123";
920        let result = scrubber.scrub_string(input, ScrubTarget::All);
921
922        assert!(result.contains("secret-REDACTED"));
923        assert!(!result.contains("secret-abc123"));
924    }
925
926    #[test]
927    fn test_scrub_regex_invalid_pattern() {
928        let config = ScrubConfig {
929            rules: vec![ScrubRule::Regex {
930                pattern: r"[invalid".to_string(),
931                replacement: "x".to_string(),
932                target: ScrubTarget::All,
933            }],
934            deterministic: false,
935            counter_seed: 0,
936        };
937
938        let result = Scrubber::new(config);
939        assert!(result.is_err());
940    }
941
942    #[test]
943    fn test_scrub_target_body_only() {
944        let config = ScrubConfig {
945            rules: vec![ScrubRule::Regex {
946                pattern: r"test".to_string(),
947                replacement: "REDACTED".to_string(),
948                target: ScrubTarget::Body,
949            }],
950            deterministic: false,
951            counter_seed: 0,
952        };
953
954        let scrubber = Scrubber::new(config).unwrap();
955        let result_body = scrubber.scrub_string("test data", ScrubTarget::Body);
956        let result_headers = scrubber.scrub_string("test data", ScrubTarget::Headers);
957
958        assert_eq!(result_body, "REDACTED data");
959        assert_eq!(result_headers, "test data"); // Should not be scrubbed
960    }
961
962    #[test]
963    fn test_scrub_multiple_rules() {
964        let config = ScrubConfig {
965            rules: vec![
966                ScrubRule::Email {
967                    replacement: "user@example.com".to_string(),
968                },
969                ScrubRule::IpAddress {
970                    replacement: "0.0.0.0".to_string(),
971                },
972            ],
973            deterministic: false,
974            counter_seed: 0,
975        };
976
977        let scrubber = Scrubber::new(config).unwrap();
978        let input = "Email: john@test.com, IP: 192.168.1.1";
979        let result = scrubber.scrub_string(input, ScrubTarget::All);
980
981        assert!(result.contains("user@example.com"));
982        assert!(result.contains("0.0.0.0"));
983    }
984
985    // ==================== CaptureFilterConfig Tests ====================
986
987    #[test]
988    fn test_capture_filter_config_default() {
989        let config = CaptureFilterConfig::default();
990        assert!(config.status_codes.is_empty());
991        assert!(config.path_patterns.is_empty());
992        assert!(config.methods.is_empty());
993        assert!(config.exclude_paths.is_empty());
994        assert!(!config.errors_only);
995        assert_eq!(config.sample_rate, 1.0);
996    }
997
998    // ==================== CaptureFilter Tests ====================
999
1000    #[test]
1001    fn test_capture_filter_default() {
1002        let filter = CaptureFilter::default();
1003        // Default should capture everything
1004        assert!(filter.should_capture("GET", "/api/test", Some(200)));
1005    }
1006
1007    #[test]
1008    fn test_capture_filter_status_code() {
1009        let config = CaptureFilterConfig {
1010            status_codes: vec![500, 502, 503],
1011            ..Default::default()
1012        };
1013
1014        let filter = CaptureFilter::new(config).unwrap();
1015
1016        assert!(filter.should_capture("GET", "/api/test", Some(500)));
1017        assert!(filter.should_capture("POST", "/api/test", Some(502)));
1018        assert!(!filter.should_capture("GET", "/api/test", Some(200)));
1019        assert!(!filter.should_capture("GET", "/api/test", Some(404)));
1020    }
1021
1022    #[test]
1023    fn test_capture_filter_status_code_without_status() {
1024        let config = CaptureFilterConfig {
1025            status_codes: vec![500],
1026            ..Default::default()
1027        };
1028
1029        let filter = CaptureFilter::new(config).unwrap();
1030        // Should allow when no status provided (filter later)
1031        assert!(filter.should_capture("GET", "/api/test", None));
1032    }
1033
1034    #[test]
1035    fn test_capture_filter_errors_only() {
1036        let config = CaptureFilterConfig {
1037            errors_only: true,
1038            ..Default::default()
1039        };
1040
1041        let filter = CaptureFilter::new(config).unwrap();
1042
1043        assert!(filter.should_capture("GET", "/api/test", Some(400)));
1044        assert!(filter.should_capture("GET", "/api/test", Some(500)));
1045        assert!(!filter.should_capture("GET", "/api/test", Some(200)));
1046        assert!(!filter.should_capture("GET", "/api/test", Some(304)));
1047    }
1048
1049    #[test]
1050    fn test_capture_filter_path_pattern() {
1051        let config = CaptureFilterConfig {
1052            path_patterns: vec![r"^/api/v1/.*".to_string()],
1053            ..Default::default()
1054        };
1055
1056        let filter = CaptureFilter::new(config).unwrap();
1057
1058        assert!(filter.should_capture("GET", "/api/v1/users", None));
1059        assert!(filter.should_capture("POST", "/api/v1/orders", None));
1060        assert!(!filter.should_capture("GET", "/api/v2/users", None));
1061        assert!(!filter.should_capture("GET", "/health", None));
1062    }
1063
1064    #[test]
1065    fn test_capture_filter_multiple_path_patterns() {
1066        let config = CaptureFilterConfig {
1067            path_patterns: vec![r"^/api/v1/.*".to_string(), r"^/internal/.*".to_string()],
1068            ..Default::default()
1069        };
1070
1071        let filter = CaptureFilter::new(config).unwrap();
1072
1073        assert!(filter.should_capture("GET", "/api/v1/users", None));
1074        assert!(filter.should_capture("GET", "/internal/admin", None));
1075        assert!(!filter.should_capture("GET", "/public/docs", None));
1076    }
1077
1078    #[test]
1079    fn test_capture_filter_exclude() {
1080        let config = CaptureFilterConfig {
1081            exclude_paths: vec![r"/health".to_string(), r"/metrics".to_string()],
1082            ..Default::default()
1083        };
1084
1085        let filter = CaptureFilter::new(config).unwrap();
1086
1087        assert!(filter.should_capture("GET", "/api/users", None));
1088        assert!(!filter.should_capture("GET", "/health", None));
1089        assert!(!filter.should_capture("GET", "/metrics", None));
1090    }
1091
1092    #[test]
1093    fn test_capture_filter_methods() {
1094        let config = CaptureFilterConfig {
1095            methods: vec!["POST".to_string(), "PUT".to_string()],
1096            ..Default::default()
1097        };
1098
1099        let filter = CaptureFilter::new(config).unwrap();
1100
1101        assert!(filter.should_capture("POST", "/api/users", None));
1102        assert!(filter.should_capture("PUT", "/api/users/1", None));
1103        assert!(!filter.should_capture("GET", "/api/users", None));
1104        assert!(!filter.should_capture("DELETE", "/api/users/1", None));
1105    }
1106
1107    #[test]
1108    fn test_capture_filter_methods_case_insensitive() {
1109        let config = CaptureFilterConfig {
1110            methods: vec!["POST".to_string()],
1111            ..Default::default()
1112        };
1113
1114        let filter = CaptureFilter::new(config).unwrap();
1115
1116        assert!(filter.should_capture("POST", "/api/users", None));
1117        assert!(filter.should_capture("post", "/api/users", None));
1118        assert!(filter.should_capture("Post", "/api/users", None));
1119    }
1120
1121    #[test]
1122    fn test_capture_filter_invalid_path_pattern() {
1123        let config = CaptureFilterConfig {
1124            path_patterns: vec![r"[invalid".to_string()],
1125            ..Default::default()
1126        };
1127
1128        let result = CaptureFilter::new(config);
1129        assert!(result.is_err());
1130    }
1131
1132    #[test]
1133    fn test_capture_filter_invalid_exclude_pattern() {
1134        let config = CaptureFilterConfig {
1135            exclude_paths: vec![r"[invalid".to_string()],
1136            ..Default::default()
1137        };
1138
1139        let result = CaptureFilter::new(config);
1140        assert!(result.is_err());
1141    }
1142
1143    #[test]
1144    fn test_capture_filter_combined_filters() {
1145        let config = CaptureFilterConfig {
1146            path_patterns: vec![r"^/api/.*".to_string()],
1147            methods: vec!["POST".to_string()],
1148            errors_only: true,
1149            ..Default::default()
1150        };
1151
1152        let filter = CaptureFilter::new(config).unwrap();
1153
1154        // Must match all conditions
1155        assert!(filter.should_capture("POST", "/api/users", Some(500)));
1156        assert!(!filter.should_capture("GET", "/api/users", Some(500))); // Wrong method
1157        assert!(!filter.should_capture("POST", "/other/path", Some(500))); // Wrong path
1158        assert!(!filter.should_capture("POST", "/api/users", Some(200))); // Not an error
1159    }
1160
1161    // ==================== Default Value Function Tests ====================
1162
1163    #[test]
1164    fn test_default_uuid_replacement() {
1165        let replacement = default_uuid_replacement();
1166        assert!(replacement.contains("{{counter:012}}"));
1167    }
1168
1169    #[test]
1170    fn test_default_email_replacement() {
1171        let replacement = default_email_replacement();
1172        assert_eq!(replacement, "user@example.com");
1173    }
1174
1175    #[test]
1176    fn test_default_ip_replacement() {
1177        let replacement = default_ip_replacement();
1178        assert_eq!(replacement, "127.0.0.1");
1179    }
1180
1181    #[test]
1182    fn test_default_creditcard_replacement() {
1183        let replacement = default_creditcard_replacement();
1184        assert_eq!(replacement, "XXXX-XXXX-XXXX-XXXX");
1185    }
1186
1187    #[test]
1188    fn test_default_sample_rate() {
1189        let rate = default_sample_rate();
1190        assert_eq!(rate, 1.0);
1191    }
1192}