mockforge_recorder/
scrubbing.rs

1//! Capture Scrubbing & Deterministic Replay
2//!
3//! Provides functionality to scrub sensitive data from recorded requests/responses
4//! and normalize non-deterministic values for reproducible diffs and replays.
5//!
6//! # Features
7//!
8//! - **Regex-based scrubbing**: Remove or replace sensitive data patterns
9//! - **Deterministic normalization**: Replace timestamps, UUIDs, and random IDs
10//! - **Field-level scrubbing**: Target specific JSON fields or headers
11//! - **Capture filtering**: Selectively record based on status codes or patterns
12//!
13//! # Environment Variables
14//!
15//! - `MOCKFORGE_CAPTURE_SCRUB`: JSON configuration for scrubbing rules
16//! - `MOCKFORGE_CAPTURE_FILTER`: JSON configuration for capture filtering
17//! - `MOCKFORGE_CAPTURE_DETERMINISTIC`: Enable deterministic mode (default: false)
18//!
19//! # Example
20//!
21//! ```bash
22//! export MOCKFORGE_CAPTURE_SCRUB='{"rules": [
23//!   {"type": "field", "field": "email", "replacement": "user@example.com"},
24//!   {"type": "regex", "pattern": "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "replacement": "00000000-0000-0000-0000-000000000000"}
25//! ]}'
26//!
27//! export MOCKFORGE_CAPTURE_FILTER='{"status_codes": [500, 502, 503, 504]}'
28//! export MOCKFORGE_CAPTURE_DETERMINISTIC=true
29//! ```
30
31use crate::{RecordedRequest, RecordedResponse, RecorderError, Result};
32use chrono::{DateTime, Utc};
33use once_cell::sync::Lazy;
34use regex::Regex;
35use serde::{Deserialize, Serialize};
36use std::collections::HashMap;
37use std::sync::Arc;
38use tracing::{debug, warn};
39
40/// Global scrubber instance loaded from environment
41static GLOBAL_SCRUBBER: Lazy<Arc<Scrubber>> = Lazy::new(|| {
42    Arc::new(Scrubber::from_env().unwrap_or_else(|e| {
43        warn!("Failed to load scrubber from environment: {}", e);
44        Scrubber::default()
45    }))
46});
47
48/// Global filter instance loaded from environment
49static GLOBAL_FILTER: Lazy<Arc<CaptureFilter>> = Lazy::new(|| {
50    Arc::new(CaptureFilter::from_env().unwrap_or_else(|e| {
51        warn!("Failed to load capture filter from environment: {}", e);
52        CaptureFilter::default()
53    }))
54});
55
56/// Regex pattern for matching UUIDs
57static UUID_REGEX: Lazy<Regex> = Lazy::new(|| {
58    Regex::new(r"(?i)[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}").unwrap()
59});
60
61/// Regex pattern for matching email addresses
62static EMAIL_REGEX: Lazy<Regex> =
63    Lazy::new(|| Regex::new(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b").unwrap());
64
65/// Regex pattern for matching IPv4 addresses
66static IPV4_REGEX: Lazy<Regex> =
67    Lazy::new(|| Regex::new(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b").unwrap());
68
69/// Regex pattern for matching credit card numbers
70static CREDIT_CARD_REGEX: Lazy<Regex> =
71    Lazy::new(|| Regex::new(r"\b(?:\d{4}[-\s]?){3}\d{4}\b").unwrap());
72
73/// Configuration for scrubbing sensitive data
74#[derive(Debug, Clone, Serialize, Deserialize, Default)]
75pub struct ScrubConfig {
76    /// Rules for scrubbing data
77    #[serde(default)]
78    pub rules: Vec<ScrubRule>,
79
80    /// Enable deterministic mode (normalize timestamps, IDs, etc.)
81    #[serde(default)]
82    pub deterministic: bool,
83
84    /// Counter seed for deterministic IDs (used in deterministic mode)
85    #[serde(default)]
86    pub counter_seed: u64,
87}
88
89/// A single scrubbing rule
90#[derive(Debug, Clone, Serialize, Deserialize)]
91#[serde(tag = "type", rename_all = "lowercase")]
92pub enum ScrubRule {
93    /// Scrub by regex pattern
94    Regex {
95        /// Regex pattern to match
96        pattern: String,
97        /// Replacement string (can include capture groups like $1, $2)
98        replacement: String,
99        /// Target location (headers, body, all)
100        #[serde(default = "default_target")]
101        target: ScrubTarget,
102    },
103
104    /// Scrub specific JSON field
105    Field {
106        /// JSON field path (e.g., "user.email", "data.id")
107        field: String,
108        /// Replacement value
109        replacement: String,
110        /// Target location
111        #[serde(default = "default_target")]
112        target: ScrubTarget,
113    },
114
115    /// Scrub specific header
116    Header {
117        /// Header name (case-insensitive)
118        name: String,
119        /// Replacement value
120        replacement: String,
121    },
122
123    /// Scrub all UUIDs (replace with deterministic counter)
124    Uuid {
125        /// Replacement pattern (use {{counter}} for deterministic counter)
126        #[serde(default = "default_uuid_replacement")]
127        replacement: String,
128    },
129
130    /// Scrub email addresses
131    Email {
132        /// Replacement value
133        #[serde(default = "default_email_replacement")]
134        replacement: String,
135    },
136
137    /// Scrub IP addresses
138    IpAddress {
139        /// Replacement value
140        #[serde(default = "default_ip_replacement")]
141        replacement: String,
142    },
143
144    /// Scrub credit card numbers
145    CreditCard {
146        /// Replacement value
147        #[serde(default = "default_creditcard_replacement")]
148        replacement: String,
149    },
150}
151
152/// Target location for scrubbing
153#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
154#[serde(rename_all = "lowercase")]
155pub enum ScrubTarget {
156    /// Scrub in headers only
157    Headers,
158    /// Scrub in body only
159    Body,
160    /// Scrub in both headers and body
161    All,
162}
163
164fn default_target() -> ScrubTarget {
165    ScrubTarget::All
166}
167
/// Serde default for UUID replacements: a nil-prefixed UUID whose last group
/// is the zero-padded counter placeholder.
fn default_uuid_replacement() -> String {
    String::from("00000000-0000-0000-0000-{{counter:012}}")
}
171
/// Serde default for email replacements.
fn default_email_replacement() -> String {
    String::from("user@example.com")
}
175
/// Serde default for IP address replacements.
fn default_ip_replacement() -> String {
    String::from("127.0.0.1")
}
179
/// Serde default for credit card replacements.
fn default_creditcard_replacement() -> String {
    String::from("XXXX-XXXX-XXXX-XXXX")
}
183
184/// Configuration for filtering which requests to capture
185#[derive(Debug, Clone, Serialize, Deserialize)]
186pub struct CaptureFilterConfig {
187    /// Only capture requests with these status codes
188    #[serde(default)]
189    pub status_codes: Vec<u16>,
190
191    /// Only capture requests matching these patterns (regex)
192    #[serde(default)]
193    pub path_patterns: Vec<String>,
194
195    /// Only capture requests with these methods
196    #[serde(default)]
197    pub methods: Vec<String>,
198
199    /// Exclude requests matching these patterns
200    #[serde(default)]
201    pub exclude_paths: Vec<String>,
202
203    /// Only capture errors (status >= 400)
204    #[serde(default)]
205    pub errors_only: bool,
206
207    /// Capture sample rate (0.0 - 1.0, e.g., 0.1 = 10%)
208    #[serde(default = "default_sample_rate")]
209    pub sample_rate: f64,
210}
211
/// Serde default for the sample rate: capture everything.
fn default_sample_rate() -> f64 {
    1.0_f64
}
215
216impl Default for CaptureFilterConfig {
217    fn default() -> Self {
218        Self {
219            status_codes: Vec::new(),
220            path_patterns: Vec::new(),
221            methods: Vec::new(),
222            exclude_paths: Vec::new(),
223            errors_only: false,
224            sample_rate: default_sample_rate(),
225        }
226    }
227}
228
229/// Handles scrubbing of sensitive data from recorded requests/responses
230pub struct Scrubber {
231    config: ScrubConfig,
232    compiled_regexes: Vec<(Regex, String, ScrubTarget)>,
233    deterministic_counter: std::sync::atomic::AtomicU64,
234}
235
236impl Default for Scrubber {
237    fn default() -> Self {
238        Self {
239            config: ScrubConfig::default(),
240            compiled_regexes: Vec::new(),
241            deterministic_counter: std::sync::atomic::AtomicU64::new(0),
242        }
243    }
244}
245
246impl Scrubber {
247    /// Create a new scrubber with the given configuration
248    pub fn new(config: ScrubConfig) -> Result<Self> {
249        let mut compiled_regexes = Vec::new();
250
251        // Compile regex patterns
252        for rule in &config.rules {
253            if let ScrubRule::Regex {
254                pattern,
255                replacement,
256                target,
257            } = rule
258            {
259                let regex = Regex::new(pattern).map_err(|e| {
260                    RecorderError::InvalidFilter(format!(
261                        "Invalid regex pattern '{}': {}",
262                        pattern, e
263                    ))
264                })?;
265                compiled_regexes.push((regex, replacement.clone(), *target));
266            }
267        }
268
269        Ok(Self {
270            deterministic_counter: std::sync::atomic::AtomicU64::new(config.counter_seed),
271            config,
272            compiled_regexes,
273        })
274    }
275
276    /// Load scrubber from MOCKFORGE_CAPTURE_SCRUB environment variable
277    pub fn from_env() -> Result<Self> {
278        let scrub_json = std::env::var("MOCKFORGE_CAPTURE_SCRUB").ok();
279        let deterministic = std::env::var("MOCKFORGE_CAPTURE_DETERMINISTIC")
280            .ok()
281            .and_then(|v| v.parse::<bool>().ok())
282            .unwrap_or(false);
283
284        let mut config = if let Some(json) = scrub_json {
285            serde_json::from_str::<ScrubConfig>(&json).map_err(|e| {
286                RecorderError::InvalidFilter(format!("Invalid MOCKFORGE_CAPTURE_SCRUB JSON: {}", e))
287            })?
288        } else {
289            ScrubConfig::default()
290        };
291
292        config.deterministic = deterministic;
293
294        Self::new(config)
295    }
296
297    /// Get the global scrubber instance
298    pub fn global() -> Arc<Self> {
299        Arc::clone(&GLOBAL_SCRUBBER)
300    }
301
302    /// Scrub a recorded request
303    pub fn scrub_request(&self, request: &mut RecordedRequest) {
304        // Scrub headers
305        if let Ok(mut headers) = serde_json::from_str::<HashMap<String, String>>(&request.headers) {
306            self.scrub_headers(&mut headers);
307            if let Ok(json) = serde_json::to_string(&headers) {
308                request.headers = json;
309            }
310        }
311
312        // Scrub body
313        if let Some(ref mut body) = request.body {
314            if request.body_encoding == "utf8" {
315                *body = self.scrub_string(body, ScrubTarget::Body);
316            }
317        }
318
319        // Scrub query params
320        if let Some(ref mut query) = request.query_params {
321            *query = self.scrub_string(query, ScrubTarget::Body);
322        }
323
324        // Normalize timestamp in deterministic mode
325        if self.config.deterministic {
326            request.timestamp = Self::normalize_timestamp(request.timestamp);
327        }
328
329        // Scrub sensitive fields
330        if let Some(ref mut trace_id) = request.trace_id {
331            *trace_id = self.scrub_string(trace_id, ScrubTarget::All);
332        }
333        if let Some(ref mut span_id) = request.span_id {
334            *span_id = self.scrub_string(span_id, ScrubTarget::All);
335        }
336        if let Some(ref mut client_ip) = request.client_ip {
337            *client_ip = self.scrub_string(client_ip, ScrubTarget::All);
338        }
339    }
340
341    /// Scrub a recorded response
342    pub fn scrub_response(&self, response: &mut RecordedResponse) {
343        // Scrub headers
344        if let Ok(mut headers) = serde_json::from_str::<HashMap<String, String>>(&response.headers)
345        {
346            self.scrub_headers(&mut headers);
347            if let Ok(json) = serde_json::to_string(&headers) {
348                response.headers = json;
349            }
350        }
351
352        // Scrub body
353        if let Some(ref mut body) = response.body {
354            if response.body_encoding == "utf8" {
355                *body = self.scrub_string(body, ScrubTarget::Body);
356            }
357        }
358
359        // Normalize timestamp in deterministic mode
360        if self.config.deterministic {
361            response.timestamp = Self::normalize_timestamp(response.timestamp);
362        }
363    }
364
365    /// Scrub headers map
366    fn scrub_headers(&self, headers: &mut HashMap<String, String>) {
367        for rule in &self.config.rules {
368            if let ScrubRule::Header { name, replacement } = rule {
369                // Case-insensitive header matching
370                let key = headers.keys().find(|k| k.eq_ignore_ascii_case(name)).cloned();
371                if let Some(key) = key {
372                    headers.insert(key, replacement.clone());
373                }
374            }
375        }
376
377        // Scrub header values with regex rules
378        for (key, value) in headers.iter_mut() {
379            *value = self.scrub_string(value, ScrubTarget::Headers);
380
381            // Also scrub by field name
382            for rule in &self.config.rules {
383                if let ScrubRule::Field {
384                    field,
385                    replacement,
386                    target,
387                } = rule
388                {
389                    if (*target == ScrubTarget::Headers || *target == ScrubTarget::All)
390                        && key.eq_ignore_ascii_case(field)
391                    {
392                        *value = replacement.clone();
393                    }
394                }
395            }
396        }
397    }
398
399    /// Scrub a string value
400    fn scrub_string(&self, input: &str, location: ScrubTarget) -> String {
401        let mut result = input.to_string();
402
403        // Apply built-in rules
404        for rule in &self.config.rules {
405            match rule {
406                ScrubRule::Uuid { replacement } => {
407                    if location == ScrubTarget::All || location == ScrubTarget::Body {
408                        result = self.scrub_uuids(&result, replacement);
409                    }
410                }
411                ScrubRule::Email { replacement } => {
412                    if location == ScrubTarget::All || location == ScrubTarget::Body {
413                        result = self.scrub_emails(&result, replacement);
414                    }
415                }
416                ScrubRule::IpAddress { replacement } => {
417                    if location == ScrubTarget::All || location == ScrubTarget::Body {
418                        result = self.scrub_ips(&result, replacement);
419                    }
420                }
421                ScrubRule::CreditCard { replacement } => {
422                    if location == ScrubTarget::All || location == ScrubTarget::Body {
423                        result = self.scrub_credit_cards(&result, replacement);
424                    }
425                }
426                ScrubRule::Field {
427                    field,
428                    replacement,
429                    target,
430                } => {
431                    if *target == location || *target == ScrubTarget::All {
432                        result = self.scrub_json_field(&result, field, replacement);
433                    }
434                }
435                _ => {}
436            }
437        }
438
439        // Apply regex rules
440        for (regex, replacement, target) in &self.compiled_regexes {
441            if *target == location || *target == ScrubTarget::All {
442                result = regex.replace_all(&result, replacement.as_str()).to_string();
443            }
444        }
445
446        result
447    }
448
449    /// Scrub UUIDs with deterministic counter
450    fn scrub_uuids(&self, input: &str, replacement: &str) -> String {
451        UUID_REGEX
452            .replace_all(input, |_: &regex::Captures| {
453                let counter =
454                    self.deterministic_counter.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
455                replacement
456                    .replace("{{counter}}", &counter.to_string())
457                    .replace("{{counter:012}}", &format!("{:012}", counter))
458            })
459            .to_string()
460    }
461
462    /// Scrub email addresses
463    fn scrub_emails(&self, input: &str, replacement: &str) -> String {
464        EMAIL_REGEX.replace_all(input, replacement).to_string()
465    }
466
467    /// Scrub IP addresses
468    fn scrub_ips(&self, input: &str, replacement: &str) -> String {
469        IPV4_REGEX.replace_all(input, replacement).to_string()
470    }
471
472    /// Scrub credit card numbers
473    fn scrub_credit_cards(&self, input: &str, replacement: &str) -> String {
474        CREDIT_CARD_REGEX.replace_all(input, replacement).to_string()
475    }
476
477    /// Scrub specific JSON field
478    fn scrub_json_field(&self, input: &str, field_path: &str, replacement: &str) -> String {
479        // Try to parse as JSON
480        if let Ok(mut json) = serde_json::from_str::<serde_json::Value>(input) {
481            if self.scrub_json_value(&mut json, field_path, replacement) {
482                if let Ok(result) = serde_json::to_string(&json) {
483                    return result;
484                }
485            }
486        }
487        input.to_string()
488    }
489
490    /// Recursively scrub JSON value
491    fn scrub_json_value(
492        &self,
493        value: &mut serde_json::Value,
494        field_path: &str,
495        replacement: &str,
496    ) -> bool {
497        let parts: Vec<&str> = field_path.split('.').collect();
498        if parts.is_empty() {
499            return false;
500        }
501
502        if parts.len() == 1 {
503            // Base case: scrub this field
504            if let Some(obj) = value.as_object_mut() {
505                if obj.contains_key(parts[0]) {
506                    obj.insert(
507                        parts[0].to_string(),
508                        serde_json::Value::String(replacement.to_string()),
509                    );
510                    return true;
511                }
512            }
513        } else {
514            // Recursive case: navigate deeper
515            if let Some(obj) = value.as_object_mut() {
516                if let Some(child) = obj.get_mut(parts[0]) {
517                    let remaining = parts[1..].join(".");
518                    return self.scrub_json_value(child, &remaining, replacement);
519                }
520            }
521        }
522
523        false
524    }
525
526    /// Normalize timestamp to a deterministic value
527    fn normalize_timestamp(timestamp: DateTime<Utc>) -> DateTime<Utc> {
528        // Normalize to start of day
529        timestamp
530            .date_naive()
531            .and_hms_opt(0, 0, 0)
532            .expect("0 is valid for hours/minutes/seconds")
533            .and_utc()
534    }
535}
536
537/// Handles filtering of which requests to capture
538#[derive(Default)]
539pub struct CaptureFilter {
540    config: CaptureFilterConfig,
541    path_patterns: Vec<Regex>,
542    exclude_patterns: Vec<Regex>,
543}
544
545impl CaptureFilter {
546    /// Create a new capture filter with the given configuration
547    pub fn new(config: CaptureFilterConfig) -> Result<Self> {
548        let mut path_patterns = Vec::new();
549        for pattern in &config.path_patterns {
550            let regex = Regex::new(pattern).map_err(|e| {
551                RecorderError::InvalidFilter(format!("Invalid path pattern '{}': {}", pattern, e))
552            })?;
553            path_patterns.push(regex);
554        }
555
556        let mut exclude_patterns = Vec::new();
557        for pattern in &config.exclude_paths {
558            let regex = Regex::new(pattern).map_err(|e| {
559                RecorderError::InvalidFilter(format!(
560                    "Invalid exclude pattern '{}': {}",
561                    pattern, e
562                ))
563            })?;
564            exclude_patterns.push(regex);
565        }
566
567        Ok(Self {
568            config,
569            path_patterns,
570            exclude_patterns,
571        })
572    }
573
574    /// Load filter from MOCKFORGE_CAPTURE_FILTER environment variable
575    pub fn from_env() -> Result<Self> {
576        let filter_json = std::env::var("MOCKFORGE_CAPTURE_FILTER").ok();
577
578        let config = if let Some(json) = filter_json {
579            serde_json::from_str::<CaptureFilterConfig>(&json).map_err(|e| {
580                RecorderError::InvalidFilter(format!(
581                    "Invalid MOCKFORGE_CAPTURE_FILTER JSON: {}",
582                    e
583                ))
584            })?
585        } else {
586            CaptureFilterConfig::default()
587        };
588
589        Self::new(config)
590    }
591
592    /// Get the global filter instance
593    pub fn global() -> Arc<Self> {
594        Arc::clone(&GLOBAL_FILTER)
595    }
596
597    /// Check if a request should be captured
598    pub fn should_capture(&self, method: &str, path: &str, status_code: Option<u16>) -> bool {
599        debug!(
600            "should_capture called: method={}, path={}, status_code={:?}",
601            method, path, status_code
602        );
603        debug!("  errors_only={}, status_codes={:?}, path_patterns count={}, exclude_patterns count={}",
604               self.config.errors_only, self.config.status_codes, self.path_patterns.len(), self.exclude_patterns.len());
605
606        // Check sample rate
607        if self.config.sample_rate < 1.0 {
608            use std::collections::hash_map::DefaultHasher;
609            use std::hash::{Hash, Hasher};
610
611            let mut hasher = DefaultHasher::new();
612            path.hash(&mut hasher);
613            let hash = hasher.finish();
614            let sample = (hash % 1000) as f64 / 1000.0;
615
616            if sample > self.config.sample_rate {
617                debug!(
618                    "Skipping capture due to sample rate: {} > {}",
619                    sample, self.config.sample_rate
620                );
621                return false;
622            }
623        }
624
625        // Check errors_only
626        if self.config.errors_only {
627            if let Some(code) = status_code {
628                if code < 400 {
629                    debug!("Skipping capture: not an error (status {})", code);
630                    return false;
631                }
632            } else {
633                // No status code yet, we can't determine if it's an error
634                // In this case, we should allow it and filter later when we have the status
635                debug!("errors_only is set but no status code provided, allowing for now");
636            }
637        }
638
639        // Check status code filter
640        if !self.config.status_codes.is_empty() {
641            if let Some(code) = status_code {
642                if !self.config.status_codes.contains(&code) {
643                    debug!("Skipping capture: status code {} not in filter", code);
644                    return false;
645                }
646            } else {
647                // No status code yet, allow it and filter later
648                debug!("status_codes filter set but no status code provided, allowing for now");
649            }
650        }
651
652        // Check method filter
653        if !self.config.methods.is_empty()
654            && !self.config.methods.iter().any(|m| m.eq_ignore_ascii_case(method))
655        {
656            debug!("Skipping capture: method {} not in filter", method);
657            return false;
658        }
659
660        // Check exclude patterns
661        for pattern in &self.exclude_patterns {
662            if pattern.is_match(path) {
663                debug!("Skipping capture: path {} matches exclude pattern", path);
664                return false;
665            }
666        }
667
668        // Check path patterns (if specified, path must match)
669        if !self.path_patterns.is_empty() {
670            let matches = self.path_patterns.iter().any(|p| p.is_match(path));
671            if !matches {
672                debug!("Skipping capture: path {} does not match any pattern", path);
673                return false;
674            }
675        }
676
677        true
678    }
679}
680
681#[cfg(test)]
682mod tests {
683    use super::*;
684
685    // ==================== ScrubConfig Tests ====================
686
687    #[test]
688    fn test_scrub_config_default() {
689        let config = ScrubConfig::default();
690        assert!(config.rules.is_empty());
691        assert!(!config.deterministic);
692        assert_eq!(config.counter_seed, 0);
693    }
694
695    #[test]
696    fn test_scrub_config_serialize() {
697        let config = ScrubConfig {
698            rules: vec![ScrubRule::Email {
699                replacement: "user@example.com".to_string(),
700            }],
701            deterministic: true,
702            counter_seed: 100,
703        };
704        let json = serde_json::to_string(&config).unwrap();
705        assert!(json.contains("email"));
706        assert!(json.contains("deterministic"));
707    }
708
709    // ==================== ScrubTarget Tests ====================
710
711    #[test]
712    fn test_scrub_target_equality() {
713        assert_eq!(ScrubTarget::All, ScrubTarget::All);
714        assert_ne!(ScrubTarget::Headers, ScrubTarget::Body);
715    }
716
717    #[test]
718    fn test_scrub_target_default() {
719        assert_eq!(default_target(), ScrubTarget::All);
720    }
721
722    // ==================== ScrubRule Tests ====================
723
724    #[test]
725    fn test_scrub_rule_regex_serialize() {
726        let rule = ScrubRule::Regex {
727            pattern: r"\d+".to_string(),
728            replacement: "XXX".to_string(),
729            target: ScrubTarget::Body,
730        };
731        let json = serde_json::to_string(&rule).unwrap();
732        assert!(json.contains("regex"));
733        assert!(json.contains("\\\\d+"));
734    }
735
736    #[test]
737    fn test_scrub_rule_header_serialize() {
738        let rule = ScrubRule::Header {
739            name: "Authorization".to_string(),
740            replacement: "Bearer ***".to_string(),
741        };
742        let json = serde_json::to_string(&rule).unwrap();
743        assert!(json.contains("header"));
744        assert!(json.contains("Authorization"));
745    }
746
747    // ==================== Scrubber Tests ====================
748
749    #[test]
750    fn test_scrubber_default() {
751        let scrubber = Scrubber::default();
752        assert!(scrubber.config.rules.is_empty());
753        assert!(scrubber.compiled_regexes.is_empty());
754    }
755
756    #[test]
757    fn test_scrub_email() {
758        let config = ScrubConfig {
759            rules: vec![ScrubRule::Email {
760                replacement: "user@example.com".to_string(),
761            }],
762            deterministic: false,
763            counter_seed: 0,
764        };
765
766        let scrubber = Scrubber::new(config).unwrap();
767        let input = r#"{"email": "john.doe@company.com", "name": "John"}"#;
768        let result = scrubber.scrub_string(input, ScrubTarget::All);
769
770        assert!(result.contains("user@example.com"));
771        assert!(!result.contains("john.doe@company.com"));
772    }
773
774    #[test]
775    fn test_scrub_multiple_emails() {
776        let config = ScrubConfig {
777            rules: vec![ScrubRule::Email {
778                replacement: "redacted@example.com".to_string(),
779            }],
780            deterministic: false,
781            counter_seed: 0,
782        };
783
784        let scrubber = Scrubber::new(config).unwrap();
785        let input = "Contact: john@test.com and jane@test.org";
786        let result = scrubber.scrub_string(input, ScrubTarget::All);
787
788        assert_eq!(result.matches("redacted@example.com").count(), 2);
789    }
790
791    #[test]
792    fn test_scrub_uuid() {
793        let config = ScrubConfig {
794            rules: vec![ScrubRule::Uuid {
795                replacement: "00000000-0000-0000-0000-{{counter:012}}".to_string(),
796            }],
797            deterministic: false,
798            counter_seed: 0,
799        };
800
801        let scrubber = Scrubber::new(config).unwrap();
802        let input = "Request ID: 123e4567-e89b-12d3-a456-426614174000";
803        let result = scrubber.scrub_string(input, ScrubTarget::All);
804
805        assert!(result.contains("00000000-0000-0000-0000-000000000000"));
806        assert!(!result.contains("123e4567-e89b-12d3-a456-426614174000"));
807    }
808
809    #[test]
810    fn test_scrub_uuid_counter_increments() {
811        let config = ScrubConfig {
812            rules: vec![ScrubRule::Uuid {
813                replacement: "00000000-0000-0000-0000-{{counter:012}}".to_string(),
814            }],
815            deterministic: false,
816            counter_seed: 0,
817        };
818
819        let scrubber = Scrubber::new(config).unwrap();
820        let input1 = "ID: 123e4567-e89b-12d3-a456-426614174000";
821        let input2 = "ID: abc12345-e89b-12d3-a456-426614174000";
822
823        let result1 = scrubber.scrub_string(input1, ScrubTarget::All);
824        let result2 = scrubber.scrub_string(input2, ScrubTarget::All);
825
826        assert!(result1.contains("000000000000"));
827        assert!(result2.contains("000000000001"));
828    }
829
830    #[test]
831    fn test_scrub_ip_address() {
832        let config = ScrubConfig {
833            rules: vec![ScrubRule::IpAddress {
834                replacement: "127.0.0.1".to_string(),
835            }],
836            deterministic: false,
837            counter_seed: 0,
838        };
839
840        let scrubber = Scrubber::new(config).unwrap();
841        let input = "Client IP: 192.168.1.100";
842        let result = scrubber.scrub_string(input, ScrubTarget::All);
843
844        assert!(result.contains("127.0.0.1"));
845        assert!(!result.contains("192.168.1.100"));
846    }
847
848    #[test]
849    fn test_scrub_credit_card() {
850        let config = ScrubConfig {
851            rules: vec![ScrubRule::CreditCard {
852                replacement: "XXXX-XXXX-XXXX-XXXX".to_string(),
853            }],
854            deterministic: false,
855            counter_seed: 0,
856        };
857
858        let scrubber = Scrubber::new(config).unwrap();
859        let input = "Card: 1234-5678-9012-3456";
860        let result = scrubber.scrub_string(input, ScrubTarget::All);
861
862        assert!(result.contains("XXXX-XXXX-XXXX-XXXX"));
863        assert!(!result.contains("1234-5678-9012-3456"));
864    }
865
866    #[test]
867    fn test_scrub_json_field() {
868        let config = ScrubConfig {
869            rules: vec![ScrubRule::Field {
870                field: "user.email".to_string(),
871                replacement: "redacted@example.com".to_string(),
872                target: ScrubTarget::All,
873            }],
874            deterministic: false,
875            counter_seed: 0,
876        };
877
878        let scrubber = Scrubber::new(config).unwrap();
879        let input = r#"{"user": {"email": "secret@company.com", "name": "John"}}"#;
880        let result = scrubber.scrub_string(input, ScrubTarget::Body);
881
882        assert!(result.contains("redacted@example.com"));
883        assert!(!result.contains("secret@company.com"));
884    }
885
886    #[test]
887    fn test_scrub_json_field_top_level() {
888        let config = ScrubConfig {
889            rules: vec![ScrubRule::Field {
890                field: "email".to_string(),
891                replacement: "redacted".to_string(),
892                target: ScrubTarget::Body,
893            }],
894            deterministic: false,
895            counter_seed: 0,
896        };
897
898        let scrubber = Scrubber::new(config).unwrap();
899        let input = r#"{"email": "secret@test.com"}"#;
900        let result = scrubber.scrub_string(input, ScrubTarget::Body);
901
902        assert!(result.contains("redacted"));
903    }
904
905    #[test]
906    fn test_scrub_regex_pattern() {
907        let config = ScrubConfig {
908            rules: vec![ScrubRule::Regex {
909                pattern: r"secret-\w+".to_string(),
910                replacement: "secret-REDACTED".to_string(),
911                target: ScrubTarget::All,
912            }],
913            deterministic: false,
914            counter_seed: 0,
915        };
916
917        let scrubber = Scrubber::new(config).unwrap();
918        let input = "Token: secret-abc123";
919        let result = scrubber.scrub_string(input, ScrubTarget::All);
920
921        assert!(result.contains("secret-REDACTED"));
922        assert!(!result.contains("secret-abc123"));
923    }
924
925    #[test]
926    fn test_scrub_regex_invalid_pattern() {
927        let config = ScrubConfig {
928            rules: vec![ScrubRule::Regex {
929                pattern: r"[invalid".to_string(),
930                replacement: "x".to_string(),
931                target: ScrubTarget::All,
932            }],
933            deterministic: false,
934            counter_seed: 0,
935        };
936
937        let result = Scrubber::new(config);
938        assert!(result.is_err());
939    }
940
941    #[test]
942    fn test_scrub_target_body_only() {
943        let config = ScrubConfig {
944            rules: vec![ScrubRule::Regex {
945                pattern: r"test".to_string(),
946                replacement: "REDACTED".to_string(),
947                target: ScrubTarget::Body,
948            }],
949            deterministic: false,
950            counter_seed: 0,
951        };
952
953        let scrubber = Scrubber::new(config).unwrap();
954        let result_body = scrubber.scrub_string("test data", ScrubTarget::Body);
955        let result_headers = scrubber.scrub_string("test data", ScrubTarget::Headers);
956
957        assert_eq!(result_body, "REDACTED data");
958        assert_eq!(result_headers, "test data"); // Should not be scrubbed
959    }
960
961    #[test]
962    fn test_scrub_multiple_rules() {
963        let config = ScrubConfig {
964            rules: vec![
965                ScrubRule::Email {
966                    replacement: "user@example.com".to_string(),
967                },
968                ScrubRule::IpAddress {
969                    replacement: "0.0.0.0".to_string(),
970                },
971            ],
972            deterministic: false,
973            counter_seed: 0,
974        };
975
976        let scrubber = Scrubber::new(config).unwrap();
977        let input = "Email: john@test.com, IP: 192.168.1.1";
978        let result = scrubber.scrub_string(input, ScrubTarget::All);
979
980        assert!(result.contains("user@example.com"));
981        assert!(result.contains("0.0.0.0"));
982    }
983
984    // ==================== CaptureFilterConfig Tests ====================
985
986    #[test]
987    fn test_capture_filter_config_default() {
988        let config = CaptureFilterConfig::default();
989        assert!(config.status_codes.is_empty());
990        assert!(config.path_patterns.is_empty());
991        assert!(config.methods.is_empty());
992        assert!(config.exclude_paths.is_empty());
993        assert!(!config.errors_only);
994        assert_eq!(config.sample_rate, 1.0);
995    }
996
997    // ==================== CaptureFilter Tests ====================
998
999    #[test]
1000    fn test_capture_filter_default() {
1001        let filter = CaptureFilter::default();
1002        // Default should capture everything
1003        assert!(filter.should_capture("GET", "/api/test", Some(200)));
1004    }
1005
1006    #[test]
1007    fn test_capture_filter_status_code() {
1008        let config = CaptureFilterConfig {
1009            status_codes: vec![500, 502, 503],
1010            ..Default::default()
1011        };
1012
1013        let filter = CaptureFilter::new(config).unwrap();
1014
1015        assert!(filter.should_capture("GET", "/api/test", Some(500)));
1016        assert!(filter.should_capture("POST", "/api/test", Some(502)));
1017        assert!(!filter.should_capture("GET", "/api/test", Some(200)));
1018        assert!(!filter.should_capture("GET", "/api/test", Some(404)));
1019    }
1020
1021    #[test]
1022    fn test_capture_filter_status_code_without_status() {
1023        let config = CaptureFilterConfig {
1024            status_codes: vec![500],
1025            ..Default::default()
1026        };
1027
1028        let filter = CaptureFilter::new(config).unwrap();
1029        // Should allow when no status provided (filter later)
1030        assert!(filter.should_capture("GET", "/api/test", None));
1031    }
1032
1033    #[test]
1034    fn test_capture_filter_errors_only() {
1035        let config = CaptureFilterConfig {
1036            errors_only: true,
1037            ..Default::default()
1038        };
1039
1040        let filter = CaptureFilter::new(config).unwrap();
1041
1042        assert!(filter.should_capture("GET", "/api/test", Some(400)));
1043        assert!(filter.should_capture("GET", "/api/test", Some(500)));
1044        assert!(!filter.should_capture("GET", "/api/test", Some(200)));
1045        assert!(!filter.should_capture("GET", "/api/test", Some(304)));
1046    }
1047
1048    #[test]
1049    fn test_capture_filter_path_pattern() {
1050        let config = CaptureFilterConfig {
1051            path_patterns: vec![r"^/api/v1/.*".to_string()],
1052            ..Default::default()
1053        };
1054
1055        let filter = CaptureFilter::new(config).unwrap();
1056
1057        assert!(filter.should_capture("GET", "/api/v1/users", None));
1058        assert!(filter.should_capture("POST", "/api/v1/orders", None));
1059        assert!(!filter.should_capture("GET", "/api/v2/users", None));
1060        assert!(!filter.should_capture("GET", "/health", None));
1061    }
1062
1063    #[test]
1064    fn test_capture_filter_multiple_path_patterns() {
1065        let config = CaptureFilterConfig {
1066            path_patterns: vec![r"^/api/v1/.*".to_string(), r"^/internal/.*".to_string()],
1067            ..Default::default()
1068        };
1069
1070        let filter = CaptureFilter::new(config).unwrap();
1071
1072        assert!(filter.should_capture("GET", "/api/v1/users", None));
1073        assert!(filter.should_capture("GET", "/internal/admin", None));
1074        assert!(!filter.should_capture("GET", "/public/docs", None));
1075    }
1076
1077    #[test]
1078    fn test_capture_filter_exclude() {
1079        let config = CaptureFilterConfig {
1080            exclude_paths: vec![r"/health".to_string(), r"/metrics".to_string()],
1081            ..Default::default()
1082        };
1083
1084        let filter = CaptureFilter::new(config).unwrap();
1085
1086        assert!(filter.should_capture("GET", "/api/users", None));
1087        assert!(!filter.should_capture("GET", "/health", None));
1088        assert!(!filter.should_capture("GET", "/metrics", None));
1089    }
1090
1091    #[test]
1092    fn test_capture_filter_methods() {
1093        let config = CaptureFilterConfig {
1094            methods: vec!["POST".to_string(), "PUT".to_string()],
1095            ..Default::default()
1096        };
1097
1098        let filter = CaptureFilter::new(config).unwrap();
1099
1100        assert!(filter.should_capture("POST", "/api/users", None));
1101        assert!(filter.should_capture("PUT", "/api/users/1", None));
1102        assert!(!filter.should_capture("GET", "/api/users", None));
1103        assert!(!filter.should_capture("DELETE", "/api/users/1", None));
1104    }
1105
1106    #[test]
1107    fn test_capture_filter_methods_case_insensitive() {
1108        let config = CaptureFilterConfig {
1109            methods: vec!["POST".to_string()],
1110            ..Default::default()
1111        };
1112
1113        let filter = CaptureFilter::new(config).unwrap();
1114
1115        assert!(filter.should_capture("POST", "/api/users", None));
1116        assert!(filter.should_capture("post", "/api/users", None));
1117        assert!(filter.should_capture("Post", "/api/users", None));
1118    }
1119
1120    #[test]
1121    fn test_capture_filter_invalid_path_pattern() {
1122        let config = CaptureFilterConfig {
1123            path_patterns: vec![r"[invalid".to_string()],
1124            ..Default::default()
1125        };
1126
1127        let result = CaptureFilter::new(config);
1128        assert!(result.is_err());
1129    }
1130
1131    #[test]
1132    fn test_capture_filter_invalid_exclude_pattern() {
1133        let config = CaptureFilterConfig {
1134            exclude_paths: vec![r"[invalid".to_string()],
1135            ..Default::default()
1136        };
1137
1138        let result = CaptureFilter::new(config);
1139        assert!(result.is_err());
1140    }
1141
1142    #[test]
1143    fn test_capture_filter_combined_filters() {
1144        let config = CaptureFilterConfig {
1145            path_patterns: vec![r"^/api/.*".to_string()],
1146            methods: vec!["POST".to_string()],
1147            errors_only: true,
1148            ..Default::default()
1149        };
1150
1151        let filter = CaptureFilter::new(config).unwrap();
1152
1153        // Must match all conditions
1154        assert!(filter.should_capture("POST", "/api/users", Some(500)));
1155        assert!(!filter.should_capture("GET", "/api/users", Some(500))); // Wrong method
1156        assert!(!filter.should_capture("POST", "/other/path", Some(500))); // Wrong path
1157        assert!(!filter.should_capture("POST", "/api/users", Some(200))); // Not an error
1158    }
1159
1160    // ==================== Default Value Function Tests ====================
1161
1162    #[test]
1163    fn test_default_uuid_replacement() {
1164        let replacement = default_uuid_replacement();
1165        assert!(replacement.contains("{{counter:012}}"));
1166    }
1167
1168    #[test]
1169    fn test_default_email_replacement() {
1170        let replacement = default_email_replacement();
1171        assert_eq!(replacement, "user@example.com");
1172    }
1173
1174    #[test]
1175    fn test_default_ip_replacement() {
1176        let replacement = default_ip_replacement();
1177        assert_eq!(replacement, "127.0.0.1");
1178    }
1179
1180    #[test]
1181    fn test_default_creditcard_replacement() {
1182        let replacement = default_creditcard_replacement();
1183        assert_eq!(replacement, "XXXX-XXXX-XXXX-XXXX");
1184    }
1185
1186    #[test]
1187    fn test_default_sample_rate() {
1188        let rate = default_sample_rate();
1189        assert_eq!(rate, 1.0);
1190    }
1191}