1use crate::{RecordedRequest, RecordedResponse, RecorderError, Result};
32use chrono::{DateTime, Utc};
33use once_cell::sync::Lazy;
34use regex::Regex;
35use serde::{Deserialize, Serialize};
36use std::collections::HashMap;
37use std::sync::Arc;
38use tracing::{debug, warn};
39
40static GLOBAL_SCRUBBER: Lazy<Arc<Scrubber>> = Lazy::new(|| {
42 Arc::new(Scrubber::from_env().unwrap_or_else(|e| {
43 warn!("Failed to load scrubber from environment: {}", e);
44 Scrubber::default()
45 }))
46});
47
48static GLOBAL_FILTER: Lazy<Arc<CaptureFilter>> = Lazy::new(|| {
50 Arc::new(CaptureFilter::from_env().unwrap_or_else(|e| {
51 warn!("Failed to load capture filter from environment: {}", e);
52 CaptureFilter::default()
53 }))
54});
55
56#[derive(Debug, Clone, Serialize, Deserialize, Default)]
58pub struct ScrubConfig {
59 #[serde(default)]
61 pub rules: Vec<ScrubRule>,
62
63 #[serde(default)]
65 pub deterministic: bool,
66
67 #[serde(default)]
69 pub counter_seed: u64,
70}
71
72#[derive(Debug, Clone, Serialize, Deserialize)]
74#[serde(tag = "type", rename_all = "lowercase")]
75pub enum ScrubRule {
76 Regex {
78 pattern: String,
80 replacement: String,
82 #[serde(default = "default_target")]
84 target: ScrubTarget,
85 },
86
87 Field {
89 field: String,
91 replacement: String,
93 #[serde(default = "default_target")]
95 target: ScrubTarget,
96 },
97
98 Header {
100 name: String,
102 replacement: String,
104 },
105
106 Uuid {
108 #[serde(default = "default_uuid_replacement")]
110 replacement: String,
111 },
112
113 Email {
115 #[serde(default = "default_email_replacement")]
117 replacement: String,
118 },
119
120 IpAddress {
122 #[serde(default = "default_ip_replacement")]
124 replacement: String,
125 },
126
127 CreditCard {
129 #[serde(default = "default_creditcard_replacement")]
131 replacement: String,
132 },
133}
134
135#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
137#[serde(rename_all = "lowercase")]
138pub enum ScrubTarget {
139 Headers,
141 Body,
143 All,
145}
146
147fn default_target() -> ScrubTarget {
148 ScrubTarget::All
149}
150
/// Serde default for the UUID replacement template: a nil-prefixed UUID whose
/// last group is the `{{counter:012}}` placeholder.
fn default_uuid_replacement() -> String {
    String::from("00000000-0000-0000-0000-{{counter:012}}")
}
154
/// Serde default for the email replacement text.
fn default_email_replacement() -> String {
    String::from("user@example.com")
}
158
/// Serde default for the IP address replacement text.
fn default_ip_replacement() -> String {
    String::from("127.0.0.1")
}
162
/// Serde default for the credit card replacement text.
fn default_creditcard_replacement() -> String {
    String::from("XXXX-XXXX-XXXX-XXXX")
}
166
167#[derive(Debug, Clone, Serialize, Deserialize)]
169pub struct CaptureFilterConfig {
170 #[serde(default)]
172 pub status_codes: Vec<u16>,
173
174 #[serde(default)]
176 pub path_patterns: Vec<String>,
177
178 #[serde(default)]
180 pub methods: Vec<String>,
181
182 #[serde(default)]
184 pub exclude_paths: Vec<String>,
185
186 #[serde(default)]
188 pub errors_only: bool,
189
190 #[serde(default = "default_sample_rate")]
192 pub sample_rate: f64,
193}
194
/// Default sampling rate: `1.0`, i.e. sampling disabled.
fn default_sample_rate() -> f64 {
    1.0_f64
}
198
199impl Default for CaptureFilterConfig {
200 fn default() -> Self {
201 Self {
202 status_codes: Vec::new(),
203 path_patterns: Vec::new(),
204 methods: Vec::new(),
205 exclude_paths: Vec::new(),
206 errors_only: false,
207 sample_rate: default_sample_rate(),
208 }
209 }
210}
211
212pub struct Scrubber {
214 config: ScrubConfig,
215 compiled_regexes: Vec<(Regex, String, ScrubTarget)>,
216 deterministic_counter: std::sync::atomic::AtomicU64,
217}
218
219impl Default for Scrubber {
220 fn default() -> Self {
221 Self {
222 config: ScrubConfig::default(),
223 compiled_regexes: Vec::new(),
224 deterministic_counter: std::sync::atomic::AtomicU64::new(0),
225 }
226 }
227}
228
229impl Scrubber {
230 pub fn new(config: ScrubConfig) -> Result<Self> {
232 let mut compiled_regexes = Vec::new();
233
234 for rule in &config.rules {
236 if let ScrubRule::Regex {
237 pattern,
238 replacement,
239 target,
240 } = rule
241 {
242 let regex = Regex::new(pattern).map_err(|e| {
243 RecorderError::InvalidFilter(format!(
244 "Invalid regex pattern '{}': {}",
245 pattern, e
246 ))
247 })?;
248 compiled_regexes.push((regex, replacement.clone(), *target));
249 }
250 }
251
252 Ok(Self {
253 deterministic_counter: std::sync::atomic::AtomicU64::new(config.counter_seed),
254 config,
255 compiled_regexes,
256 })
257 }
258
259 pub fn from_env() -> Result<Self> {
261 let scrub_json = std::env::var("MOCKFORGE_CAPTURE_SCRUB").ok();
262 let deterministic = std::env::var("MOCKFORGE_CAPTURE_DETERMINISTIC")
263 .ok()
264 .and_then(|v| v.parse::<bool>().ok())
265 .unwrap_or(false);
266
267 let mut config = if let Some(json) = scrub_json {
268 serde_json::from_str::<ScrubConfig>(&json).map_err(|e| {
269 RecorderError::InvalidFilter(format!("Invalid MOCKFORGE_CAPTURE_SCRUB JSON: {}", e))
270 })?
271 } else {
272 ScrubConfig::default()
273 };
274
275 config.deterministic = deterministic;
276
277 Self::new(config)
278 }
279
280 pub fn global() -> Arc<Self> {
282 Arc::clone(&GLOBAL_SCRUBBER)
283 }
284
285 pub fn scrub_request(&self, request: &mut RecordedRequest) {
287 if let Ok(mut headers) = serde_json::from_str::<HashMap<String, String>>(&request.headers) {
289 self.scrub_headers(&mut headers);
290 if let Ok(json) = serde_json::to_string(&headers) {
291 request.headers = json;
292 }
293 }
294
295 if let Some(ref mut body) = request.body {
297 if request.body_encoding == "utf8" {
298 *body = self.scrub_string(body, ScrubTarget::Body);
299 }
300 }
301
302 if let Some(ref mut query) = request.query_params {
304 *query = self.scrub_string(query, ScrubTarget::Body);
305 }
306
307 if self.config.deterministic {
309 request.timestamp = Self::normalize_timestamp(request.timestamp);
310 }
311
312 if let Some(ref mut trace_id) = request.trace_id {
314 *trace_id = self.scrub_string(trace_id, ScrubTarget::All);
315 }
316 if let Some(ref mut span_id) = request.span_id {
317 *span_id = self.scrub_string(span_id, ScrubTarget::All);
318 }
319 if let Some(ref mut client_ip) = request.client_ip {
320 *client_ip = self.scrub_string(client_ip, ScrubTarget::All);
321 }
322 }
323
324 pub fn scrub_response(&self, response: &mut RecordedResponse) {
326 if let Ok(mut headers) = serde_json::from_str::<HashMap<String, String>>(&response.headers)
328 {
329 self.scrub_headers(&mut headers);
330 if let Ok(json) = serde_json::to_string(&headers) {
331 response.headers = json;
332 }
333 }
334
335 if let Some(ref mut body) = response.body {
337 if response.body_encoding == "utf8" {
338 *body = self.scrub_string(body, ScrubTarget::Body);
339 }
340 }
341
342 if self.config.deterministic {
344 response.timestamp = Self::normalize_timestamp(response.timestamp);
345 }
346 }
347
348 fn scrub_headers(&self, headers: &mut HashMap<String, String>) {
350 for rule in &self.config.rules {
351 if let ScrubRule::Header { name, replacement } = rule {
352 let key = headers.keys().find(|k| k.eq_ignore_ascii_case(name)).cloned();
354 if let Some(key) = key {
355 headers.insert(key, replacement.clone());
356 }
357 }
358 }
359
360 for (key, value) in headers.iter_mut() {
362 *value = self.scrub_string(value, ScrubTarget::Headers);
363
364 for rule in &self.config.rules {
366 if let ScrubRule::Field {
367 field,
368 replacement,
369 target,
370 } = rule
371 {
372 if *target == ScrubTarget::Headers || *target == ScrubTarget::All {
373 if key.eq_ignore_ascii_case(field) {
374 *value = replacement.clone();
375 }
376 }
377 }
378 }
379 }
380 }
381
382 fn scrub_string(&self, input: &str, location: ScrubTarget) -> String {
384 let mut result = input.to_string();
385
386 for rule in &self.config.rules {
388 match rule {
389 ScrubRule::Uuid { replacement } => {
390 if location == ScrubTarget::All || location == ScrubTarget::Body {
391 result = self.scrub_uuids(&result, replacement);
392 }
393 }
394 ScrubRule::Email { replacement } => {
395 if location == ScrubTarget::All || location == ScrubTarget::Body {
396 result = self.scrub_emails(&result, replacement);
397 }
398 }
399 ScrubRule::IpAddress { replacement } => {
400 if location == ScrubTarget::All || location == ScrubTarget::Body {
401 result = self.scrub_ips(&result, replacement);
402 }
403 }
404 ScrubRule::CreditCard { replacement } => {
405 if location == ScrubTarget::All || location == ScrubTarget::Body {
406 result = self.scrub_credit_cards(&result, replacement);
407 }
408 }
409 ScrubRule::Field {
410 field,
411 replacement,
412 target,
413 } => {
414 if *target == location || *target == ScrubTarget::All {
415 result = self.scrub_json_field(&result, field, replacement);
416 }
417 }
418 _ => {}
419 }
420 }
421
422 for (regex, replacement, target) in &self.compiled_regexes {
424 if *target == location || *target == ScrubTarget::All {
425 result = regex.replace_all(&result, replacement.as_str()).to_string();
426 }
427 }
428
429 result
430 }
431
432 fn scrub_uuids(&self, input: &str, replacement: &str) -> String {
434 let uuid_pattern =
435 Regex::new(r"(?i)[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}")
436 .unwrap();
437
438 uuid_pattern
439 .replace_all(input, |_: ®ex::Captures| {
440 let counter =
441 self.deterministic_counter.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
442 replacement
443 .replace("{{counter}}", &counter.to_string())
444 .replace("{{counter:012}}", &format!("{:012}", counter))
445 })
446 .to_string()
447 }
448
449 fn scrub_emails(&self, input: &str, replacement: &str) -> String {
451 let email_pattern =
452 Regex::new(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b").unwrap();
453 email_pattern.replace_all(input, replacement).to_string()
454 }
455
456 fn scrub_ips(&self, input: &str, replacement: &str) -> String {
458 let ipv4_pattern = Regex::new(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b").unwrap();
459 ipv4_pattern.replace_all(input, replacement).to_string()
460 }
461
462 fn scrub_credit_cards(&self, input: &str, replacement: &str) -> String {
464 let cc_pattern = Regex::new(r"\b(?:\d{4}[-\s]?){3}\d{4}\b").unwrap();
465 cc_pattern.replace_all(input, replacement).to_string()
466 }
467
468 fn scrub_json_field(&self, input: &str, field_path: &str, replacement: &str) -> String {
470 if let Ok(mut json) = serde_json::from_str::<serde_json::Value>(input) {
472 if self.scrub_json_value(&mut json, field_path, replacement) {
473 if let Ok(result) = serde_json::to_string(&json) {
474 return result;
475 }
476 }
477 }
478 input.to_string()
479 }
480
481 fn scrub_json_value(
483 &self,
484 value: &mut serde_json::Value,
485 field_path: &str,
486 replacement: &str,
487 ) -> bool {
488 let parts: Vec<&str> = field_path.split('.').collect();
489 if parts.is_empty() {
490 return false;
491 }
492
493 if parts.len() == 1 {
494 if let Some(obj) = value.as_object_mut() {
496 if obj.contains_key(parts[0]) {
497 obj.insert(
498 parts[0].to_string(),
499 serde_json::Value::String(replacement.to_string()),
500 );
501 return true;
502 }
503 }
504 } else {
505 if let Some(obj) = value.as_object_mut() {
507 if let Some(child) = obj.get_mut(parts[0]) {
508 let remaining = parts[1..].join(".");
509 return self.scrub_json_value(child, &remaining, replacement);
510 }
511 }
512 }
513
514 false
515 }
516
517 fn normalize_timestamp(timestamp: DateTime<Utc>) -> DateTime<Utc> {
519 timestamp.date_naive().and_hms_opt(0, 0, 0).unwrap().and_utc()
521 }
522}
523
524pub struct CaptureFilter {
526 config: CaptureFilterConfig,
527 path_patterns: Vec<Regex>,
528 exclude_patterns: Vec<Regex>,
529}
530
531impl Default for CaptureFilter {
532 fn default() -> Self {
533 Self {
534 config: CaptureFilterConfig::default(),
535 path_patterns: Vec::new(),
536 exclude_patterns: Vec::new(),
537 }
538 }
539}
540
541impl CaptureFilter {
542 pub fn new(config: CaptureFilterConfig) -> Result<Self> {
544 let mut path_patterns = Vec::new();
545 for pattern in &config.path_patterns {
546 let regex = Regex::new(pattern).map_err(|e| {
547 RecorderError::InvalidFilter(format!("Invalid path pattern '{}': {}", pattern, e))
548 })?;
549 path_patterns.push(regex);
550 }
551
552 let mut exclude_patterns = Vec::new();
553 for pattern in &config.exclude_paths {
554 let regex = Regex::new(pattern).map_err(|e| {
555 RecorderError::InvalidFilter(format!(
556 "Invalid exclude pattern '{}': {}",
557 pattern, e
558 ))
559 })?;
560 exclude_patterns.push(regex);
561 }
562
563 Ok(Self {
564 config,
565 path_patterns,
566 exclude_patterns,
567 })
568 }
569
570 pub fn from_env() -> Result<Self> {
572 let filter_json = std::env::var("MOCKFORGE_CAPTURE_FILTER").ok();
573
574 let config = if let Some(json) = filter_json {
575 serde_json::from_str::<CaptureFilterConfig>(&json).map_err(|e| {
576 RecorderError::InvalidFilter(format!(
577 "Invalid MOCKFORGE_CAPTURE_FILTER JSON: {}",
578 e
579 ))
580 })?
581 } else {
582 CaptureFilterConfig::default()
583 };
584
585 Self::new(config)
586 }
587
588 pub fn global() -> Arc<Self> {
590 Arc::clone(&GLOBAL_FILTER)
591 }
592
593 pub fn should_capture(&self, method: &str, path: &str, status_code: Option<u16>) -> bool {
595 debug!(
596 "should_capture called: method={}, path={}, status_code={:?}",
597 method, path, status_code
598 );
599 debug!(" errors_only={}, status_codes={:?}, path_patterns count={}, exclude_patterns count={}",
600 self.config.errors_only, self.config.status_codes, self.path_patterns.len(), self.exclude_patterns.len());
601
602 if self.config.sample_rate < 1.0 {
604 use std::collections::hash_map::DefaultHasher;
605 use std::hash::{Hash, Hasher};
606
607 let mut hasher = DefaultHasher::new();
608 path.hash(&mut hasher);
609 let hash = hasher.finish();
610 let sample = (hash % 1000) as f64 / 1000.0;
611
612 if sample > self.config.sample_rate {
613 debug!(
614 "Skipping capture due to sample rate: {} > {}",
615 sample, self.config.sample_rate
616 );
617 return false;
618 }
619 }
620
621 if self.config.errors_only {
623 if let Some(code) = status_code {
624 if code < 400 {
625 debug!("Skipping capture: not an error (status {})", code);
626 return false;
627 }
628 } else {
629 debug!("errors_only is set but no status code provided, allowing for now");
632 }
633 }
634
635 if !self.config.status_codes.is_empty() {
637 if let Some(code) = status_code {
638 if !self.config.status_codes.contains(&code) {
639 debug!("Skipping capture: status code {} not in filter", code);
640 return false;
641 }
642 } else {
643 debug!("status_codes filter set but no status code provided, allowing for now");
645 }
646 }
647
648 if !self.config.methods.is_empty() {
650 if !self.config.methods.iter().any(|m| m.eq_ignore_ascii_case(method)) {
651 debug!("Skipping capture: method {} not in filter", method);
652 return false;
653 }
654 }
655
656 for pattern in &self.exclude_patterns {
658 if pattern.is_match(path) {
659 debug!("Skipping capture: path {} matches exclude pattern", path);
660 return false;
661 }
662 }
663
664 if !self.path_patterns.is_empty() {
666 let matches = self.path_patterns.iter().any(|p| p.is_match(path));
667 if !matches {
668 debug!("Skipping capture: path {} does not match any pattern", path);
669 return false;
670 }
671 }
672
673 true
674 }
675}
676
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a non-deterministic scrubber with a single rule and a counter
    /// starting at zero.
    fn scrubber_with(rule: ScrubRule) -> Scrubber {
        let config = ScrubConfig {
            rules: vec![rule],
            deterministic: false,
            counter_seed: 0,
        };
        Scrubber::new(config).unwrap()
    }

    /// Builds a capture filter from `config`.
    fn filter_with(config: CaptureFilterConfig) -> CaptureFilter {
        CaptureFilter::new(config).unwrap()
    }

    #[test]
    fn test_scrub_email() {
        let scrubber = scrubber_with(ScrubRule::Email {
            replacement: "user@example.com".to_string(),
        });

        let scrubbed = scrubber.scrub_string(
            r#"{"email": "john.doe@company.com", "name": "John"}"#,
            ScrubTarget::All,
        );

        assert!(scrubbed.contains("user@example.com"));
        assert!(!scrubbed.contains("john.doe@company.com"));
    }

    #[test]
    fn test_scrub_uuid() {
        let scrubber = scrubber_with(ScrubRule::Uuid {
            replacement: "00000000-0000-0000-0000-{{counter:012}}".to_string(),
        });

        let scrubbed = scrubber.scrub_string(
            "Request ID: 123e4567-e89b-12d3-a456-426614174000",
            ScrubTarget::All,
        );

        // The counter starts at zero, so the first UUID maps to all zeroes.
        assert!(scrubbed.contains("00000000-0000-0000-0000-000000000000"));
        assert!(!scrubbed.contains("123e4567-e89b-12d3-a456-426614174000"));
    }

    #[test]
    fn test_scrub_json_field() {
        let scrubber = scrubber_with(ScrubRule::Field {
            field: "user.email".to_string(),
            replacement: "redacted@example.com".to_string(),
            target: ScrubTarget::All,
        });

        let scrubbed = scrubber.scrub_string(
            r#"{"user": {"email": "secret@company.com", "name": "John"}}"#,
            ScrubTarget::Body,
        );

        assert!(scrubbed.contains("redacted@example.com"));
        assert!(!scrubbed.contains("secret@company.com"));
    }

    #[test]
    fn test_capture_filter_status_code() {
        let filter = filter_with(CaptureFilterConfig {
            status_codes: vec![500, 502, 503],
            ..Default::default()
        });

        assert!(filter.should_capture("GET", "/api/test", Some(500)));
        assert!(filter.should_capture("POST", "/api/test", Some(502)));
        assert!(!filter.should_capture("GET", "/api/test", Some(200)));
        assert!(!filter.should_capture("GET", "/api/test", Some(404)));
    }

    #[test]
    fn test_capture_filter_errors_only() {
        let filter = filter_with(CaptureFilterConfig {
            errors_only: true,
            ..Default::default()
        });

        assert!(filter.should_capture("GET", "/api/test", Some(400)));
        assert!(filter.should_capture("GET", "/api/test", Some(500)));
        assert!(!filter.should_capture("GET", "/api/test", Some(200)));
        assert!(!filter.should_capture("GET", "/api/test", Some(304)));
    }

    #[test]
    fn test_capture_filter_path_pattern() {
        let filter = filter_with(CaptureFilterConfig {
            path_patterns: vec![r"^/api/v1/.*".to_string()],
            ..Default::default()
        });

        assert!(filter.should_capture("GET", "/api/v1/users", None));
        assert!(filter.should_capture("POST", "/api/v1/orders", None));
        assert!(!filter.should_capture("GET", "/api/v2/users", None));
        assert!(!filter.should_capture("GET", "/health", None));
    }

    #[test]
    fn test_capture_filter_exclude() {
        let filter = filter_with(CaptureFilterConfig {
            exclude_paths: vec![r"/health".to_string(), r"/metrics".to_string()],
            ..Default::default()
        });

        assert!(filter.should_capture("GET", "/api/users", None));
        assert!(!filter.should_capture("GET", "/health", None));
        assert!(!filter.should_capture("GET", "/metrics", None));
    }
}