1use std::sync::LazyLock;
12
13use regex::Regex;
14
/// One compiled scrub rule: a regex plus the text that replaces each match.
struct ScrubPattern {
    /// Compiled pattern that locates the sensitive text.
    regex: Regex,
    /// Replacement handed to `Regex::replace_all` for every match of `regex`.
    replacement: &'static str,
    /// `true` for rules that redact player display names; such rules are
    /// skipped when [`ScrubOptions::keep_player_names`] is set.
    is_player_name: bool,
}
25
/// Options selecting which scrub rules [`scrub_raw_log_with`] applies.
///
/// The derived `Default` leaves `keep_player_names` as `false`, i.e. every
/// rule is applied.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct ScrubOptions {
    /// When `true`, the rules flagged as player-name rules are skipped, so the
    /// names they would have redacted are left in the output.
    pub keep_player_names: bool,
}
54
55static SCRUB_PATTERNS: LazyLock<Vec<ScrubPattern>> = LazyLock::new(|| {
73 let definitions: &[(&str, &str, bool)] = &[
80 (r"Token:\s*\S+", "Token: <redacted>", false),
83 (r"\bBearer\s+\S+", "Bearer <redacted>", false),
90 (r"Match to [A-Z0-9_]+:", "Match to <redacted>:", false),
94 (
96 r#""[Cc]lient[Ii]d"\s*:\s*"[^"]+""#,
97 r#""clientId": "<redacted>""#,
98 false,
99 ),
100 (
102 r#""[Uu]ser[Ii]d"\s*:\s*"[^"]+""#,
103 r#""userId": "<redacted>""#,
104 false,
105 ),
106 (r"[A-Z]:\\Users\\[^\\]+\\", r"<user-path>\", false),
108 (r"/Users/[^/]+/", "<user-path>/", false),
110 (r"/home/[^/]+/", "<user-path>/", false),
112 (
115 r#""[Tt]oken"\s*:\s*"[^"]+""#,
116 r#""token": "<redacted>""#,
117 false,
118 ),
119 (
121 r#""[Ss]ession[Ii]d"\s*:\s*"[^"]+""#,
122 r#""sessionId": "<redacted>""#,
123 false,
124 ),
125 (
129 r#""[Ss]creen[Nn]ame"\s*:\s*"[^"]+""#,
130 r#""screenName": "<redacted>""#,
131 true,
132 ),
133 (
138 r#""[Pp]layer[Nn]ame"\s*:\s*"[^"]+""#,
139 r#""playerName": "<redacted>""#,
140 true,
141 ),
142 (r"(?m)^\s+Renderer:\s+.+", " Renderer: <redacted>", false),
146 (r"(?m)^\s+Vendor:\s+.+", " Vendor: <redacted>", false),
148 (r"(?m)^\s+VRAM:\s+.+", " VRAM: <redacted>", false),
150 (r"(?m)^\s+Driver:\s+.+", " Driver: <redacted>", false),
152 (
155 r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}",
156 "<email-redacted>",
157 false,
158 ),
159 (
176 concat!(
177 r"::(?:[0-9a-fA-F]{1,4}(?::[0-9a-fA-F]{1,4})*)?",
178 r"|\b[0-9a-fA-F]{1,4}(?::[0-9a-fA-F]{1,4})*::[0-9a-fA-F]{0,4}(?::[0-9a-fA-F]{1,4})*",
179 r"|\b(?:[0-9a-fA-F]{1,4}:){3,7}[0-9a-fA-F]{1,4}\b",
180 ),
181 "<ip-redacted>",
182 false,
183 ),
184 (
196 r"\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b",
197 "<ip-redacted>",
198 false,
199 ),
200 ];
201
202 definitions
203 .iter()
204 .filter_map(|(pattern, replacement, is_player_name)| {
205 match Regex::new(pattern) {
209 Ok(regex) => Some(ScrubPattern {
210 regex,
211 replacement,
212 is_player_name: *is_player_name,
213 }),
214 Err(e) => {
215 ::log::error!("BUG: failed to compile privacy pattern {pattern:?}: {e}");
216 None
217 }
218 }
219 })
220 .collect()
221});
222
223pub fn scrub_raw_log(input: &str) -> String {
242 scrub_raw_log_with(input, &ScrubOptions::default())
243}
244
245pub fn scrub_raw_log_with(input: &str, opts: &ScrubOptions) -> String {
263 if input.is_empty() {
264 return String::new();
265 }
266
267 let mut result = input.to_owned();
268 for pattern in SCRUB_PATTERNS.iter() {
269 if opts.keep_player_names && pattern.is_player_name {
270 continue;
271 }
272 result = pattern
273 .regex
274 .replace_all(&result, pattern.replacement)
275 .into_owned();
276 }
277 result
278}
279
#[cfg(test)]
mod tests {
    use super::*;

    // -- Empty input and input with nothing to scrub ------------------------

    #[test]
    fn test_scrub_raw_log_empty_input_returns_empty() {
        assert_eq!(scrub_raw_log(""), "");
    }

    #[test]
    fn test_scrub_raw_log_single_line_no_sensitive_data_unchanged() {
        let input = "[UnityCrossThreadLogger] Game started";
        assert_eq!(scrub_raw_log(input), input);
    }

    #[test]
    fn test_scrub_raw_log_multiline_no_sensitive_data_unchanged() {
        let input = "Line 1\nLine 2\nLine 3\n";
        assert_eq!(scrub_raw_log(input), input);
    }

    // -- "Token: <value>" ----------------------------------------------------

    #[test]
    fn test_scrub_raw_log_token_value_redacted() {
        let input =
            "Token: eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.signature";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Token: <redacted>");
    }

    #[test]
    fn test_scrub_raw_log_token_no_space_after_colon_redacted() {
        let input = "Token:abc123def456";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Token: <redacted>");
    }

    #[test]
    fn test_scrub_raw_log_token_with_surrounding_text() {
        let input = "[Auth] Login response Token: eyJhbGciOiJSUzI1NiJ9.payload.sig -- done";
        let result = scrub_raw_log(input);
        assert_eq!(result, "[Auth] Login response Token: <redacted> -- done");
    }

    #[test]
    fn test_scrub_raw_log_multiple_tokens_on_separate_lines() {
        let input = "Token: first_token\nSome other line\nToken: second_token\n";
        let result = scrub_raw_log(input);
        assert!(result.contains("Token: <redacted>"));
        assert!(!result.contains("first_token"));
        assert!(!result.contains("second_token"));
    }

    // -- "Bearer <value>" ----------------------------------------------------

    #[test]
    fn test_scrub_raw_log_bearer_token_redacted() {
        let input = "Authorization: Bearer eyJhbGciOiJSUzI1NiJ9.payload.signature";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Authorization: Bearer <redacted>");
    }

    #[test]
    fn test_scrub_raw_log_bearer_with_extra_whitespace() {
        // Several spaces between "Bearer" and the value, so the `\s+` in the
        // rule is actually exercised with more than one whitespace character.
        let input = "Bearer    some_token_value";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Bearer <redacted>");
    }

    #[test]
    fn test_scrub_raw_log_bearer_false_positive_standard_bearer_not_redacted() {
        let input = r#""Title_StandardBearer""#;
        assert_eq!(scrub_raw_log(input), input);
    }

    #[test]
    fn test_scrub_raw_log_bearer_jwt_still_redacted() {
        let input = "Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.payload.signature";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Authorization: Bearer <redacted>");
        assert!(!result.contains("eyJhbGciOiJIUzI1NiJ9"));
    }

    // -- Windows user paths --------------------------------------------------

    #[test]
    fn test_scrub_raw_log_windows_path_redacted() {
        let input =
            r"Loading from C:\Users\JohnDoe\AppData\LocalLow\Wizards Of The Coast\MTGA\Player.log";
        let result = scrub_raw_log(input);
        assert!(result.contains(r"<user-path>\AppData\LocalLow"));
        assert!(!result.contains("JohnDoe"));
    }

    #[test]
    fn test_scrub_raw_log_windows_path_different_drive_letter() {
        let input = r"D:\Users\Alice\Documents\game.log";
        let result = scrub_raw_log(input);
        assert!(result.contains(r"<user-path>\Documents"));
        assert!(!result.contains("Alice"));
    }

    // -- macOS user paths ----------------------------------------------------

    #[test]
    fn test_scrub_raw_log_macos_path_redacted() {
        let input = "/Users/johndoe/Library/Logs/com.wizards.mtga/Player.log";
        let result = scrub_raw_log(input);
        assert!(result.contains("<user-path>/Library/Logs"));
        assert!(!result.contains("johndoe"));
    }

    #[test]
    fn test_scrub_raw_log_macos_path_with_spaces_in_context() {
        let input = "Reading file at /Users/jane_doe/Library/Logs/app.log successfully";
        let result = scrub_raw_log(input);
        assert!(result.contains("<user-path>/Library/Logs"));
        assert!(!result.contains("jane_doe"));
    }

    // -- Linux user paths ----------------------------------------------------

    #[test]
    fn test_scrub_raw_log_linux_path_redacted() {
        let input = "/home/gamer/.local/share/Steam/steamapps/common/MTGA/Player.log";
        let result = scrub_raw_log(input);
        assert!(result.contains("<user-path>/.local/share"));
        assert!(!result.contains("gamer"));
    }

    #[test]
    fn test_scrub_raw_log_linux_path_different_username() {
        let input = "Config at /home/mtg_player/.config/manasight/settings.toml";
        let result = scrub_raw_log(input);
        assert!(result.contains("<user-path>/.config/manasight"));
        assert!(!result.contains("mtg_player"));
    }

    // -- JSON "token" / "sessionId" values -----------------------------------

    #[test]
    fn test_scrub_raw_log_json_token_value_redacted() {
        let input = r#"{"screenName": "Player#1", "token": "abc123secret"}"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""token": "<redacted>""#));
        assert!(!result.contains("abc123secret"));
    }

    #[test]
    fn test_scrub_raw_log_json_token_uppercase_key_redacted() {
        let input = r#"{"Token": "eyJhbGci.payload.sig"}"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""token": "<redacted>""#));
        assert!(!result.contains("eyJhbGci"));
    }

    #[test]
    fn test_scrub_raw_log_json_session_id_redacted() {
        let input = r#"{"sessionId": "sess_abc123def456", "status": "connected"}"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""sessionId": "<redacted>""#));
        assert!(!result.contains("sess_abc123def456"));
    }

    #[test]
    fn test_scrub_raw_log_authenticate_response_block() {
        let input = "[UnityCrossThreadLogger]authenticateResponse\n\
            {\"screenName\": \"TestPlayer#12345\", \"token\": \"secret_jwt_value\"}";
        let result = scrub_raw_log(input);
        assert!(!result.contains("secret_jwt_value"));
        assert!(result.contains(r#""token": "<redacted>""#));
        assert!(!result.contains("TestPlayer#12345"));
        assert!(result.contains(r#""screenName": "<redacted>""#));
    }

    #[test]
    fn test_scrub_raw_log_session_id_with_spaces_in_json() {
        let input = r#"{ "SessionId" : "long-session-id-value-here" }"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""sessionId": "<redacted>""#));
        assert!(!result.contains("long-session-id-value-here"));
    }

    // -- "Match to <ID>:" ----------------------------------------------------

    #[test]
    fn test_scrub_raw_log_match_to_account_id_redacted() {
        let input = "Match to CR4QJUQPDBCVVMGCGNZLWGDFJE: AuthenticateResponse";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Match to <redacted>: AuthenticateResponse");
        assert!(!result.contains("CR4QJUQPDBCVVMGCGNZLWGDFJE"));
    }

    #[test]
    fn test_scrub_raw_log_match_to_with_underscore_in_id() {
        let input = "Match to SOME_ACCOUNT_ID_123: MatchCreated";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Match to <redacted>: MatchCreated");
        assert!(!result.contains("SOME_ACCOUNT_ID_123"));
    }

    #[test]
    fn test_scrub_raw_log_match_to_with_log_timestamp_prefix() {
        let input = "[UnityCrossThreadLogger]3/22/2026 12:00:31 PM: Match to CR4QJUQPDBCVVMGCGNZLWGDFJE: AuthenticateResponse";
        let result = scrub_raw_log(input);
        assert!(result.contains("Match to <redacted>:"));
        assert!(!result.contains("CR4QJUQPDBCVVMGCGNZLWGDFJE"));
    }

    // -- JSON "clientId" -----------------------------------------------------

    #[test]
    fn test_scrub_raw_log_json_client_id_redacted() {
        let input = r#""clientId": "CR4QJUQPDBCVVMGCGNZLWGDFJE""#;
        let result = scrub_raw_log(input);
        assert_eq!(result, r#""clientId": "<redacted>""#);
        assert!(!result.contains("CR4QJUQPDBCVVMGCGNZLWGDFJE"));
    }

    #[test]
    fn test_scrub_raw_log_json_client_id_with_spaces() {
        let input = r#"{ "ClientId" : "ABCDEF123456" }"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""clientId": "<redacted>""#));
        assert!(!result.contains("ABCDEF123456"));
    }

    // -- JSON "userId" -------------------------------------------------------

    #[test]
    fn test_scrub_raw_log_json_user_id_redacted() {
        let input = r#""userId": "CR4QJUQPDBCVVMGCGNZLWGDFJE""#;
        let result = scrub_raw_log(input);
        assert_eq!(result, r#""userId": "<redacted>""#);
        assert!(!result.contains("CR4QJUQPDBCVVMGCGNZLWGDFJE"));
    }

    #[test]
    fn test_scrub_raw_log_json_user_id_uppercase_key() {
        let input = r#"{"UserId": "OPPONENT_ACCOUNT_ID_XYZ"}"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""userId": "<redacted>""#));
        assert!(!result.contains("OPPONENT_ACCOUNT_ID_XYZ"));
    }

    #[test]
    fn test_scrub_raw_log_json_user_id_in_match_event() {
        let input = r#"{"players": [{"userId": "PLAYER_ABC"}, {"userId": "OPPONENT_XYZ"}]}"#;
        let result = scrub_raw_log(input);
        assert!(!result.contains("PLAYER_ABC"));
        assert!(!result.contains("OPPONENT_XYZ"));
        assert_eq!(result.matches(r#""userId": "<redacted>""#).count(), 2);
    }

    // -- JSON "screenName" ---------------------------------------------------

    #[test]
    fn test_scrub_raw_log_screen_name_redacted() {
        let input = r#""screenName": "PlayerDisplayName#12345""#;
        let result = scrub_raw_log(input);
        assert_eq!(result, r#""screenName": "<redacted>""#);
        assert!(!result.contains("PlayerDisplayName"));
    }

    #[test]
    fn test_scrub_raw_log_screen_name_uppercase_key() {
        let input = r#"{"ScreenName": "SomePlayer#99999"}"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""screenName": "<redacted>""#));
        assert!(!result.contains("SomePlayer"));
    }

    #[test]
    fn test_scrub_raw_log_screen_name_no_space_after_colon() {
        let input = r#""screenName":"Truffie#12345""#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""screenName": "<redacted>""#));
        assert!(!result.contains("Truffie"));
    }

    // -- JSON "playerName" ---------------------------------------------------

    #[test]
    fn test_scrub_raw_log_player_name_redacted() {
        let input = r#""playerName": "OpponentName#67890""#;
        let result = scrub_raw_log(input);
        assert_eq!(result, r#""playerName": "<redacted>""#);
        assert!(!result.contains("OpponentName"));
    }

    #[test]
    fn test_scrub_raw_log_player_name_both_players_redacted() {
        let input =
            r#"{"players": [{"playerName": "LocalPlayer#111"}, {"playerName": "Opponent#222"}]}"#;
        let result = scrub_raw_log(input);
        assert!(!result.contains("LocalPlayer"));
        assert!(!result.contains("Opponent"));
        assert_eq!(result.matches(r#""playerName": "<redacted>""#).count(), 2);
    }

    #[test]
    fn test_scrub_raw_log_player_name_uppercase_key() {
        let input = r#"{"PlayerName": "SomeUser#42"}"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""playerName": "<redacted>""#));
        assert!(!result.contains("SomeUser"));
    }

    // -- Hardware fingerprint lines ------------------------------------------

    #[test]
    fn test_scrub_raw_log_hardware_fingerprint_all_lines_redacted() {
        let input =
            " Renderer: NVIDIA GeForce RTX 3080\n Vendor: NVIDIA\n VRAM: 10240\n Driver: 537.58";
        let result = scrub_raw_log(input);
        assert!(!result.contains("NVIDIA GeForce RTX 3080"));
        assert!(!result.contains("NVIDIA"));
        assert!(!result.contains("10240"));
        assert!(!result.contains("537.58"));
        assert!(result.contains("Renderer: <redacted>"));
        assert!(result.contains("Vendor: <redacted>"));
        assert!(result.contains("VRAM: <redacted>"));
        assert!(result.contains("Driver: <redacted>"));
    }

    #[test]
    fn test_scrub_raw_log_hardware_fingerprint_in_full_log_header() {
        let input = "\
[UnityCrossThreadLogger] Version: 1.2.3.4
 SystemInfo:
 Renderer: AMD Radeon RX 6800 XT
 Vendor: AMD
 VRAM: 16384
 Driver: 23.12.1
[UnityCrossThreadLogger] Game starting";
        let result = scrub_raw_log(input);
        assert!(!result.contains("AMD Radeon RX 6800 XT"));
        assert!(!result.contains("16384"));
        assert!(!result.contains("23.12.1"));
        // The four-part version string is caught by the IPv4 rule.
        assert!(!result.contains("1.2.3.4"));
        assert!(result.contains("Version: <ip-redacted>"));
        assert!(result.contains("Game starting"));
    }

    #[test]
    fn test_scrub_raw_log_hardware_renderer_not_matched_without_leading_whitespace() {
        let input = "Renderer: some game object reference";
        assert_eq!(scrub_raw_log(input), input);
    }

    #[test]
    fn test_scrub_raw_log_hardware_vendor_not_matched_without_leading_whitespace() {
        let input = "Vendor: some vendor string in game data";
        assert_eq!(scrub_raw_log(input), input);
    }

    // -- Mixed input and edge cases ------------------------------------------

    #[test]
    fn test_scrub_raw_log_mixed_sensitive_data_all_redacted() {
        let input = "\
[Auth] Token: eyJhbGciOiJSUzI1NiJ9.payload.sig
[HTTP] Authorization: Bearer eyToken123.payload.sig
[Init] Loading config from C:\\Users\\JaneDoe\\AppData\\Local\\manasight\\config.toml
[Init] Log path: /Users/johndoe/Library/Logs/manasight.log
[Init] Linux path: /home/linuxuser/.local/share/manasight/data.db
[Game] Match started: event=PlayQueue";

        let result = scrub_raw_log(input);

        assert!(!result.contains("eyJhbGciOiJSUzI1NiJ9"));
        assert!(!result.contains("eyToken123"));
        assert!(!result.contains("JaneDoe"));
        assert!(!result.contains("johndoe"));
        assert!(!result.contains("linuxuser"));

        assert!(result.contains("Token: <redacted>"));
        assert!(result.contains("Bearer <redacted>"));
        assert!(result.contains(r"<user-path>\AppData"));
        assert!(result.contains("<user-path>/Library/Logs"));
        assert!(result.contains("<user-path>/.local/share"));

        assert!(result.contains("[Game] Match started: event=PlayQueue"));
    }

    #[test]
    fn test_scrub_raw_log_preserves_line_endings() {
        let input = "Line 1\r\nToken: secret_value\r\nLine 3\r\n";
        let result = scrub_raw_log(input);
        assert!(result.contains("\r\n"));
        assert!(result.contains("Token: <redacted>"));
    }

    #[test]
    fn test_scrub_raw_log_large_input_does_not_panic() {
        let line = "Normal log line without sensitive data\n";
        let large_input: String = line.repeat(25_000);
        let result = scrub_raw_log(&large_input);
        assert_eq!(result.len(), large_input.len());
    }

    #[test]
    fn test_scrub_raw_log_token_at_end_of_line_no_trailing_space() {
        let input = "Token: abc123";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Token: <redacted>");
    }

    #[test]
    fn test_scrub_raw_log_bearer_at_end_of_line_no_trailing_space() {
        let input = "Bearer abc123";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Bearer <redacted>");
    }

    #[test]
    fn test_scrub_raw_log_path_only_line() {
        let input = r"C:\Users\SomeUser\";
        let result = scrub_raw_log(input);
        assert_eq!(result, r"<user-path>\");
    }

    #[test]
    fn test_scrub_raw_log_multiple_paths_on_same_line() {
        let input = "Copied /Users/alice/source.txt to /Users/bob/dest.txt";
        let result = scrub_raw_log(input);
        assert!(!result.contains("alice"));
        assert!(!result.contains("bob"));
        assert_eq!(
            result,
            "Copied <user-path>/source.txt to <user-path>/dest.txt"
        );
    }

    #[test]
    fn test_scrub_raw_log_idempotent() {
        let input = "Token: secret123\n/home/user/.config/app.toml";
        let first_pass = scrub_raw_log(input);
        let second_pass = scrub_raw_log(&first_pass);
        assert_eq!(first_pass, second_pass, "Scrubbing should be idempotent");
    }

    // -- Inputs that must not be redacted ------------------------------------

    #[test]
    fn test_scrub_raw_log_lowercase_token_not_redacted() {
        let input = "token: not_a_real_token";
        assert_eq!(scrub_raw_log(input), input);
    }

    #[test]
    fn test_scrub_raw_log_lowercase_bearer_not_redacted() {
        let input = "bearer not_a_real_token";
        assert_eq!(scrub_raw_log(input), input);
    }

    #[test]
    fn test_scrub_raw_log_non_user_paths_not_redacted() {
        let input = "/usr/local/bin/mtga\n/etc/config.toml\n/var/log/syslog";
        assert_eq!(scrub_raw_log(input), input);
    }

    // -- ScrubOptions::keep_player_names -------------------------------------

    #[test]
    fn test_scrub_raw_log_with_keep_player_names_false_redacts_names() {
        let opts = ScrubOptions {
            keep_player_names: false,
        };
        let input = r#""screenName": "Alice#123", "playerName": "Bob#456""#;
        let result = scrub_raw_log_with(input, &opts);
        assert!(!result.contains("Alice"));
        assert!(!result.contains("Bob"));
        assert!(result.contains(r#""screenName": "<redacted>""#));
        assert!(result.contains(r#""playerName": "<redacted>""#));
    }

    #[test]
    fn test_scrub_raw_log_with_keep_player_names_true_preserves_names() {
        let opts = ScrubOptions {
            keep_player_names: true,
        };
        let input = r#""screenName": "Alice#123", "playerName": "Bob#456""#;
        let result = scrub_raw_log_with(input, &opts);
        assert!(result.contains("Alice#123"));
        assert!(result.contains("Bob#456"));
    }

    #[test]
    fn test_scrub_raw_log_with_keep_player_names_true_still_redacts_tokens() {
        let opts = ScrubOptions {
            keep_player_names: true,
        };
        let input = r#"Token: secret123 and "screenName": "Alice#123""#;
        let result = scrub_raw_log_with(input, &opts);
        assert!(result.contains("Token: <redacted>"));
        assert!(!result.contains("secret123"));
        assert!(result.contains("Alice#123"));
    }

    #[test]
    fn test_scrub_raw_log_with_keep_player_names_true_still_redacts_session_ids() {
        let opts = ScrubOptions {
            keep_player_names: true,
        };
        let input = r#"{"sessionId": "sess_xyz789", "screenName": "Alice#123"}"#;
        let result = scrub_raw_log_with(input, &opts);
        assert!(result.contains(r#""sessionId": "<redacted>""#));
        assert!(!result.contains("sess_xyz789"));
        assert!(result.contains("Alice#123"));
    }

    #[test]
    fn test_scrub_raw_log_with_keep_player_names_true_still_redacts_paths() {
        let opts = ScrubOptions {
            keep_player_names: true,
        };
        let input = r#""playerName": "Alice#123" at /home/alice/.config/app"#;
        let result = scrub_raw_log_with(input, &opts);
        assert!(result.contains("Alice#123"));
        assert!(!result.contains("/home/alice/"));
        assert!(result.contains("<user-path>/"));
    }

    #[test]
    fn test_scrub_raw_log_with_keep_player_names_true_still_redacts_client_id() {
        let opts = ScrubOptions {
            keep_player_names: true,
        };
        let input = r#"{"clientId": "CR4QJUQP", "screenName": "Alice#123"}"#;
        let result = scrub_raw_log_with(input, &opts);
        assert!(result.contains(r#""clientId": "<redacted>""#));
        assert!(!result.contains("CR4QJUQP"));
        assert!(result.contains("Alice#123"));
    }

    #[test]
    fn test_scrub_raw_log_with_keep_player_names_true_still_redacts_hardware_fingerprints() {
        let opts = ScrubOptions {
            keep_player_names: true,
        };
        let input = "\"playerName\": \"Alice#123\"\n Renderer: NVIDIA GeForce RTX 3080";
        let result = scrub_raw_log_with(input, &opts);
        assert!(result.contains("Alice#123"));
        assert!(!result.contains("NVIDIA GeForce RTX 3080"));
        assert!(result.contains("Renderer: <redacted>"));
    }

    #[test]
    fn test_scrub_raw_log_with_default_opts_equals_scrub_raw_log() {
        let inputs = [
            r#""screenName": "Alice#123", Token: secret"#,
            "Token: abc Bearer tok123",
            r#"{"sessionId": "s1", "playerName": "Bob#99"}"#,
            "[UnityCrossThreadLogger] Game started",
            "",
        ];
        for input in &inputs {
            assert_eq!(
                scrub_raw_log(input),
                scrub_raw_log_with(input, &ScrubOptions::default()),
                "scrub_raw_log and scrub_raw_log_with(default) differ for input: {input:?}"
            );
        }
    }

    // -- E-mail addresses ----------------------------------------------------

    #[test]
    fn test_scrub_raw_log_email_address_redacted() {
        let input = "Contact: user@example.com for support";
        let result = scrub_raw_log(input);
        assert!(!result.contains("user@example.com"));
        assert!(result.contains("<email-redacted>"));
    }

    #[test]
    fn test_scrub_raw_log_email_in_json_value_redacted() {
        let input = r#"{"email": "player.one+mtga@arena.wizards.com"}"#;
        let result = scrub_raw_log(input);
        assert!(!result.contains("player.one+mtga@arena.wizards.com"));
        assert!(result.contains("<email-redacted>"));
    }

    #[test]
    fn test_scrub_raw_log_multiple_emails_on_same_line_redacted() {
        let input = "From: alice@example.com To: bob@example.org";
        let result = scrub_raw_log(input);
        assert!(!result.contains("alice@example.com"));
        assert!(!result.contains("bob@example.org"));
        assert_eq!(result.matches("<email-redacted>").count(), 2);
    }

    // -- IPv4 addresses ------------------------------------------------------

    #[test]
    fn test_scrub_raw_log_ipv4_address_redacted() {
        let input = "Server address: 192.168.1.100 port 443";
        let result = scrub_raw_log(input);
        assert!(!result.contains("192.168.1.100"));
        assert!(result.contains("<ip-redacted>"));
    }

    #[test]
    fn test_scrub_raw_log_ipv4_loopback_redacted() {
        let input = "Connecting to 127.0.0.1:8080";
        let result = scrub_raw_log(input);
        assert!(!result.contains("127.0.0.1"));
        assert!(result.contains("<ip-redacted>"));
    }

    #[test]
    fn test_scrub_raw_log_ipv4_public_address_redacted() {
        let input = "WotC endpoint: 52.23.1.200";
        let result = scrub_raw_log(input);
        assert!(!result.contains("52.23.1.200"));
        assert!(result.contains("<ip-redacted>"));
    }

    #[test]
    fn test_scrub_raw_log_version_string_redacted_as_ipv4_deliberate_tradeoff() {
        // A four-part version string is indistinguishable from a dotted quad,
        // so it is redacted too; this test pins that behaviour.
        let input = "Version: 1.2.3.4";
        let result = scrub_raw_log(input);
        assert!(!result.contains("1.2.3.4"));
        assert!(result.contains("<ip-redacted>"));
    }

    // -- IPv6 addresses ------------------------------------------------------

    #[test]
    fn test_scrub_raw_log_ipv6_loopback_redacted() {
        let input = "Listening on ::1 port 3000";
        let result = scrub_raw_log(input);
        assert!(!result.contains("::1"));
        assert!(result.contains("<ip-redacted>"));
    }

    #[test]
    fn test_scrub_raw_log_ipv6_link_local_redacted() {
        let input = "Interface address: fe80::1%eth0";
        let result = scrub_raw_log(input);
        assert!(!result.contains("fe80::1"));
        assert!(result.contains("<ip-redacted>"));
    }

    #[test]
    fn test_scrub_raw_log_ipv6_full_address_redacted() {
        let input = "IPv6: 2001:0db8:85a3:0000:0000:8a2e:0370:7334";
        let result = scrub_raw_log(input);
        assert!(!result.contains("2001:0db8:85a3:0000:0000:8a2e:0370:7334"));
        assert!(result.contains("<ip-redacted>"));
    }

    #[test]
    fn test_scrub_raw_log_ipv6_compressed_redacted() {
        let input = "Remote: 2001:db8::1";
        let result = scrub_raw_log(input);
        assert!(!result.contains("2001:db8::1"));
        assert!(result.contains("<ip-redacted>"));
    }

    // -- Opt-in corpus check -------------------------------------------------

    /// Scrubs every `*.log` file in the directory named by the
    /// `SCRUBBER_CORPUS_DIR` environment variable and fails if any player-name
    /// or hardware value survives. Returns immediately (passing) when the
    /// variable is not set.
    #[test]
    fn test_corpus_scrub_no_pii_survives() {
        let Ok(dir) = std::env::var("SCRUBBER_CORPUS_DIR") else {
            return;
        };
        let corpus_dir = std::path::PathBuf::from(dir);

        // Same shapes as the production rules, but with the value captured so
        // that whatever is left after scrubbing can be inspected.
        let pii_patterns: Vec<(&str, Regex)> = vec![
            (
                "screenName",
                Regex::new(r#""[Ss]creen[Nn]ame"\s*:\s*"([^"]+)""#)
                    .unwrap_or_else(|_| unreachable!()),
            ),
            (
                "playerName",
                Regex::new(r#""[Pp]layer[Nn]ame"\s*:\s*"([^"]+)""#)
                    .unwrap_or_else(|_| unreachable!()),
            ),
            (
                "Renderer",
                Regex::new(r"(?m)^\s+Renderer:\s+(.+)").unwrap_or_else(|_| unreachable!()),
            ),
            (
                "Vendor",
                Regex::new(r"(?m)^\s+Vendor:\s+(.+)").unwrap_or_else(|_| unreachable!()),
            ),
            (
                "VRAM",
                Regex::new(r"(?m)^\s+VRAM:\s+(.+)").unwrap_or_else(|_| unreachable!()),
            ),
            (
                "Driver",
                Regex::new(r"(?m)^\s+Driver:\s+(.+)").unwrap_or_else(|_| unreachable!()),
            ),
        ];

        let mut total_before = 0u32;
        let mut failures: Vec<String> = Vec::new();

        // An unreadable corpus directory is a set-up error; report the path
        // and the OS error instead of a bare panic.
        let entries: Vec<_> = std::fs::read_dir(&corpus_dir)
            .unwrap_or_else(|e| {
                unreachable!("cannot read corpus dir {}: {}", corpus_dir.display(), e)
            })
            .filter_map(Result::ok)
            .filter(|e| e.path().extension().is_some_and(|ext| ext == "log"))
            .collect();

        for entry in &entries {
            let path = entry.path();
            let filename = path
                .file_name()
                .unwrap_or_else(|| unreachable!())
                .to_string_lossy();
            // Files that cannot be read as UTF-8 text are skipped, not failed.
            let Ok(raw) = std::fs::read_to_string(&path) else {
                continue;
            };

            let scrubbed = scrub_raw_log(&raw);

            for (name, re) in &pii_patterns {
                let before = u32::try_from(re.find_iter(&raw).count()).unwrap_or(u32::MAX);
                // Saturate, matching the conversion above, so a huge corpus
                // cannot overflow the counter.
                total_before = total_before.saturating_add(before);

                let leaked: Vec<String> = re
                    .captures_iter(&scrubbed)
                    .filter_map(|cap| {
                        let val = cap.get(1).map_or("", |m| m.as_str());
                        if val == "<redacted>" {
                            None
                        } else {
                            Some(val.to_owned())
                        }
                    })
                    .collect();

                for val in &leaked {
                    failures.push(format!("{filename}: {name} leaked: {val:?}"));
                }
            }
        }

        assert!(
            total_before > 0,
            "corpus should contain at least one PII match to be a meaningful test"
        );
        assert!(
            failures.is_empty(),
            "PII survived scrubbing in {} location(s) (of {total_before} raw matches):\n{}",
            failures.len(),
            failures.join("\n")
        );
    }
}