1use std::sync::LazyLock;
12
13use regex::Regex;
14
15struct ScrubPattern {
17 regex: Regex,
18 replacement: &'static str,
19}
20
21static SCRUB_PATTERNS: LazyLock<Vec<ScrubPattern>> = LazyLock::new(|| {
36 let definitions: &[(&str, &str)] = &[
40 (r"Token:\s*\S+", "Token: <redacted>"),
43 (r"\bBearer\s+\S+", "Bearer <redacted>"),
50 (r"Match to [A-Z0-9_]+:", "Match to <redacted>:"),
54 (
56 r#""[Cc]lient[Ii]d"\s*:\s*"[^"]+""#,
57 r#""clientId": "<redacted>""#,
58 ),
59 (
61 r#""[Uu]ser[Ii]d"\s*:\s*"[^"]+""#,
62 r#""userId": "<redacted>""#,
63 ),
64 (r"[A-Z]:\\Users\\[^\\]+\\", r"<user-path>\"),
66 (r"/Users/[^/]+/", "<user-path>/"),
68 (r"/home/[^/]+/", "<user-path>/"),
70 (r#""[Tt]oken"\s*:\s*"[^"]+""#, r#""token": "<redacted>""#),
73 (
75 r#""[Ss]ession[Ii]d"\s*:\s*"[^"]+""#,
76 r#""sessionId": "<redacted>""#,
77 ),
78 (
80 r#""[Ss]creen[Nn]ame"\s*:\s*"[^"]+""#,
81 r#""screenName": "<redacted>""#,
82 ),
83 (
87 r#""[Pp]layer[Nn]ame"\s*:\s*"[^"]+""#,
88 r#""playerName": "<redacted>""#,
89 ),
90 (r"(?m)^\s+Renderer:\s+.+", " Renderer: <redacted>"),
94 (r"(?m)^\s+Vendor:\s+.+", " Vendor: <redacted>"),
96 (r"(?m)^\s+VRAM:\s+.+", " VRAM: <redacted>"),
98 (r"(?m)^\s+Driver:\s+.+", " Driver: <redacted>"),
100 ];
101
102 definitions
103 .iter()
104 .filter_map(|(pattern, replacement)| {
105 match Regex::new(pattern) {
109 Ok(regex) => Some(ScrubPattern { regex, replacement }),
110 Err(e) => {
111 ::log::error!("BUG: failed to compile privacy pattern {pattern:?}: {e}");
112 None
113 }
114 }
115 })
116 .collect()
117});
118
119pub fn scrub_raw_log(input: &str) -> String {
136 if input.is_empty() {
137 return String::new();
138 }
139
140 let mut result = input.to_owned();
141 for pattern in SCRUB_PATTERNS.iter() {
142 result = pattern
143 .regex
144 .replace_all(&result, pattern.replacement)
145 .into_owned();
146 }
147 result
148}
149
#[cfg(test)]
mod tests {
    // Unit tests for `scrub_raw_log`, grouped by the kind of data being scrubbed.
    use super::*;

    // ---- Input without sensitive data passes through unchanged ----

    #[test]
    fn test_scrub_raw_log_empty_input_returns_empty() {
        assert_eq!(scrub_raw_log(""), "");
    }

    #[test]
    fn test_scrub_raw_log_single_line_no_sensitive_data_unchanged() {
        let input = "[UnityCrossThreadLogger] Game started";
        assert_eq!(scrub_raw_log(input), input);
    }

    #[test]
    fn test_scrub_raw_log_multiline_no_sensitive_data_unchanged() {
        let input = "Line 1\nLine 2\nLine 3\n";
        assert_eq!(scrub_raw_log(input), input);
    }

    // ---- `Token:` values ----

    #[test]
    fn test_scrub_raw_log_token_value_redacted() {
        let input =
            "Token: eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.signature";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Token: <redacted>");
    }

    #[test]
    fn test_scrub_raw_log_token_no_space_after_colon_redacted() {
        let input = "Token:abc123def456";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Token: <redacted>");
    }

    #[test]
    fn test_scrub_raw_log_token_with_surrounding_text() {
        let input = "[Auth] Login response Token: eyJhbGciOiJSUzI1NiJ9.payload.sig -- done";
        let result = scrub_raw_log(input);
        assert_eq!(result, "[Auth] Login response Token: <redacted> -- done");
    }

    #[test]
    fn test_scrub_raw_log_multiple_tokens_on_separate_lines() {
        let input = "Token: first_token\nSome other line\nToken: second_token\n";
        let result = scrub_raw_log(input);
        assert!(result.contains("Token: <redacted>"));
        assert!(!result.contains("first_token"));
        assert!(!result.contains("second_token"));
    }

    // ---- `Bearer` credentials ----

    #[test]
    fn test_scrub_raw_log_bearer_token_redacted() {
        let input = "Authorization: Bearer eyJhbGciOiJSUzI1NiJ9.payload.signature";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Authorization: Bearer <redacted>");
    }

    #[test]
    fn test_scrub_raw_log_bearer_with_extra_whitespace() {
        // Several spaces between the keyword and the credential: the whole
        // run is replaced by the canonical single-space form.
        let input = "Bearer    some_token_value";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Bearer <redacted>");
    }

    #[test]
    fn test_scrub_raw_log_bearer_false_positive_standard_bearer_not_redacted() {
        // "Bearer" inside a longer identifier must not trigger the rule.
        let input = r#""Title_StandardBearer""#;
        assert_eq!(scrub_raw_log(input), input);
    }

    #[test]
    fn test_scrub_raw_log_bearer_jwt_still_redacted() {
        let input = "Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.payload.signature";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Authorization: Bearer <redacted>");
        assert!(!result.contains("eyJhbGciOiJIUzI1NiJ9"));
    }

    // ---- Windows user paths ----

    #[test]
    fn test_scrub_raw_log_windows_path_redacted() {
        let input =
            r"Loading from C:\Users\JohnDoe\AppData\LocalLow\Wizards Of The Coast\MTGA\Player.log";
        let result = scrub_raw_log(input);
        assert!(result.contains(r"<user-path>\AppData\LocalLow"));
        assert!(!result.contains("JohnDoe"));
    }

    #[test]
    fn test_scrub_raw_log_windows_path_different_drive_letter() {
        let input = r"D:\Users\Alice\Documents\game.log";
        let result = scrub_raw_log(input);
        assert!(result.contains(r"<user-path>\Documents"));
        assert!(!result.contains("Alice"));
    }

    // ---- macOS user paths ----

    #[test]
    fn test_scrub_raw_log_macos_path_redacted() {
        let input = "/Users/johndoe/Library/Logs/com.wizards.mtga/Player.log";
        let result = scrub_raw_log(input);
        assert!(result.contains("<user-path>/Library/Logs"));
        assert!(!result.contains("johndoe"));
    }

    #[test]
    fn test_scrub_raw_log_macos_path_with_spaces_in_context() {
        let input = "Reading file at /Users/jane_doe/Library/Logs/app.log successfully";
        let result = scrub_raw_log(input);
        assert!(result.contains("<user-path>/Library/Logs"));
        assert!(!result.contains("jane_doe"));
    }

    // ---- Linux user paths ----

    #[test]
    fn test_scrub_raw_log_linux_path_redacted() {
        let input = "/home/gamer/.local/share/Steam/steamapps/common/MTGA/Player.log";
        let result = scrub_raw_log(input);
        assert!(result.contains("<user-path>/.local/share"));
        assert!(!result.contains("gamer"));
    }

    #[test]
    fn test_scrub_raw_log_linux_path_different_username() {
        let input = "Config at /home/mtg_player/.config/manasight/settings.toml";
        let result = scrub_raw_log(input);
        assert!(result.contains("<user-path>/.config/manasight"));
        assert!(!result.contains("mtg_player"));
    }

    // ---- JSON `token` / `sessionId` fields ----

    #[test]
    fn test_scrub_raw_log_json_token_value_redacted() {
        let input = r#"{"screenName": "Player#1", "token": "abc123secret"}"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""token": "<redacted>""#));
        assert!(!result.contains("abc123secret"));
    }

    #[test]
    fn test_scrub_raw_log_json_token_uppercase_key_redacted() {
        let input = r#"{"Token": "eyJhbGci.payload.sig"}"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""token": "<redacted>""#));
        assert!(!result.contains("eyJhbGci"));
    }

    #[test]
    fn test_scrub_raw_log_json_session_id_redacted() {
        let input = r#"{"sessionId": "sess_abc123def456", "status": "connected"}"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""sessionId": "<redacted>""#));
        assert!(!result.contains("sess_abc123def456"));
    }

    #[test]
    fn test_scrub_raw_log_authenticate_response_block() {
        let input = "[UnityCrossThreadLogger]authenticateResponse\n\
            {\"screenName\": \"TestPlayer#12345\", \"token\": \"secret_jwt_value\"}";
        let result = scrub_raw_log(input);
        assert!(!result.contains("secret_jwt_value"));
        assert!(result.contains(r#""token": "<redacted>""#));
        assert!(!result.contains("TestPlayer#12345"));
        assert!(result.contains(r#""screenName": "<redacted>""#));
    }

    #[test]
    fn test_scrub_raw_log_session_id_with_spaces_in_json() {
        let input = r#"{ "SessionId" : "long-session-id-value-here" }"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""sessionId": "<redacted>""#));
        assert!(!result.contains("long-session-id-value-here"));
    }

    // ---- "Match to <ID>:" lines ----

    #[test]
    fn test_scrub_raw_log_match_to_account_id_redacted() {
        let input = "Match to CR4QJUQPDBCVVMGCGNZLWGDFJE: AuthenticateResponse";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Match to <redacted>: AuthenticateResponse");
        assert!(!result.contains("CR4QJUQPDBCVVMGCGNZLWGDFJE"));
    }

    #[test]
    fn test_scrub_raw_log_match_to_with_underscore_in_id() {
        let input = "Match to SOME_ACCOUNT_ID_123: MatchCreated";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Match to <redacted>: MatchCreated");
        assert!(!result.contains("SOME_ACCOUNT_ID_123"));
    }

    #[test]
    fn test_scrub_raw_log_match_to_with_log_timestamp_prefix() {
        let input = "[UnityCrossThreadLogger]3/22/2026 12:00:31 PM: Match to CR4QJUQPDBCVVMGCGNZLWGDFJE: AuthenticateResponse";
        let result = scrub_raw_log(input);
        assert!(result.contains("Match to <redacted>:"));
        assert!(!result.contains("CR4QJUQPDBCVVMGCGNZLWGDFJE"));
    }

    // ---- JSON `clientId` ----

    #[test]
    fn test_scrub_raw_log_json_client_id_redacted() {
        let input = r#""clientId": "CR4QJUQPDBCVVMGCGNZLWGDFJE""#;
        let result = scrub_raw_log(input);
        assert_eq!(result, r#""clientId": "<redacted>""#);
        assert!(!result.contains("CR4QJUQPDBCVVMGCGNZLWGDFJE"));
    }

    #[test]
    fn test_scrub_raw_log_json_client_id_with_spaces() {
        let input = r#"{ "ClientId" : "ABCDEF123456" }"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""clientId": "<redacted>""#));
        assert!(!result.contains("ABCDEF123456"));
    }

    // ---- JSON `userId` ----

    #[test]
    fn test_scrub_raw_log_json_user_id_redacted() {
        let input = r#""userId": "CR4QJUQPDBCVVMGCGNZLWGDFJE""#;
        let result = scrub_raw_log(input);
        assert_eq!(result, r#""userId": "<redacted>""#);
        assert!(!result.contains("CR4QJUQPDBCVVMGCGNZLWGDFJE"));
    }

    #[test]
    fn test_scrub_raw_log_json_user_id_uppercase_key() {
        let input = r#"{"UserId": "OPPONENT_ACCOUNT_ID_XYZ"}"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""userId": "<redacted>""#));
        assert!(!result.contains("OPPONENT_ACCOUNT_ID_XYZ"));
    }

    #[test]
    fn test_scrub_raw_log_json_user_id_in_match_event() {
        let input = r#"{"players": [{"userId": "PLAYER_ABC"}, {"userId": "OPPONENT_XYZ"}]}"#;
        let result = scrub_raw_log(input);
        assert!(!result.contains("PLAYER_ABC"));
        assert!(!result.contains("OPPONENT_XYZ"));
        assert_eq!(result.matches(r#""userId": "<redacted>""#).count(), 2);
    }

    // ---- JSON `screenName` ----

    #[test]
    fn test_scrub_raw_log_screen_name_redacted() {
        let input = r#""screenName": "PlayerDisplayName#12345""#;
        let result = scrub_raw_log(input);
        assert_eq!(result, r#""screenName": "<redacted>""#);
        assert!(!result.contains("PlayerDisplayName"));
    }

    #[test]
    fn test_scrub_raw_log_screen_name_uppercase_key() {
        let input = r#"{"ScreenName": "SomePlayer#99999"}"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""screenName": "<redacted>""#));
        assert!(!result.contains("SomePlayer"));
    }

    #[test]
    fn test_scrub_raw_log_screen_name_no_space_after_colon() {
        let input = r#""screenName":"Truffie#12345""#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""screenName": "<redacted>""#));
        assert!(!result.contains("Truffie"));
    }

    // ---- JSON `playerName` ----

    #[test]
    fn test_scrub_raw_log_player_name_redacted() {
        let input = r#""playerName": "OpponentName#67890""#;
        let result = scrub_raw_log(input);
        assert_eq!(result, r#""playerName": "<redacted>""#);
        assert!(!result.contains("OpponentName"));
    }

    #[test]
    fn test_scrub_raw_log_player_name_both_players_redacted() {
        let input =
            r#"{"players": [{"playerName": "LocalPlayer#111"}, {"playerName": "Opponent#222"}]}"#;
        let result = scrub_raw_log(input);
        assert!(!result.contains("LocalPlayer"));
        assert!(!result.contains("Opponent"));
        assert_eq!(result.matches(r#""playerName": "<redacted>""#).count(), 2);
    }

    #[test]
    fn test_scrub_raw_log_player_name_uppercase_key() {
        let input = r#"{"PlayerName": "SomeUser#42"}"#;
        let result = scrub_raw_log(input);
        assert!(result.contains(r#""playerName": "<redacted>""#));
        assert!(!result.contains("SomeUser"));
    }

    // ---- Indented hardware lines (Renderer / Vendor / VRAM / Driver) ----

    #[test]
    fn test_scrub_raw_log_hardware_fingerprint_all_lines_redacted() {
        let input =
            " Renderer: NVIDIA GeForce RTX 3080\n Vendor: NVIDIA\n VRAM: 10240\n Driver: 537.58";
        let result = scrub_raw_log(input);
        assert!(!result.contains("NVIDIA GeForce RTX 3080"));
        assert!(!result.contains("NVIDIA"));
        assert!(!result.contains("10240"));
        assert!(!result.contains("537.58"));
        assert!(result.contains("Renderer: <redacted>"));
        assert!(result.contains("Vendor: <redacted>"));
        assert!(result.contains("VRAM: <redacted>"));
        assert!(result.contains("Driver: <redacted>"));
    }

    #[test]
    fn test_scrub_raw_log_hardware_fingerprint_in_full_log_header() {
        let input = "\
[UnityCrossThreadLogger] Version: 1.2.3.4
 SystemInfo:
 Renderer: AMD Radeon RX 6800 XT
 Vendor: AMD
 VRAM: 16384
 Driver: 23.12.1
[UnityCrossThreadLogger] Game starting";
        let result = scrub_raw_log(input);
        assert!(!result.contains("AMD Radeon RX 6800 XT"));
        assert!(!result.contains("16384"));
        assert!(!result.contains("23.12.1"));
        assert!(result.contains("Version: 1.2.3.4"));
        assert!(result.contains("Game starting"));
    }

    #[test]
    fn test_scrub_raw_log_hardware_renderer_not_matched_without_leading_whitespace() {
        let input = "Renderer: some game object reference";
        assert_eq!(scrub_raw_log(input), input);
    }

    #[test]
    fn test_scrub_raw_log_hardware_vendor_not_matched_without_leading_whitespace() {
        let input = "Vendor: some vendor string in game data";
        assert_eq!(scrub_raw_log(input), input);
    }

    // ---- Mixed content and edge cases ----

    #[test]
    fn test_scrub_raw_log_mixed_sensitive_data_all_redacted() {
        let input = "\
[Auth] Token: eyJhbGciOiJSUzI1NiJ9.payload.sig
[HTTP] Authorization: Bearer eyToken123.payload.sig
[Init] Loading config from C:\\Users\\JaneDoe\\AppData\\Local\\manasight\\config.toml
[Init] Log path: /Users/johndoe/Library/Logs/manasight.log
[Init] Linux path: /home/linuxuser/.local/share/manasight/data.db
[Game] Match started: event=PlayQueue";

        let result = scrub_raw_log(input);

        // Nothing sensitive survives...
        assert!(!result.contains("eyJhbGciOiJSUzI1NiJ9"));
        assert!(!result.contains("eyToken123"));
        assert!(!result.contains("JaneDoe"));
        assert!(!result.contains("johndoe"));
        assert!(!result.contains("linuxuser"));

        // ...each category got its placeholder...
        assert!(result.contains("Token: <redacted>"));
        assert!(result.contains("Bearer <redacted>"));
        assert!(result.contains(r"<user-path>\AppData"));
        assert!(result.contains("<user-path>/Library/Logs"));
        assert!(result.contains("<user-path>/.local/share"));

        // ...and harmless lines are untouched.
        assert!(result.contains("[Game] Match started: event=PlayQueue"));
    }

    #[test]
    fn test_scrub_raw_log_preserves_line_endings() {
        let input = "Line 1\r\nToken: secret_value\r\nLine 3\r\n";
        let result = scrub_raw_log(input);
        assert!(result.contains("\r\n"));
        assert!(result.contains("Token: <redacted>"));
    }

    #[test]
    fn test_scrub_raw_log_large_input_does_not_panic() {
        let line = "Normal log line without sensitive data\n";
        let large_input: String = line.repeat(25_000);
        let result = scrub_raw_log(&large_input);
        assert_eq!(result.len(), large_input.len());
    }

    #[test]
    fn test_scrub_raw_log_token_at_end_of_line_no_trailing_space() {
        let input = "Token: abc123";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Token: <redacted>");
    }

    #[test]
    fn test_scrub_raw_log_bearer_at_end_of_line_no_trailing_space() {
        let input = "Bearer abc123";
        let result = scrub_raw_log(input);
        assert_eq!(result, "Bearer <redacted>");
    }

    #[test]
    fn test_scrub_raw_log_path_only_line() {
        let input = r"C:\Users\SomeUser\";
        let result = scrub_raw_log(input);
        assert_eq!(result, r"<user-path>\");
    }

    #[test]
    fn test_scrub_raw_log_multiple_paths_on_same_line() {
        let input = "Copied /Users/alice/source.txt to /Users/bob/dest.txt";
        let result = scrub_raw_log(input);
        assert!(!result.contains("alice"));
        assert!(!result.contains("bob"));
        assert_eq!(
            result,
            "Copied <user-path>/source.txt to <user-path>/dest.txt"
        );
    }

    #[test]
    fn test_scrub_raw_log_idempotent() {
        let input = "Token: secret123\n/home/user/.config/app.toml";
        let first_pass = scrub_raw_log(input);
        let second_pass = scrub_raw_log(&first_pass);
        assert_eq!(first_pass, second_pass, "Scrubbing should be idempotent");
    }

    // ---- Negative cases: look-alikes that must stay untouched ----

    #[test]
    fn test_scrub_raw_log_lowercase_token_not_redacted() {
        let input = "token: not_a_real_token";
        assert_eq!(scrub_raw_log(input), input);
    }

    #[test]
    fn test_scrub_raw_log_lowercase_bearer_not_redacted() {
        let input = "bearer not_a_real_token";
        assert_eq!(scrub_raw_log(input), input);
    }

    #[test]
    fn test_scrub_raw_log_non_user_paths_not_redacted() {
        let input = "/usr/local/bin/mtga\n/etc/config.toml\n/var/log/syslog";
        assert_eq!(scrub_raw_log(input), input);
    }

    // ---- Optional corpus check ----

    // Scrubs every `*.log` file in the directory named by the
    // `SCRUBBER_CORPUS_DIR` environment variable and fails if any name or
    // hardware value is still present afterwards. When the variable is not
    // set the test returns immediately and therefore passes.
    #[test]
    fn test_corpus_scrub_no_pii_survives() {
        let Ok(dir) = std::env::var("SCRUBBER_CORPUS_DIR") else {
            return;
        };
        let corpus_dir = std::path::PathBuf::from(dir);

        // The detector patterns are literals, so a compile failure can only
        // come from editing them incorrectly; surface the reason if it does.
        let compile = |pattern: &str| {
            Regex::new(pattern).unwrap_or_else(|e| {
                unreachable!("hard-coded corpus pattern {pattern:?} failed to compile: {e}")
            })
        };

        // Capture group 1 of each detector is the value that must read
        // "<redacted>" after scrubbing.
        let pii_patterns: Vec<(&str, Regex)> = vec![
            (
                "screenName",
                compile(r#""[Ss]creen[Nn]ame"\s*:\s*"([^"]+)""#),
            ),
            (
                "playerName",
                compile(r#""[Pp]layer[Nn]ame"\s*:\s*"([^"]+)""#),
            ),
            ("Renderer", compile(r"(?m)^\s+Renderer:\s+(.+)")),
            ("Vendor", compile(r"(?m)^\s+Vendor:\s+(.+)")),
            ("VRAM", compile(r"(?m)^\s+VRAM:\s+(.+)")),
            ("Driver", compile(r"(?m)^\s+Driver:\s+(.+)")),
        ];

        let mut total_before = 0u32;
        let mut failures: Vec<String> = Vec::new();

        // A missing or unreadable corpus directory is a configuration error
        // the developer needs to see, so report the path and the OS error.
        let listing = std::fs::read_dir(&corpus_dir);
        assert!(
            listing.is_ok(),
            "cannot read SCRUBBER_CORPUS_DIR {}: {:?}",
            corpus_dir.display(),
            listing.as_ref().err()
        );
        let entries: Vec<_> = listing
            .into_iter()
            .flatten()
            .filter_map(Result::ok)
            .filter(|e| e.path().extension().is_some_and(|ext| ext == "log"))
            .collect();

        for entry in &entries {
            let path = entry.path();
            let filename = path
                .file_name()
                .unwrap_or_else(|| unreachable!())
                .to_string_lossy();
            // Files that cannot be read as UTF-8 text are skipped.
            let Ok(raw) = std::fs::read_to_string(&path) else {
                continue;
            };

            let scrubbed = scrub_raw_log(&raw);

            for (name, re) in &pii_patterns {
                let before = u32::try_from(re.find_iter(&raw).count()).unwrap_or(u32::MAX);
                total_before = total_before.saturating_add(before);

                let leaked: Vec<String> = re
                    .captures_iter(&scrubbed)
                    .filter_map(|cap| {
                        let val = cap.get(1).map_or("", |m| m.as_str());
                        if val == "<redacted>" {
                            None
                        } else {
                            Some(val.to_owned())
                        }
                    })
                    .collect();

                for val in &leaked {
                    failures.push(format!("{filename}: {name} leaked: {val:?}"));
                }
            }
        }

        assert!(
            total_before > 0,
            "corpus should contain at least one PII match to be a meaningful test"
        );
        assert!(
            failures.is_empty(),
            "PII survived scrubbing in {} location(s) (of {total_before} raw matches):\n{}",
            failures.len(),
            failures.join("\n")
        );
    }
}