1pub mod gitleaks;
2use crate::helper::generate_simple_id;
3pub use gitleaks::initialize_gitleaks_config;
5use gitleaks::{DetectedSecret, detect_secrets};
6use std::collections::HashMap;
7use std::fmt;
8
/// Outcome of a redaction pass: the rewritten text together with the
/// placeholder table that was used to produce it.
#[derive(Debug, Clone)]
pub struct RedactionResult {
    /// The text after placeholders have been substituted for secrets.
    pub redacted_string: String,
    /// Maps each placeholder key to the original value it stands for.
    pub redaction_map: HashMap<String, String>,
}

impl RedactionResult {
    /// Bundles an already-redacted string with its placeholder table.
    pub fn new(redacted_string: String, redaction_map: HashMap<String, String>) -> Self {
        RedactionResult {
            redacted_string,
            redaction_map,
        }
    }
}

impl fmt::Display for RedactionResult {
    /// Writes only the redacted text; the placeholder table is never printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.redacted_string)
    }
}
32
33pub fn redact_secrets(
37 content: &str,
38 path: Option<&str>,
39 old_redaction_map: &HashMap<String, String>,
40 privacy_mode: bool,
41) -> RedactionResult {
42 if content.contains("[REDACTED_SECRET:") {
44 return RedactionResult::new(content.to_string(), HashMap::new());
45 }
46
47 let mut secrets = detect_secrets(content, path, privacy_mode);
48
49 let mut redaction_map = old_redaction_map.clone();
50 let mut reverse_redaction_map: HashMap<String, String> = old_redaction_map
51 .clone()
52 .into_iter()
53 .map(|(k, v)| (v, k))
54 .collect();
55
56 for (original_secret, redaction_key) in &reverse_redaction_map {
57 let key_parts = redaction_key.split(':').collect::<Vec<&str>>();
59 if key_parts.len() == 3 {
60 let rule_id = key_parts[1].to_string();
61 if let Some(start) = content.find(original_secret) {
62 let end = start + original_secret.len();
63 secrets.push(DetectedSecret {
64 rule_id,
65 value: original_secret.clone(),
66 start_pos: start,
67 end_pos: end,
68 });
69 }
70 }
71 }
72
73 if secrets.is_empty() {
74 return RedactionResult::new(content.to_string(), HashMap::new());
75 }
76
77 let mut redacted_string = content.to_string();
78
79 let mut deduplicated_secrets: Vec<DetectedSecret> = Vec::new();
81 let mut sorted_by_start = secrets;
82 sorted_by_start.sort_by(|a, b| a.start_pos.cmp(&b.start_pos));
83
84 for secret in sorted_by_start {
85 let mut should_add = true;
86 let mut to_remove = Vec::new();
87
88 for (i, existing) in deduplicated_secrets.iter().enumerate() {
89 let overlaps =
91 secret.start_pos < existing.end_pos && secret.end_pos > existing.start_pos;
92
93 if overlaps {
94 if secret.value.len() > existing.value.len() {
96 to_remove.push(i);
97 } else {
98 should_add = false;
99 break;
100 }
101 }
102 }
103
104 for &i in to_remove.iter().rev() {
106 deduplicated_secrets.remove(i);
107 }
108
109 if should_add {
110 deduplicated_secrets.push(secret);
111 }
112 }
113
114 deduplicated_secrets.sort_by(|a, b| b.start_pos.cmp(&a.start_pos));
116
117 for secret in deduplicated_secrets {
118 if !content.is_char_boundary(secret.start_pos) || !content.is_char_boundary(secret.end_pos)
120 {
121 continue;
122 }
123
124 if secret.start_pos >= redacted_string.len() || secret.end_pos > redacted_string.len() {
126 continue;
127 }
128
129 let redaction_key = if let Some(existing_key) = reverse_redaction_map.get(&secret.value) {
132 existing_key.clone()
133 } else {
134 let key = generate_redaction_key(&secret.rule_id);
135 redaction_map.insert(key.clone(), secret.value.clone());
137 reverse_redaction_map.insert(secret.value, key.clone());
138 key
139 };
140
141 redacted_string.replace_range(secret.start_pos..secret.end_pos, &redaction_key);
143 }
144
145 RedactionResult::new(redacted_string, redaction_map)
146}
147
/// Puts the original values back into a redacted string.
///
/// Every key of `redaction_map` that occurs in `redacted_string` is replaced by its
/// mapped value, one map entry at a time, in the map's iteration order.
pub fn restore_secrets(redacted_string: &str, redaction_map: &HashMap<String, String>) -> String {
    redaction_map
        .iter()
        .fold(redacted_string.to_string(), |text, (placeholder, secret)| {
            text.replace(placeholder.as_str(), secret.as_str())
        })
}
158
159pub fn redact_password(
161 content: &str,
162 password: &str,
163 old_redaction_map: &HashMap<String, String>,
164) -> RedactionResult {
165 if password.is_empty() {
166 return RedactionResult::new(content.to_string(), HashMap::new());
167 }
168
169 if content.contains("[REDACTED_SECRET:") {
171 return RedactionResult::new(content.to_string(), HashMap::new());
172 }
173
174 let mut redacted_string = content.to_string();
175 let mut redaction_map = old_redaction_map.clone();
176 let mut reverse_redaction_map: HashMap<String, String> = old_redaction_map
177 .clone()
178 .into_iter()
179 .map(|(k, v)| (v, k))
180 .collect();
181
182 let redaction_key = if let Some(existing_key) = reverse_redaction_map.get(password) {
184 existing_key.clone()
185 } else {
186 let key = generate_redaction_key("password");
187 redaction_map.insert(key.clone(), password.to_string());
189 reverse_redaction_map.insert(password.to_string(), key.clone());
190 key
191 };
192
193 redacted_string = redacted_string.replace(password, &redaction_key);
195
196 RedactionResult::new(redacted_string, redaction_map)
197}
198
199fn generate_redaction_key(rule_id: &str) -> String {
201 let id = generate_simple_id(6);
202 format!("[REDACTED_SECRET:{rule_id}:{id}]")
203}
204
205#[cfg(test)]
206mod tests {
207 use regex::Regex;
208
209 use crate::secrets::gitleaks::{
210 GITLEAKS_CONFIG, calculate_entropy, contains_any_keyword, create_simple_api_key_regex,
211 is_allowed_by_rule_allowlist, should_allow_match,
212 };
213
214 use super::*;
215
216 #[test]
217 fn test_redaction_key_generation() {
218 let key1 = generate_redaction_key("test");
219 let key2 = generate_redaction_key("my-rule");
220
221 assert_ne!(key1, key2);
223
224 assert!(key1.starts_with("[REDACTED_SECRET:test:"));
226 assert!(key1.ends_with("]"));
227 assert!(key2.starts_with("[REDACTED_SECRET:my-rule:"));
228 assert!(key2.ends_with("]"));
229 }
230
231 #[test]
232 fn test_empty_input() {
233 let result = redact_secrets("", None, &HashMap::new(), false);
234 assert_eq!(result.redacted_string, "");
235 assert!(result.redaction_map.is_empty());
236 }
237
238 #[test]
239 fn test_restore_secrets() {
240 let mut redaction_map = HashMap::new();
241 redaction_map.insert("[REDACTED_abc123]".to_string(), "secret123".to_string());
242 redaction_map.insert("[REDACTED_def456]".to_string(), "api_key_xyz".to_string());
243
244 let redacted = "Password is [REDACTED_abc123] and key is [REDACTED_def456]";
245 let restored = restore_secrets(redacted, &redaction_map);
246
247 assert_eq!(restored, "Password is secret123 and key is api_key_xyz");
248 }
249
250 #[test]
251 fn test_redaction_result_display() {
252 let mut redaction_map = HashMap::new();
253 redaction_map.insert("[REDACTED_test]".to_string(), "secret".to_string());
254
255 let result = RedactionResult::new("Hello [REDACTED_test]".to_string(), redaction_map);
256 assert_eq!(format!("{}", result), "Hello [REDACTED_test]");
257 }
258
259 #[test]
260 fn test_redact_secrets_with_api_key() {
261 let input = "export API_KEY=abc123def456ghi789jkl012mno345pqr678";
263 let result = redact_secrets(input, None, &HashMap::new(), false);
264
265 assert!(!result.redaction_map.is_empty());
267 assert!(result.redacted_string.contains("[REDACTED_"));
268 println!("Input: {}", input);
269 println!("Redacted: {}", result.redacted_string);
270 println!("Mapping: {:?}", result.redaction_map);
271 }
272
273 #[test]
274 fn test_redact_secrets_with_aws_key() {
275 let input = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EX23PLE";
276 let result = redact_secrets(input, None, &HashMap::new(), false);
277
278 assert!(!result.redaction_map.is_empty());
280 println!("Input: {}", input);
281 println!("Redacted: {}", result.redacted_string);
282 println!("Mapping: {:?}", result.redaction_map);
283 }
284
285 #[test]
286 fn test_redaction_identical_secrets() {
287 let input = r#"
288 export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EX23PLE
289 export AWS_ACCESS_KEY_ID_2=AKIAIOSFODNN7EX23PLE
290 "#;
291 let result = redact_secrets(input, None, &HashMap::new(), false);
292
293 assert_eq!(result.redaction_map.len(), 1);
294 }
295
296 #[test]
297 fn test_redaction_identical_secrets_different_contexts() {
298 let input_1 = r#"
299 export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EX23PLE
300 "#;
301 let input_2 = r#"
302 export SOME_OTHER_SECRET=AKIAIOSFODNN7EX23PLE
303 "#;
304 let result_1 = redact_secrets(input_1, None, &HashMap::new(), false);
305 let result_2 = redact_secrets(input_2, None, &result_1.redaction_map, false);
306
307 assert_eq!(result_1.redaction_map, result_2.redaction_map);
308 }
309
310 #[test]
311 fn test_redact_secrets_with_github_token() {
312 let input = "GITHUB_TOKEN=ghp_1234567890abcdef1234567890abcdef12345678";
313 let result = redact_secrets(input, None, &HashMap::new(), false);
314
315 assert!(!result.redaction_map.is_empty());
317 println!("Input: {}", input);
318 println!("Redacted: {}", result.redacted_string);
319 println!("Mapping: {:?}", result.redaction_map);
320 }
321
322 #[test]
323 fn test_no_secrets() {
324 let input = "This is just a normal string with no secrets";
325 let result = redact_secrets(input, None, &HashMap::new(), false);
326
327 assert_eq!(result.redaction_map.len(), 0);
329 assert_eq!(result.redacted_string, input);
330 }
331
    /// Diagnostic test: prints the `generic-api-key` rule and how several sample inputs
    /// fare against it. It has no assertions, so it can only fail by panicking.
    #[test]
    fn test_debug_generic_api_key() {
        let config = &*GITLEAKS_CONFIG;

        let generic_rule = config.rules.iter().find(|r| r.id == "generic-api-key");
        if let Some(rule) = generic_rule {
            println!("Generic API Key Rule:");
            println!(" Regex: {:?}", rule.regex);
            println!(" Entropy: {:?}", rule.entropy);
            println!(" Keywords: {:?}", rule.keywords);

            if let Some(regex_pattern) = &rule.regex {
                // A pattern that fails to compile is silently skipped here.
                if let Ok(regex) = Regex::new(regex_pattern) {
                    let test_input = "API_KEY=abc123def456ghi789jkl012mno345pqr678";
                    println!("\nTesting regex directly:");
                    println!(" Input: {}", test_input);

                    for mat in regex.find_iter(test_input) {
                        println!(" Raw match: '{}'", mat.as_str());
                        println!(" Match position: {}-{}", mat.start(), mat.end());

                        // Re-run the regex on the matched text alone to list its capture groups.
                        if let Some(captures) = regex.captures(mat.as_str()) {
                            for (i, cap) in captures.iter().enumerate() {
                                if let Some(cap) = cap {
                                    println!(" Capture {}: '{}'", i, cap.as_str());
                                    // Entropy is reported for capture group 1 only.
                                    if i == 1 {
                                        let entropy = calculate_entropy(cap.as_str());
                                        println!(" Entropy of capture 1: {:.2}", entropy);
                                    }
                                }
                            }
                        }
                    }
                }
            } else {
                println!(" No regex pattern (path-based rule)");
            }

            // Run the full redaction pipeline over a handful of key/value samples.
            let test_inputs = vec![
                "API_KEY=abc123def456ghi789jkl012mno345pqr678",
                "api_key=RaNd0mH1ghEnTr0pyV4luE567890abcdef",
                "access_key=Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA0bD8eF2gH5jK",
                "secret_token=1234567890abcdef1234567890abcdef",
                "password=9k2L8pMvB3nQ7rX1ZdF5GhJwY4AsPo6C",
            ];

            for input in test_inputs {
                println!("\nTesting input: {}", input);
                let result = redact_secrets(input, None, &HashMap::new(), false);
                println!(" Detected secrets: {}", result.redaction_map.len());
                if !result.redaction_map.is_empty() {
                    println!(" Redacted: {}", result.redacted_string);
                }
            }
        } else {
            println!("Generic API key rule not found!");
        }
    }
394
    /// Diagnostic test: checks one short `key=value` input against the `generic-api-key`
    /// regex and prints what matched. It has no assertions; it panics only if the rule
    /// is missing from the config.
    #[test]
    fn test_simple_regex_match() {
        let input = "key=abcdefghijklmnop";
        println!("Testing simple input: {}", input);

        let config = &*GITLEAKS_CONFIG;
        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();

        if let Some(regex_pattern) = &generic_rule.regex {
            // A pattern that fails to compile is silently skipped here.
            if let Ok(regex) = Regex::new(regex_pattern) {
                println!("Regex pattern: {}", regex_pattern);

                if regex.is_match(input) {
                    println!("✓ Regex MATCHES the input!");

                    for mat in regex.find_iter(input) {
                        println!("Match found: '{}'", mat.as_str());

                        // Re-run the regex on the matched text alone to list its capture groups.
                        if let Some(captures) = regex.captures(mat.as_str()) {
                            println!("Full capture groups:");
                            for (i, cap) in captures.iter().enumerate() {
                                if let Some(cap) = cap {
                                    println!(" Group {}: '{}'", i, cap.as_str());
                                    // Entropy is reported for capture group 1 only.
                                    if i == 1 {
                                        let entropy = calculate_entropy(cap.as_str());
                                        println!(" Entropy: {:.2} (threshold: 3.5)", entropy);
                                    }
                                }
                            }
                        }
                    }
                } else {
                    println!("✗ Regex does NOT match the input");
                }
            }
        } else {
            println!("Rule has no regex pattern (path-based rule)");
        }

        // Finally report what the full redaction pipeline makes of the same input.
        let result = redact_secrets(input, None, &HashMap::new(), false);
        println!(
            "Full function result: {} secrets detected",
            result.redaction_map.len()
        );
    }
446
    /// Diagnostic test: prints matches, capture groups and entropy for several
    /// `keyword=value` inputs against the `generic-api-key` regex, then does the same
    /// for an AWS sample against the `aws-access-token` regex. It has no assertions;
    /// it panics only if either rule is missing from the config.
    #[test]
    fn test_regex_breakdown() {
        let config = &*GITLEAKS_CONFIG;
        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();

        if let Some(regex_pattern) = &generic_rule.regex {
            println!("Full regex: {}", regex_pattern);

            let test_inputs = vec![
                "key=abcdefghijklmnop",
                "api_key=abcdefghijklmnop",
                "secret=abcdefghijklmnop",
                "token=abcdefghijklmnop",
                "password=abcdefghijklmnop",
                "access_key=abcdefghijklmnop",
            ];

            for input in test_inputs {
                println!("\nTesting: '{}'", input);

                // NOTE(review): the same pattern is recompiled for every input; it could
                // be compiled once before the loop.
                if let Ok(regex) = Regex::new(regex_pattern) {
                    let matches: Vec<_> = regex.find_iter(input).collect();
                    println!(" Matches found: {}", matches.len());

                    for (i, mat) in matches.iter().enumerate() {
                        println!(" Match {}: '{}'", i, mat.as_str());

                        // Re-run the regex on the matched text alone to list its capture groups.
                        if let Some(captures) = regex.captures(mat.as_str()) {
                            for (j, cap) in captures.iter().enumerate() {
                                if let Some(cap) = cap {
                                    println!(" Capture {}: '{}'", j, cap.as_str());
                                    // Entropy is evaluated for capture group 1 only.
                                    if j == 1 {
                                        let entropy = calculate_entropy(cap.as_str());
                                        println!(" Entropy: {:.2} (threshold: 3.5)", entropy);
                                        if entropy >= 3.5 {
                                            println!(" ✓ Entropy check PASSED");
                                        } else {
                                            println!(" ✗ Entropy check FAILED");
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        } else {
            println!("Rule has no regex pattern (path-based rule)");
        }

        println!("\nTesting AWS pattern that we know works:");
        let aws_input = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE";
        println!("Input: {}", aws_input);

        let aws_rule = config
            .rules
            .iter()
            .find(|r| r.id == "aws-access-token")
            .unwrap();
        if let Some(aws_regex_pattern) = &aws_rule.regex {
            // A pattern that fails to compile is silently skipped here.
            if let Ok(regex) = Regex::new(aws_regex_pattern) {
                for mat in regex.find_iter(aws_input) {
                    println!("AWS Match: '{}'", mat.as_str());
                    if let Some(captures) = regex.captures(mat.as_str()) {
                        for (i, cap) in captures.iter().enumerate() {
                            if let Some(cap) = cap {
                                println!(" AWS Capture {}: '{}'", i, cap.as_str());
                            }
                        }
                    }
                }
            }
        } else {
            println!("AWS rule has no regex pattern");
        }
    }
531
    /// Diagnostic test: runs the pre-compiled `generic-api-key` regex over several
    /// realistic-looking assignments and prints matches, entropy, the allowlist verdict
    /// and the result of the full redaction pipeline. It has no assertions; it panics
    /// only if the rule is missing or has no compiled regex.
    #[test]
    fn test_working_api_key_patterns() {
        let config = &*GITLEAKS_CONFIG;
        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();

        let regex = generic_rule
            .compiled_regex
            .as_ref()
            .expect("Regex should be compiled");

        let test_inputs = vec![
            "myapp_api_key = \"abc123def456ghi789jklmnop\"",
            "export SECRET_TOKEN=Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA0bD8eF",
            "app.auth.password: 9k2L8pMvB3nQ7rX1ZdF5GhJwY4AsPo6C8mN",
            "config.access_key=\"RaNd0mH1ghEnTr0pyV4luE567890abcdef\";",
            "DB_CREDENTIALS=xy9mP2nQ8rT4vW7yZ3cF6hJ1lN5sAdefghij",
        ];

        for input in test_inputs {
            println!("\nTesting: '{}'", input);

            let matches: Vec<_> = regex.find_iter(input).collect();
            println!(" Matches found: {}", matches.len());

            for (i, mat) in matches.iter().enumerate() {
                println!(" Match {}: '{}'", i, mat.as_str());

                // Re-run the regex on the matched text alone to list its capture groups.
                if let Some(captures) = regex.captures(mat.as_str()) {
                    for (j, cap) in captures.iter().enumerate() {
                        if let Some(cap) = cap {
                            println!(" Capture {}: '{}'", j, cap.as_str());
                            // Entropy and the allowlist verdict are reported for capture group 1 only.
                            if j == 1 {
                                let entropy = calculate_entropy(cap.as_str());
                                println!(" Entropy: {:.2} (threshold: 3.5)", entropy);

                                let allowed = should_allow_match(
                                    input,
                                    None,
                                    mat.as_str(),
                                    mat.start(),
                                    mat.end(),
                                    generic_rule,
                                    &config.allowlist,
                                );
                                println!(" Allowed by allowlist: {}", allowed);
                            }
                        }
                    }
                }
            }

            // Compare with what the full redaction pipeline reports for the same input.
            let result = redact_secrets(input, None, &HashMap::new(), false);
            println!(
                " Full function detected: {} secrets",
                result.redaction_map.len()
            );
            if !result.redaction_map.is_empty() {
                println!(" Redacted result: {}", result.redacted_string);
            }
        }
    }
602
    /// Diagnostic test: tries progressively more specific hand-written patterns against
    /// one `export API_KEY=...` line, then checks whether the `generic-api-key` regex
    /// compiles and matches it. It has no assertions; it panics only if the rule is
    /// missing from the config.
    #[test]
    fn test_regex_components() {
        let test_input = "export API_KEY=Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA0bD8eF";
        println!("Testing input: {}", test_input);

        // (pattern, human-readable description) pairs, from simplest to most complex.
        let test_patterns = vec![
            (r"API_KEY", "Simple keyword match"),
            (r"(?i)api_key", "Case insensitive keyword"),
            (r"(?i).*key.*", "Any text with 'key'"),
            (r"(?i).*key\s*=", "Key with equals"),
            (r"(?i).*key\s*=\s*\w+", "Key with value"),
            (
                r"(?i)[\w.-]*(?:key).*?=.*?(\w{10,})",
                "Complex pattern with capture",
            ),
        ];

        for (pattern, description) in test_patterns {
            println!("\nTesting pattern: {} ({})", pattern, description);

            match Regex::new(pattern) {
                Ok(regex) => {
                    if regex.is_match(test_input) {
                        println!(" ✓ MATCHES");
                        for mat in regex.find_iter(test_input) {
                            println!(" Full match: '{}'", mat.as_str());
                        }
                        // Capture groups are listed for the first match only.
                        if let Some(captures) = regex.captures(test_input) {
                            for (i, cap) in captures.iter().enumerate() {
                                if let Some(cap) = cap {
                                    println!(" Capture {}: '{}'", i, cap.as_str());
                                }
                            }
                        }
                    } else {
                        println!(" ✗ NO MATCH");
                    }
                }
                Err(e) => println!(" Error: {}", e),
            }
        }

        let config = &*GITLEAKS_CONFIG;
        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();

        println!("\nTesting actual gitleaks regex:");
        if let Some(regex_pattern) = &generic_rule.regex {
            match Regex::new(regex_pattern) {
                Ok(regex) => {
                    println!(" ✓ Regex compiles successfully");
                    println!(" Testing against: {}", test_input);
                    if regex.is_match(test_input) {
                        println!(" ✓ MATCHES");
                    } else {
                        println!(" ✗ NO MATCH");
                    }
                }
                Err(e) => println!(" ✗ Regex compilation error: {}", e),
            }
        } else {
            println!(" Rule has no regex pattern (path-based rule)");
        }
    }
673
674 #[test]
675 fn test_comprehensive_secrets_redaction() {
676 let input = r#"
677# Configuration file with various secrets
678export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7REALKEY
679export GITHUB_TOKEN=ghp_1234567890abcdef1234567890abcdef12345678
680export API_KEY=abc123def456ghi789jklmnop
681export SECRET_TOKEN=Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA0bD8eF
682export PASSWORD=supersecretpassword123456
683
684# Some normal configuration
685export DEBUG=true
686export PORT=3000
687"#;
688
689 println!("Original input:\n{}", input);
690
691 let result = redact_secrets(input, None, &HashMap::new(), false);
692
693 println!("Redacted output:\n{}", result.redacted_string);
694 println!("\nDetected {} secrets:", result.redaction_map.len());
695 for (key, value) in &result.redaction_map {
696 println!(" {} -> {}", key, value);
697 }
698
699 assert!(
701 result.redaction_map.len() >= 5,
702 "Should detect at least 5 secrets, found: {}",
703 result.redaction_map.len()
704 );
705
706 assert!(!result.redacted_string.contains("AKIAIOSFODNN7REALKEY"));
708 assert!(
709 !result
710 .redacted_string
711 .contains("ghp_1234567890abcdef1234567890abcdef12345678")
712 );
713 assert!(!result.redacted_string.contains("abc123def456ghi789jklmnop"));
714
715 assert!(result.redacted_string.contains("DEBUG=true"));
717 assert!(result.redacted_string.contains("PORT=3000"));
718 }
719
720 fn count_rules_that_would_process(input: &str) -> Vec<String> {
722 let config = &*GITLEAKS_CONFIG;
723 let mut rules = Vec::new();
724
725 for rule in &config.rules {
726 if rule.keywords.is_empty() || contains_any_keyword(input, &rule.keywords) {
727 rules.push(rule.id.clone());
728 }
729 }
730
731 rules
732 }
733
    /// Verifies `contains_any_keyword` for three inputs: an `API_KEY` line must hit the
    /// `generic-api-key` keywords, a `DATABASE_URL` line must not, and an AWS line must
    /// hit the `aws-access-token` keywords. Redaction results are printed but not asserted.
    #[test]
    fn test_keyword_filtering() {
        println!("=== TESTING KEYWORD FILTERING ===");

        let config = &*GITLEAKS_CONFIG;

        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();
        println!("Generic API Key rule keywords: {:?}", generic_rule.keywords);

        // Case 1: input that mentions a generic-api-key keyword.
        let input_with_keywords = "export API_KEY=abc123def456ghi789jklmnop";
        let result1 = redact_secrets(input_with_keywords, None, &HashMap::new(), false);
        println!("\nTest 1 - Input WITH keywords:");
        println!(" Input: {}", input_with_keywords);
        println!(
            " Keywords present: {}",
            contains_any_keyword(input_with_keywords, &generic_rule.keywords)
        );
        println!(" Secrets detected: {}", result1.redaction_map.len());

        // Case 2: input expected to contain none of the generic-api-key keywords.
        let input_without_keywords = "export DATABASE_URL=postgresql://user:pass@localhost/db";
        let result2 = redact_secrets(input_without_keywords, None, &HashMap::new(), false);
        println!("\nTest 2 - Input WITHOUT generic-api-key keywords:");
        println!(" Input: {}", input_without_keywords);
        println!(
            " Keywords present: {}",
            contains_any_keyword(input_without_keywords, &generic_rule.keywords)
        );
        println!(" Secrets detected: {}", result2.redaction_map.len());

        // Case 3: AWS input checked against the AWS rule's own keywords.
        let aws_rule = config
            .rules
            .iter()
            .find(|r| r.id == "aws-access-token")
            .unwrap();
        let aws_input = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE";
        let result3 = redact_secrets(aws_input, None, &HashMap::new(), false);
        println!("\nTest 3 - AWS input:");
        println!(" Input: {}", aws_input);
        println!(" AWS rule keywords: {:?}", aws_rule.keywords);
        println!(
            " Keywords present: {}",
            contains_any_keyword(aws_input, &aws_rule.keywords)
        );
        println!(" Secrets detected: {}", result3.redaction_map.len());

        // Only the keyword checks are asserted; detection counts above are informational.
        assert!(
            contains_any_keyword(input_with_keywords, &generic_rule.keywords),
            "API_KEY input should contain generic-api-key keywords"
        );
        assert!(
            !contains_any_keyword(input_without_keywords, &generic_rule.keywords),
            "DATABASE_URL input should NOT contain generic-api-key keywords"
        );
        assert!(
            contains_any_keyword(aws_input, &aws_rule.keywords),
            "AWS input should contain AWS rule keywords"
        );
    }
801
    /// Checks that keyword matching narrows the rule set: an input without secret
    /// keywords must not match every rule, and a `GITHUB_TOKEN` input must match at
    /// least one rule whose id is `github-pat` or contains `github`. Despite the name,
    /// nothing is timed; detection counts are printed but not asserted.
    #[test]
    fn test_keyword_optimization_performance() {
        println!("=== TESTING KEYWORD OPTIMIZATION PERFORMANCE ===");

        let config = &*GITLEAKS_CONFIG;

        let no_keywords_input = "export DATABASE_CONNECTION=some_long_connection_string_that_has_no_common_secret_keywords";
        println!("Testing input with no secret keywords:");
        println!(" Input: {}", no_keywords_input);

        // Count the rules whose keyword list matches this input.
        let mut keyword_matches = 0;
        for rule in &config.rules {
            if contains_any_keyword(no_keywords_input, &rule.keywords) {
                keyword_matches += 1;
                println!(" Rule '{}' keywords match: {:?}", rule.id, rule.keywords);
            }
        }
        println!(
            " Rules with matching keywords: {} out of {}",
            keyword_matches,
            config.rules.len()
        );

        let result = redact_secrets(no_keywords_input, None, &HashMap::new(), false);
        println!(" Secrets detected: {}", result.redaction_map.len());

        let specific_keywords_input = "export GITHUB_TOKEN=ghp_1234567890abcdef";
        println!("\nTesting input with specific keywords (github):");
        println!(" Input: {}", specific_keywords_input);

        // Collect the ids of the rules whose keywords match the GitHub input.
        let mut matching_rules = Vec::new();
        for rule in &config.rules {
            if contains_any_keyword(specific_keywords_input, &rule.keywords) {
                matching_rules.push(&rule.id);
            }
        }
        println!(" Rules that would be processed: {:?}", matching_rules);

        let result = redact_secrets(specific_keywords_input, None, &HashMap::new(), false);
        println!(" Secrets detected: {}", result.redaction_map.len());

        // Rules with an empty keyword list are listed for information only.
        let rules_without_keywords: Vec<_> = config
            .rules
            .iter()
            .filter(|rule| rule.keywords.is_empty())
            .collect();
        println!(
            "\nRules without keywords (always processed): {}",
            rules_without_keywords.len()
        );
        for rule in &rules_without_keywords {
            println!(" - {}", rule.id);
        }

        assert!(
            keyword_matches < config.rules.len(),
            "Input with no keywords should not match all rules"
        );
        assert!(
            !matching_rules.is_empty(),
            "GitHub token input should match some rules"
        );
        assert!(
            matching_rules.contains(&&"github-pat".to_string())
                || matching_rules
                    .iter()
                    .any(|rule_id| rule_id.contains("github")),
            "GitHub token should match GitHub-related rules"
        );
    }
876
    /// Checks that keyword filtering skips at least one rule for a non-secret input,
    /// that at least one rule's keywords match a secret-bearing input, and that the
    /// secret-bearing input yields at least one redaction.
    #[test]
    fn test_keyword_filtering_efficiency() {
        println!("=== KEYWORD FILTERING EFFICIENCY TEST ===");

        let config = &*GITLEAKS_CONFIG;
        println!("Total rules in config: {}", config.rules.len());

        let non_secret_input = "export DATABASE_URL=localhost PORT=3000 DEBUG=true TIMEOUT=30";
        println!("\nTesting non-secret input: {}", non_secret_input);

        let mut rules_skipped = 0;
        let mut rules_processed = 0;

        // A rule counts as processed when it has no keywords or one of them matches.
        for rule in &config.rules {
            if rule.keywords.is_empty() || contains_any_keyword(non_secret_input, &rule.keywords) {
                rules_processed += 1;
            } else {
                rules_skipped += 1;
            }
        }

        println!(
            " Rules skipped due to keyword filtering: {}",
            rules_skipped
        );
        println!(" Rules that would be processed: {}", rules_processed);
        println!(
            " Efficiency gain: {:.1}% of rules skipped",
            (rules_skipped as f64 / config.rules.len() as f64) * 100.0
        );

        let result = redact_secrets(non_secret_input, None, &HashMap::new(), false);
        println!(" Secrets detected: {}", result.redaction_map.len());

        let secret_input =
            "export API_KEY=abc123def456ghi789jklmnop SECRET_TOKEN=xyz789uvw012rst345def678";
        println!("\nTesting input WITH secret keywords:");
        println!(" Input: {}", secret_input);

        let mut rules_with_keywords = 0;
        for rule in &config.rules {
            if contains_any_keyword(secret_input, &rule.keywords) {
                rules_with_keywords += 1;
            }
        }

        println!(" Rules that match keywords: {}", rules_with_keywords);

        // `result` is shadowed here, so the final assertion refers to the secret-bearing input.
        let result = redact_secrets(secret_input, None, &HashMap::new(), false);
        println!(" Secrets detected: {}", result.redaction_map.len());

        assert!(
            rules_skipped > 0,
            "Should skip at least some rules for non-secret input"
        );
        assert!(
            rules_with_keywords > 0,
            "Should find matching rules for secret input"
        );
        assert!(
            !result.redaction_map.is_empty(),
            "Should detect at least one secret"
        );
    }
945
    /// Runs `detect_secrets` on three inputs and asserts the outcome of each: nothing
    /// for a keyword-free line, at least one finding for an `API_KEY` line, and at least
    /// one finding for an AWS access key line. The list of rules that would be processed
    /// is printed for each input but not asserted.
    #[test]
    fn test_keyword_validation_summary() {
        println!("=== KEYWORD VALIDATION SUMMARY ===");

        let config = &*GITLEAKS_CONFIG;
        let total_rules = config.rules.len();
        println!("Total rules in gitleaks config: {}", total_rules);

        // Scenario 1: no secret keywords. Only the absence of findings is asserted,
        // not the "should skip all rules" expectation printed below.
        let no_keyword_input = "export DATABASE_URL=localhost PORT=3000";
        println!("\n--- No keywords - should skip all rules ---");
        println!("Input: {}", no_keyword_input);

        let no_keyword_rules = count_rules_that_would_process(no_keyword_input);
        println!(
            "Rules that would be processed: {} out of {}",
            no_keyword_rules.len(),
            total_rules
        );
        println!(" Rules: {:?}", no_keyword_rules);

        let no_keyword_secrets = detect_secrets(no_keyword_input, None, false);
        println!(
            "Secrets detected: {} (expected: 0)",
            no_keyword_secrets.len()
        );
        assert_eq!(no_keyword_secrets.len(), 0, "Should not detect any secrets");
        println!("✅ Test passed");

        // Scenario 2: generic API key.
        let api_input = "export API_KEY=abc123def456ghi789jklmnop";
        println!("\n--- API keyword - should process generic-api-key rule ---");
        println!("Input: {}", api_input);

        let api_rules = count_rules_that_would_process(api_input);
        println!(
            "Rules that would be processed: {} out of {}",
            api_rules.len(),
            total_rules
        );
        println!(" Rules: {:?}", api_rules);

        let api_secrets = detect_secrets(api_input, None, false);
        println!("Secrets detected: {} (expected: 1)", api_secrets.len());
        assert!(!api_secrets.is_empty(), "Should detect at least 1 secrets");
        println!("✅ Test passed");

        // Scenario 3: AWS access key.
        let aws_input = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7REALKEY";
        println!("\n--- AWS keyword - should process aws-access-token rule ---");
        println!("Input: {}", aws_input);

        let aws_rules = count_rules_that_would_process(aws_input);
        println!(
            "Rules that would be processed: {} out of {}",
            aws_rules.len(),
            total_rules
        );
        println!(" Rules: {:?}", aws_rules);

        let aws_secrets = detect_secrets(aws_input, None, false);
        println!("Secrets detected: {} (expected: 1)", aws_secrets.len());

        assert!(!aws_secrets.is_empty(), "Should detect at least 1 secrets");
        println!("✅ Test passed");
    }
1014
    /// Diagnostic test: for two `NAME=value` inputs, prints the value's entropy, whether
    /// the fallback regex from `create_simple_api_key_regex` matches, the allowlist
    /// verdict for each match, and the result of the full redaction pipeline. It has no
    /// assertions; it panics only if the `generic-api-key` rule is missing.
    #[test]
    fn test_debug_missing_secrets() {
        println!("=== DEBUGGING MISSING SECRETS ===");

        let test_cases = vec![
            "SECRET_TOKEN=Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA0bD8eF",
            "PASSWORD=supersecretpassword123456",
        ];

        for input in test_cases {
            println!("\nTesting: {}", input);

            // The entropy section runs only when the input contains exactly one '='.
            let parts: Vec<&str> = input.split('=').collect();
            if parts.len() == 2 {
                let secret_value = parts[1];
                let entropy = calculate_entropy(secret_value);
                println!(" Secret value: '{}'", secret_value);
                println!(" Entropy: {:.2} (threshold: 3.5)", entropy);

                if entropy >= 3.5 {
                    println!(" ✓ Entropy check PASSED");
                } else {
                    println!(" ✗ Entropy check FAILED - this is why it's not detected");
                }
            }

            // If the fallback regex cannot be built, this section is silently skipped.
            if let Ok(regex) = create_simple_api_key_regex() {
                println!(" Testing fallback regex:");
                if regex.is_match(input) {
                    println!(" ✓ Fallback regex MATCHES");
                    for mat in regex.find_iter(input) {
                        println!(" Match: '{}'", mat.as_str());
                        if let Some(captures) = regex.captures(mat.as_str()) {
                            for (i, cap) in captures.iter().enumerate() {
                                if let Some(cap) = cap {
                                    println!(" Capture {}: '{}'", i, cap.as_str());
                                }
                            }
                        }

                        // Ask the allowlist logic whether this match would be filtered out.
                        let config = &*GITLEAKS_CONFIG;
                        let generic_rule = config
                            .rules
                            .iter()
                            .find(|r| r.id == "generic-api-key")
                            .unwrap();
                        let allowed = should_allow_match(
                            input,
                            None,
                            mat.as_str(),
                            mat.start(),
                            mat.end(),
                            generic_rule,
                            &config.allowlist,
                        );
                        println!(" Allowed by allowlist: {}", allowed);
                        if allowed {
                            println!(
                                " ✗ FILTERED OUT by allowlist - this is why it's not detected"
                            );
                        }
                    }
                } else {
                    println!(" ✗ Fallback regex does NOT match");
                }
            }

            let result = redact_secrets(input, None, &HashMap::new(), false);
            println!(
                " Full detection result: {} secrets",
                result.redaction_map.len()
            );
        }
    }
1093
    /// Diagnostic test: for every fallback-regex match in two sample inputs, prints
    /// which global and rule-level allowlist regexes or stopwords would match it. It has
    /// no assertions; it panics only if the `generic-api-key` rule is missing.
    #[test]
    fn test_debug_allowlist_filtering() {
        println!("=== DEBUGGING ALLOWLIST FILTERING ===");

        let test_cases = vec![
            "SECRET_TOKEN=Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA0bD8eF",
            "PASSWORD=supersecretpassword123456",
        ];

        let config = &*GITLEAKS_CONFIG;
        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();

        for input in test_cases {
            println!("\nAnalyzing: {}", input);

            // If the fallback regex cannot be built, the input is silently skipped.
            if let Ok(regex) = create_simple_api_key_regex() {
                for mat in regex.find_iter(input) {
                    let match_text = mat.as_str();
                    println!(" Match: '{}'", match_text);

                    // Global allowlist: regexes first, then stopwords.
                    if let Some(global_allowlist) = &config.allowlist {
                        println!(" Checking global allowlist:");

                        if let Some(regexes) = &global_allowlist.regexes {
                            for (i, pattern) in regexes.iter().enumerate() {
                                // Patterns that fail to compile are ignored.
                                if let Ok(regex) = Regex::new(pattern)
                                    && regex.is_match(match_text)
                                {
                                    println!(" ✗ FILTERED by global regex {}: '{}'", i, pattern);
                                }
                            }
                        }

                        if let Some(stopwords) = &global_allowlist.stopwords {
                            for stopword in stopwords {
                                // Case-insensitive substring check.
                                if match_text.to_lowercase().contains(&stopword.to_lowercase()) {
                                    println!(" ✗ FILTERED by global stopword: '{}'", stopword);
                                }
                            }
                        }
                    }

                    // Rule-level allowlists of the generic-api-key rule, checked the same way.
                    if let Some(rule_allowlists) = &generic_rule.allowlists {
                        for (rule_idx, allowlist) in rule_allowlists.iter().enumerate() {
                            println!(" Checking rule allowlist {}:", rule_idx);

                            if let Some(regexes) = &allowlist.regexes {
                                for (i, pattern) in regexes.iter().enumerate() {
                                    // Patterns that fail to compile are ignored.
                                    if let Ok(regex) = Regex::new(pattern)
                                        && regex.is_match(match_text)
                                    {
                                        println!(
                                            " ✗ FILTERED by rule regex {}: '{}'",
                                            i, pattern
                                        );
                                    }
                                }
                            }

                            if let Some(stopwords) = &allowlist.stopwords {
                                for stopword in stopwords {
                                    // Case-insensitive substring check.
                                    if match_text.to_lowercase().contains(&stopword.to_lowercase())
                                    {
                                        println!(" ✗ FILTERED by rule stopword: '{}'", stopword);
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
1177
1178 #[test]
1179 fn test_debug_new_allowlist_logic() {
1180 println!("=== DEBUGGING NEW ALLOWLIST LOGIC ===");
1181
1182 let test_cases = vec![
1183 "SECRET_TOKEN=Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA0bD8eF",
1184 "PASSWORD=supersecretpassword123456",
1185 "PASSWORD=password123", "API_KEY=example_key", ];
1188
1189 let config = &*GITLEAKS_CONFIG;
1190 let generic_rule = config
1191 .rules
1192 .iter()
1193 .find(|r| r.id == "generic-api-key")
1194 .unwrap();
1195
1196 for input in test_cases {
1197 println!("\nTesting: {}", input);
1198
1199 if let Ok(regex) = create_simple_api_key_regex() {
1200 for mat in regex.find_iter(input) {
1201 let match_text = mat.as_str();
1202 println!(" Match: '{}'", match_text);
1203
1204 if let Some(equals_pos) = match_text.find('=') {
1206 let value = &match_text[equals_pos + 1..];
1207 println!(" Value: '{}'", value);
1208
1209 let test_stopwords = ["token", "password", "super", "word"];
1211 for stopword in test_stopwords {
1212 let value_lower = value.to_lowercase();
1213 let stopword_lower = stopword.to_lowercase();
1214
1215 if value_lower == stopword_lower {
1216 println!(" '{}' - Exact match: YES", stopword);
1217 } else if value.len() < 15 && value_lower.contains(&stopword_lower) {
1218 let without_stopword = value_lower.replace(&stopword_lower, "");
1219 let is_simple = without_stopword.chars().all(|c| {
1220 c.is_ascii_digit() || "!@#$%^&*()_+-=[]{}|;:,.<>?".contains(c)
1221 });
1222 println!(
1223 " '{}' - Short+contains: len={}, without='{}', simple={}",
1224 stopword,
1225 value.len(),
1226 without_stopword,
1227 is_simple
1228 );
1229 } else {
1230 println!(" '{}' - No filter", stopword);
1231 }
1232 }
1233 }
1234
1235 if let Some(rule_allowlists) = &generic_rule.allowlists {
1237 for (rule_idx, allowlist) in rule_allowlists.iter().enumerate() {
1238 let allowed = is_allowed_by_rule_allowlist(
1239 input,
1240 None,
1241 match_text,
1242 mat.start(),
1243 mat.end(),
1244 allowlist,
1245 );
1246 println!(" Rule allowlist {}: allowed = {}", rule_idx, allowed);
1247 }
1248 }
1249 }
1250 }
1251 }
1252 }
1253
1254 #[test]
1255 fn test_redact_password_basic() {
1256 let content = "User password is supersecret123 and should be hidden";
1257 let password = "supersecret123";
1258 let result = redact_password(content, password, &HashMap::new());
1259
1260 assert!(!result.redacted_string.contains(password));
1262 assert!(
1263 result
1264 .redacted_string
1265 .contains("[REDACTED_SECRET:password:")
1266 );
1267 assert_eq!(result.redaction_map.len(), 1);
1268
1269 let redacted_password = result.redaction_map.values().next().unwrap();
1271 assert_eq!(redacted_password, password);
1272 }
1273
1274 #[test]
1275 fn test_redact_password_empty() {
1276 let content = "Some content without password";
1277 let password = "";
1278 let result = redact_password(content, password, &HashMap::new());
1279
1280 assert_eq!(result.redacted_string, content);
1282 assert!(result.redaction_map.is_empty());
1283 }
1284
1285 #[test]
1286 fn test_redact_password_multiple_occurrences() {
1287 let content = "Password is mypass123 and again mypass123 appears here";
1288 let password = "mypass123";
1289 let result = redact_password(content, password, &HashMap::new());
1290
1291 assert!(!result.redacted_string.contains(password));
1293 assert_eq!(result.redaction_map.len(), 1);
1294
1295 let redaction_key = result.redaction_map.keys().next().unwrap();
1297 let count = result.redacted_string.matches(redaction_key).count();
1298 assert_eq!(count, 2);
1299 }
1300
1301 #[test]
1302 fn test_redact_password_reuse_existing_key() {
1303 let mut existing_map = HashMap::new();
1305 existing_map.insert(
1306 "[REDACTED_SECRET:password:abc123]".to_string(),
1307 "mypassword".to_string(),
1308 );
1309
1310 let content = "The password mypassword should use existing key";
1311 let password = "mypassword";
1312 let result = redact_password(content, password, &existing_map);
1313
1314 assert_eq!(result.redaction_map.len(), 1);
1316 assert!(
1317 result
1318 .redaction_map
1319 .contains_key("[REDACTED_SECRET:password:abc123]")
1320 );
1321 assert!(
1322 result
1323 .redacted_string
1324 .contains("[REDACTED_SECRET:password:abc123]")
1325 );
1326 }
1327
1328 #[test]
1329 fn test_redact_password_with_existing_different_secrets() {
1330 let mut existing_map = HashMap::new();
1332 existing_map.insert(
1333 "[REDACTED_SECRET:api-key:xyz789]".to_string(),
1334 "some_api_key".to_string(),
1335 );
1336
1337 let content = "API key is some_api_key and password is newpassword123";
1338 let password = "newpassword123";
1339 let result = redact_password(content, password, &existing_map);
1340
1341 assert_eq!(result.redaction_map.len(), 2);
1343 assert!(
1344 result
1345 .redaction_map
1346 .contains_key("[REDACTED_SECRET:api-key:xyz789]")
1347 );
1348 assert!(
1349 result
1350 .redaction_map
1351 .get("[REDACTED_SECRET:api-key:xyz789]")
1352 .unwrap()
1353 == "some_api_key"
1354 );
1355
1356 let new_keys: Vec<_> = result
1358 .redaction_map
1359 .keys()
1360 .filter(|k| k.contains("password"))
1361 .collect();
1362 assert_eq!(new_keys.len(), 1);
1363 let password_key = new_keys[0];
1364 assert_eq!(
1365 result.redaction_map.get(password_key).unwrap(),
1366 "newpassword123"
1367 );
1368 }
1369
1370 #[test]
1371 fn test_redact_password_no_match() {
1372 let content = "This content has no matching password";
1373 let password = "notfound";
1374 let result = redact_password(content, password, &HashMap::new());
1375
1376 assert_eq!(result.redacted_string, content);
1378 assert_eq!(result.redaction_map.len(), 1);
1379 assert_eq!(result.redaction_map.values().next().unwrap(), "notfound");
1380 }
1381
1382 #[test]
1383 fn test_redact_password_integration_with_restore() {
1384 let content = "Login with username admin and password secret456";
1385 let password = "secret456";
1386 let result = redact_password(content, password, &HashMap::new());
1387
1388 assert!(!result.redacted_string.contains(password));
1390 assert!(result.redacted_string.contains("username admin"));
1391
1392 let restored = restore_secrets(&result.redacted_string, &result.redaction_map);
1394 assert_eq!(restored, content);
1395 }
1396
1397 #[test]
1398 fn test_redact_secrets_with_existing_redaction_map() {
1399 let content = "The secret value is mysecretvalue123 and another is anothersecret456";
1401
1402 let result_empty = redact_secrets(content, None, &HashMap::new(), false);
1404
1405 assert!(result_empty.redacted_string.contains("mysecretvalue123"));
1407 let mut existing_redaction_map = HashMap::new();
1409 existing_redaction_map.insert(
1410 "[REDACTED_SECRET:manual:abc123]".to_string(),
1411 "mysecretvalue123".to_string(),
1412 );
1413
1414 let result = redact_secrets(content, None, &existing_redaction_map, false);
1415
1416 assert!(
1418 result
1419 .redacted_string
1420 .contains("[REDACTED_SECRET:manual:abc123]")
1421 );
1422 assert!(!result.redacted_string.contains("mysecretvalue123"));
1423
1424 assert!(
1426 result
1427 .redaction_map
1428 .contains_key("[REDACTED_SECRET:manual:abc123]")
1429 );
1430 assert_eq!(
1431 result
1432 .redaction_map
1433 .get("[REDACTED_SECRET:manual:abc123]")
1434 .unwrap(),
1435 "mysecretvalue123"
1436 );
1437 }
1438
1439 #[test]
1440 fn test_redact_secrets_skip_already_redacted() {
1441 let content = "The password is [REDACTED_SECRET:password:abc123] and API key is [REDACTED_SECRET:api-key:xyz789]";
1443 let result = redact_secrets(content, None, &HashMap::new(), false);
1444
1445 assert_eq!(result.redacted_string, content);
1447 assert!(result.redaction_map.is_empty());
1449 }
1450
1451 #[test]
1452 fn test_redact_password_skip_already_redacted() {
1453 let content = "[REDACTED_SECRET:password:existing123]";
1455 let password = "newpassword";
1456 let result = redact_password(content, password, &HashMap::new());
1457
1458 assert_eq!(result.redacted_string, content);
1460 assert!(result.redaction_map.is_empty());
1462 }
1463
1464 #[test]
1465 fn test_redact_secrets_skip_nested_redaction() {
1466 let original_password = "MySecureP@ssw0rd!";
1468
1469 let first_result = redact_password(original_password, original_password, &HashMap::new());
1471 assert!(
1472 first_result
1473 .redacted_string
1474 .contains("[REDACTED_SECRET:password:")
1475 );
1476
1477 let second_result =
1479 redact_secrets(&first_result.redacted_string, None, &HashMap::new(), false);
1480
1481 assert_eq!(second_result.redacted_string, first_result.redacted_string);
1483 assert!(second_result.redaction_map.is_empty());
1484 }
1485}