1pub mod gitleaks;
2use crate::helper::generate_simple_id;
3pub use gitleaks::initialize_gitleaks_config;
5use gitleaks::{DetectedSecret, detect_secrets};
6use std::collections::HashMap;
7use std::fmt;
8
/// Outcome of a redaction pass: the sanitized text together with the table
/// needed to undo the redaction.
#[derive(Debug, Clone)]
pub struct RedactionResult {
    /// The text after redaction, with secrets replaced by redaction keys.
    pub redacted_string: String,
    /// Maps each redaction key (placeholder) to the original value it replaced.
    pub redaction_map: HashMap<String, String>,
}
17
18impl RedactionResult {
19 pub fn new(redacted_string: String, redaction_map: HashMap<String, String>) -> Self {
20 Self {
21 redacted_string,
22 redaction_map,
23 }
24 }
25}
26
27impl fmt::Display for RedactionResult {
28 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
29 write!(f, "{}", self.redacted_string)
30 }
31}
32
33pub fn redact_secrets(
37 content: &str,
38 path: Option<&str>,
39 old_redaction_map: &HashMap<String, String>,
40 privacy_mode: bool,
41) -> RedactionResult {
42 let mut secrets = detect_secrets(content, path, privacy_mode);
43
44 let mut redaction_map = old_redaction_map.clone();
45 let mut reverse_redaction_map: HashMap<String, String> = old_redaction_map
46 .clone()
47 .into_iter()
48 .map(|(k, v)| (v, k))
49 .collect();
50
51 for (original_secret, redaction_key) in &reverse_redaction_map {
52 let key_parts = redaction_key.split(':').collect::<Vec<&str>>();
54 if key_parts.len() == 3 {
55 let rule_id = key_parts[1].to_string();
56 if let Some(start) = content.find(original_secret) {
57 let end = start + original_secret.len();
58 secrets.push(DetectedSecret {
59 rule_id,
60 value: original_secret.clone(),
61 start_pos: start,
62 end_pos: end,
63 });
64 }
65 }
66 }
67
68 if secrets.is_empty() {
69 return RedactionResult::new(content.to_string(), HashMap::new());
70 }
71
72 let mut redacted_string = content.to_string();
73
74 let mut deduplicated_secrets: Vec<DetectedSecret> = Vec::new();
76 let mut sorted_by_start = secrets;
77 sorted_by_start.sort_by(|a, b| a.start_pos.cmp(&b.start_pos));
78
79 for secret in sorted_by_start {
80 let mut should_add = true;
81 let mut to_remove = Vec::new();
82
83 for (i, existing) in deduplicated_secrets.iter().enumerate() {
84 let overlaps =
86 secret.start_pos < existing.end_pos && secret.end_pos > existing.start_pos;
87
88 if overlaps {
89 if secret.value.len() > existing.value.len() {
91 to_remove.push(i);
92 } else {
93 should_add = false;
94 break;
95 }
96 }
97 }
98
99 for &i in to_remove.iter().rev() {
101 deduplicated_secrets.remove(i);
102 }
103
104 if should_add {
105 deduplicated_secrets.push(secret);
106 }
107 }
108
109 deduplicated_secrets.sort_by(|a, b| b.start_pos.cmp(&a.start_pos));
111
112 for secret in deduplicated_secrets {
113 if !content.is_char_boundary(secret.start_pos) || !content.is_char_boundary(secret.end_pos)
115 {
116 continue;
117 }
118
119 if secret.start_pos >= redacted_string.len() || secret.end_pos > redacted_string.len() {
121 continue;
122 }
123
124 let redaction_key = if let Some(existing_key) = reverse_redaction_map.get(&secret.value) {
127 existing_key.clone()
128 } else {
129 let key = generate_redaction_key(&secret.rule_id);
130 redaction_map.insert(key.clone(), secret.value.clone());
132 reverse_redaction_map.insert(secret.value, key.clone());
133 key
134 };
135
136 redacted_string.replace_range(secret.start_pos..secret.end_pos, &redaction_key);
138 }
139
140 RedactionResult::new(redacted_string, redaction_map)
141}
142
/// Undoes a redaction: every redaction key of `redaction_map` found in
/// `redacted_string` is replaced by the original value stored for it.
///
/// The substitutions are applied one key after another in the map's iteration
/// order, each over the result of the previous one.
pub fn restore_secrets(redacted_string: &str, redaction_map: &HashMap<String, String>) -> String {
    redaction_map
        .iter()
        .fold(redacted_string.to_string(), |text, (placeholder, secret)| {
            text.replace(placeholder, secret)
        })
}
153
154pub fn redact_password(
156 content: &str,
157 password: &str,
158 old_redaction_map: &HashMap<String, String>,
159) -> RedactionResult {
160 if password.is_empty() {
161 return RedactionResult::new(content.to_string(), HashMap::new());
162 }
163
164 let mut redacted_string = content.to_string();
165 let mut redaction_map = old_redaction_map.clone();
166 let mut reverse_redaction_map: HashMap<String, String> = old_redaction_map
167 .clone()
168 .into_iter()
169 .map(|(k, v)| (v, k))
170 .collect();
171
172 let redaction_key = if let Some(existing_key) = reverse_redaction_map.get(password) {
174 existing_key.clone()
175 } else {
176 let key = generate_redaction_key("password");
177 redaction_map.insert(key.clone(), password.to_string());
179 reverse_redaction_map.insert(password.to_string(), key.clone());
180 key
181 };
182
183 redacted_string = redacted_string.replace(password, &redaction_key);
185
186 RedactionResult::new(redacted_string, redaction_map)
187}
188
/// Builds a new redaction placeholder of the form
/// `[REDACTED_SECRET:<rule_id>:<id>]`, where `<id>` is the result of
/// `generate_simple_id(6)`.
///
/// NOTE(review): `generate_simple_id` lives in `crate::helper`; presumably it
/// returns a random 6-character identifier — confirm there.
/// `redact_secrets` splits keys on `':'` and expects exactly three parts, so a
/// `rule_id` containing `':'` yields a key that is not re-applied from an old map.
fn generate_redaction_key(rule_id: &str) -> String {
    let id = generate_simple_id(6);
    format!("[REDACTED_SECRET:{rule_id}:{id}]")
}
194
195#[cfg(test)]
196mod tests {
197 use regex::Regex;
198
199 use crate::secrets::gitleaks::{
200 GITLEAKS_CONFIG, calculate_entropy, contains_any_keyword, create_simple_api_key_regex,
201 is_allowed_by_rule_allowlist, should_allow_match,
202 };
203
204 use super::*;
205
206 #[test]
207 fn test_redaction_key_generation() {
208 let key1 = generate_redaction_key("test");
209 let key2 = generate_redaction_key("my-rule");
210
211 assert_ne!(key1, key2);
213
214 assert!(key1.starts_with("[REDACTED_SECRET:test:"));
216 assert!(key1.ends_with("]"));
217 assert!(key2.starts_with("[REDACTED_SECRET:my-rule:"));
218 assert!(key2.ends_with("]"));
219 }
220
221 #[test]
222 fn test_empty_input() {
223 let result = redact_secrets("", None, &HashMap::new(), false);
224 assert_eq!(result.redacted_string, "");
225 assert!(result.redaction_map.is_empty());
226 }
227
228 #[test]
229 fn test_restore_secrets() {
230 let mut redaction_map = HashMap::new();
231 redaction_map.insert("[REDACTED_abc123]".to_string(), "secret123".to_string());
232 redaction_map.insert("[REDACTED_def456]".to_string(), "api_key_xyz".to_string());
233
234 let redacted = "Password is [REDACTED_abc123] and key is [REDACTED_def456]";
235 let restored = restore_secrets(redacted, &redaction_map);
236
237 assert_eq!(restored, "Password is secret123 and key is api_key_xyz");
238 }
239
240 #[test]
241 fn test_redaction_result_display() {
242 let mut redaction_map = HashMap::new();
243 redaction_map.insert("[REDACTED_test]".to_string(), "secret".to_string());
244
245 let result = RedactionResult::new("Hello [REDACTED_test]".to_string(), redaction_map);
246 assert_eq!(format!("{}", result), "Hello [REDACTED_test]");
247 }
248
249 #[test]
250 fn test_redact_secrets_with_api_key() {
251 let input = "export API_KEY=abc123def456ghi789jkl012mno345pqr678";
253 let result = redact_secrets(input, None, &HashMap::new(), false);
254
255 assert!(!result.redaction_map.is_empty());
257 assert!(result.redacted_string.contains("[REDACTED_"));
258 println!("Input: {}", input);
259 println!("Redacted: {}", result.redacted_string);
260 println!("Mapping: {:?}", result.redaction_map);
261 }
262
263 #[test]
264 fn test_redact_secrets_with_aws_key() {
265 let input = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EX23PLE";
266 let result = redact_secrets(input, None, &HashMap::new(), false);
267
268 assert!(!result.redaction_map.is_empty());
270 println!("Input: {}", input);
271 println!("Redacted: {}", result.redacted_string);
272 println!("Mapping: {:?}", result.redaction_map);
273 }
274
275 #[test]
276 fn test_redaction_identical_secrets() {
277 let input = r#"
278 export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EX23PLE
279 export AWS_ACCESS_KEY_ID_2=AKIAIOSFODNN7EX23PLE
280 "#;
281 let result = redact_secrets(input, None, &HashMap::new(), false);
282
283 assert_eq!(result.redaction_map.len(), 1);
284 }
285
286 #[test]
287 fn test_redaction_identical_secrets_different_contexts() {
288 let input_1 = r#"
289 export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EX23PLE
290 "#;
291 let input_2 = r#"
292 export SOME_OTHER_SECRET=AKIAIOSFODNN7EX23PLE
293 "#;
294 let result_1 = redact_secrets(input_1, None, &HashMap::new(), false);
295 let result_2 = redact_secrets(input_2, None, &result_1.redaction_map, false);
296
297 assert_eq!(result_1.redaction_map, result_2.redaction_map);
298 }
299
300 #[test]
301 fn test_redact_secrets_with_github_token() {
302 let input = "GITHUB_TOKEN=ghp_1234567890abcdef1234567890abcdef12345678";
303 let result = redact_secrets(input, None, &HashMap::new(), false);
304
305 assert!(!result.redaction_map.is_empty());
307 println!("Input: {}", input);
308 println!("Redacted: {}", result.redacted_string);
309 println!("Mapping: {:?}", result.redaction_map);
310 }
311
312 #[test]
313 fn test_no_secrets() {
314 let input = "This is just a normal string with no secrets";
315 let result = redact_secrets(input, None, &HashMap::new(), false);
316
317 assert_eq!(result.redaction_map.len(), 0);
319 assert_eq!(result.redacted_string, input);
320 }
321
    /// Diagnostic test for the `generic-api-key` rule.
    ///
    /// Prints the rule's regex, entropy and keywords, runs the raw regex over a
    /// sample assignment, and then prints what `redact_secrets` detects for a
    /// handful of sample inputs. It contains no assertions; it only fails if
    /// something it calls panics.
    #[test]
    fn test_debug_generic_api_key() {
        let config = &*GITLEAKS_CONFIG;

        let generic_rule = config.rules.iter().find(|r| r.id == "generic-api-key");
        if let Some(rule) = generic_rule {
            println!("Generic API Key Rule:");
            println!(" Regex: {:?}", rule.regex);
            println!(" Entropy: {:?}", rule.entropy);
            println!(" Keywords: {:?}", rule.keywords);

            if let Some(regex_pattern) = &rule.regex {
                // A pattern that fails to compile is silently skipped here.
                if let Ok(regex) = Regex::new(regex_pattern) {
                    let test_input = "API_KEY=abc123def456ghi789jkl012mno345pqr678";
                    println!("\nTesting regex directly:");
                    println!(" Input: {}", test_input);

                    for mat in regex.find_iter(test_input) {
                        println!(" Raw match: '{}'", mat.as_str());
                        println!(" Match position: {}-{}", mat.start(), mat.end());

                        // Captures are re-computed on the matched text alone,
                        // not on the full input.
                        if let Some(captures) = regex.captures(mat.as_str()) {
                            for (i, cap) in captures.iter().enumerate() {
                                if let Some(cap) = cap {
                                    println!(" Capture {}: '{}'", i, cap.as_str());
                                    // Entropy is reported for capture group 1 only.
                                    if i == 1 {
                                        let entropy = calculate_entropy(cap.as_str());
                                        println!(" Entropy of capture 1: {:.2}", entropy);
                                    }
                                }
                            }
                        }
                    }
                }
            } else {
                println!(" No regex pattern (path-based rule)");
            }

            let test_inputs = vec![
                "API_KEY=abc123def456ghi789jkl012mno345pqr678",
                "api_key=RaNd0mH1ghEnTr0pyV4luE567890abcdef",
                "access_key=Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA0bD8eF2gH5jK",
                "secret_token=1234567890abcdef1234567890abcdef",
                "password=9k2L8pMvB3nQ7rX1ZdF5GhJwY4AsPo6C",
            ];

            // End-to-end check through the public entry point, print-only.
            for input in test_inputs {
                println!("\nTesting input: {}", input);
                let result = redact_secrets(input, None, &HashMap::new(), false);
                println!(" Detected secrets: {}", result.redaction_map.len());
                if !result.redaction_map.is_empty() {
                    println!(" Redacted: {}", result.redacted_string);
                }
            }
        } else {
            println!("Generic API key rule not found!");
        }
    }
384
    /// Diagnostic test: runs the `generic-api-key` regex against one short
    /// `key=value` input and prints matches, capture groups and the entropy of
    /// capture group 1, followed by the number of secrets `redact_secrets`
    /// reports. Panics if the rule is missing; makes no other assertions.
    #[test]
    fn test_simple_regex_match() {
        let input = "key=abcdefghijklmnop";
        println!("Testing simple input: {}", input);

        let config = &*GITLEAKS_CONFIG;
        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();

        if let Some(regex_pattern) = &generic_rule.regex {
            // A pattern that fails to compile is silently skipped here.
            if let Ok(regex) = Regex::new(regex_pattern) {
                println!("Regex pattern: {}", regex_pattern);

                if regex.is_match(input) {
                    println!("✓ Regex MATCHES the input!");

                    for mat in regex.find_iter(input) {
                        println!("Match found: '{}'", mat.as_str());

                        // Captures are re-computed on the matched text alone.
                        if let Some(captures) = regex.captures(mat.as_str()) {
                            println!("Full capture groups:");
                            for (i, cap) in captures.iter().enumerate() {
                                if let Some(cap) = cap {
                                    println!(" Group {}: '{}'", i, cap.as_str());
                                    if i == 1 {
                                        let entropy = calculate_entropy(cap.as_str());
                                        println!(" Entropy: {:.2} (threshold: 3.5)", entropy);
                                    }
                                }
                            }
                        }
                    }
                } else {
                    println!("✗ Regex does NOT match the input");
                }
            }
        } else {
            println!("Rule has no regex pattern (path-based rule)");
        }

        // Same input through the public entry point, print-only.
        let result = redact_secrets(input, None, &HashMap::new(), false);
        println!(
            "Full function result: {} secrets detected",
            result.redaction_map.len()
        );
    }
436
    /// Diagnostic test: applies the `generic-api-key` regex to several
    /// `<keyword>=<value>` inputs and prints matches, captures and whether the
    /// entropy of capture group 1 reaches 3.5; then does the same (without the
    /// entropy step) for the `aws-access-token` rule on an AWS sample.
    /// Panics if either rule is missing; makes no other assertions.
    #[test]
    fn test_regex_breakdown() {
        let config = &*GITLEAKS_CONFIG;
        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();

        if let Some(regex_pattern) = &generic_rule.regex {
            println!("Full regex: {}", regex_pattern);

            let test_inputs = vec![
                "key=abcdefghijklmnop",
                "api_key=abcdefghijklmnop",
                "secret=abcdefghijklmnop",
                "token=abcdefghijklmnop",
                "password=abcdefghijklmnop",
                "access_key=abcdefghijklmnop",
            ];

            for input in test_inputs {
                println!("\nTesting: '{}'", input);

                // NOTE(review): the same pattern is compiled again for every
                // input; a compile failure is silently skipped.
                if let Ok(regex) = Regex::new(regex_pattern) {
                    let matches: Vec<_> = regex.find_iter(input).collect();
                    println!(" Matches found: {}", matches.len());

                    for (i, mat) in matches.iter().enumerate() {
                        println!(" Match {}: '{}'", i, mat.as_str());

                        // Captures are re-computed on the matched text alone.
                        if let Some(captures) = regex.captures(mat.as_str()) {
                            for (j, cap) in captures.iter().enumerate() {
                                if let Some(cap) = cap {
                                    println!(" Capture {}: '{}'", j, cap.as_str());
                                    if j == 1 {
                                        let entropy = calculate_entropy(cap.as_str());
                                        println!(" Entropy: {:.2} (threshold: 3.5)", entropy);
                                        if entropy >= 3.5 {
                                            println!(" ✓ Entropy check PASSED");
                                        } else {
                                            println!(" ✗ Entropy check FAILED");
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        } else {
            println!("Rule has no regex pattern (path-based rule)");
        }

        println!("\nTesting AWS pattern that we know works:");
        let aws_input = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE";
        println!("Input: {}", aws_input);

        let aws_rule = config
            .rules
            .iter()
            .find(|r| r.id == "aws-access-token")
            .unwrap();
        if let Some(aws_regex_pattern) = &aws_rule.regex {
            if let Ok(regex) = Regex::new(aws_regex_pattern) {
                for mat in regex.find_iter(aws_input) {
                    println!("AWS Match: '{}'", mat.as_str());
                    if let Some(captures) = regex.captures(mat.as_str()) {
                        for (i, cap) in captures.iter().enumerate() {
                            if let Some(cap) = cap {
                                println!(" AWS Capture {}: '{}'", i, cap.as_str());
                            }
                        }
                    }
                }
            }
        } else {
            println!("AWS rule has no regex pattern");
        }
    }
521
    /// Diagnostic test: uses the pre-compiled regex of the `generic-api-key`
    /// rule on several realistic assignments and prints, per match, the
    /// captures, the entropy of capture group 1 and the verdict of
    /// `should_allow_match`; then prints what `redact_secrets` detects.
    /// Panics if the rule or its compiled regex is missing; makes no other
    /// assertions.
    #[test]
    fn test_working_api_key_patterns() {
        let config = &*GITLEAKS_CONFIG;
        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();

        let regex = generic_rule
            .compiled_regex
            .as_ref()
            .expect("Regex should be compiled");

        let test_inputs = vec![
            "myapp_api_key = \"abc123def456ghi789jklmnop\"",
            "export SECRET_TOKEN=Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA0bD8eF",
            "app.auth.password: 9k2L8pMvB3nQ7rX1ZdF5GhJwY4AsPo6C8mN",
            "config.access_key=\"RaNd0mH1ghEnTr0pyV4luE567890abcdef\";",
            "DB_CREDENTIALS=xy9mP2nQ8rT4vW7yZ3cF6hJ1lN5sAdefghij",
        ];

        for input in test_inputs {
            println!("\nTesting: '{}'", input);

            let matches: Vec<_> = regex.find_iter(input).collect();
            println!(" Matches found: {}", matches.len());

            for (i, mat) in matches.iter().enumerate() {
                println!(" Match {}: '{}'", i, mat.as_str());

                // Captures are re-computed on the matched text alone.
                if let Some(captures) = regex.captures(mat.as_str()) {
                    for (j, cap) in captures.iter().enumerate() {
                        if let Some(cap) = cap {
                            println!(" Capture {}: '{}'", j, cap.as_str());
                            if j == 1 {
                                let entropy = calculate_entropy(cap.as_str());
                                println!(" Entropy: {:.2} (threshold: 3.5)", entropy);

                                // The allowlist check receives the whole match
                                // and its offsets within `input`.
                                let allowed = should_allow_match(
                                    input,
                                    None,
                                    mat.as_str(),
                                    mat.start(),
                                    mat.end(),
                                    generic_rule,
                                    &config.allowlist,
                                );
                                println!(" Allowed by allowlist: {}", allowed);
                            }
                        }
                    }
                }
            }

            // Same input through the public entry point, print-only.
            let result = redact_secrets(input, None, &HashMap::new(), false);
            println!(
                " Full function detected: {} secrets",
                result.redaction_map.len()
            );
            if !result.redaction_map.is_empty() {
                println!(" Redacted result: {}", result.redacted_string);
            }
        }
    }
592
    /// Diagnostic test: tries a ladder of increasingly specific hand-written
    /// patterns against one `export API_KEY=...` line, printing matches and
    /// captures, and finally checks whether the actual `generic-api-key`
    /// pattern compiles and matches the same line.
    /// Panics if the rule is missing; makes no other assertions.
    #[test]
    fn test_regex_components() {
        let test_input = "export API_KEY=Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA0bD8eF";
        println!("Testing input: {}", test_input);

        // (pattern, human-readable description) pairs, simplest first.
        let test_patterns = vec![
            (r"API_KEY", "Simple keyword match"),
            (r"(?i)api_key", "Case insensitive keyword"),
            (r"(?i).*key.*", "Any text with 'key'"),
            (r"(?i).*key\s*=", "Key with equals"),
            (r"(?i).*key\s*=\s*\w+", "Key with value"),
            (
                r"(?i)[\w.-]*(?:key).*?=.*?(\w{10,})",
                "Complex pattern with capture",
            ),
        ];

        for (pattern, description) in test_patterns {
            println!("\nTesting pattern: {} ({})", pattern, description);

            match Regex::new(pattern) {
                Ok(regex) => {
                    if regex.is_match(test_input) {
                        println!(" ✓ MATCHES");
                        for mat in regex.find_iter(test_input) {
                            println!(" Full match: '{}'", mat.as_str());
                        }
                        // Here captures are taken from the full input (first match).
                        if let Some(captures) = regex.captures(test_input) {
                            for (i, cap) in captures.iter().enumerate() {
                                if let Some(cap) = cap {
                                    println!(" Capture {}: '{}'", i, cap.as_str());
                                }
                            }
                        }
                    } else {
                        println!(" ✗ NO MATCH");
                    }
                }
                Err(e) => println!(" Error: {}", e),
            }
        }

        let config = &*GITLEAKS_CONFIG;
        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();

        println!("\nTesting actual gitleaks regex:");
        if let Some(regex_pattern) = &generic_rule.regex {
            match Regex::new(regex_pattern) {
                Ok(regex) => {
                    println!(" ✓ Regex compiles successfully");
                    println!(" Testing against: {}", test_input);
                    if regex.is_match(test_input) {
                        println!(" ✓ MATCHES");
                    } else {
                        println!(" ✗ NO MATCH");
                    }
                }
                Err(e) => println!(" ✗ Regex compilation error: {}", e),
            }
        } else {
            println!(" Rule has no regex pattern (path-based rule)");
        }
    }
663
664 #[test]
665 fn test_comprehensive_secrets_redaction() {
666 let input = r#"
667# Configuration file with various secrets
668export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7REALKEY
669export GITHUB_TOKEN=ghp_1234567890abcdef1234567890abcdef12345678
670export API_KEY=abc123def456ghi789jklmnop
671export SECRET_TOKEN=Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA0bD8eF
672export PASSWORD=supersecretpassword123456
673
674# Some normal configuration
675export DEBUG=true
676export PORT=3000
677"#;
678
679 println!("Original input:\n{}", input);
680
681 let result = redact_secrets(input, None, &HashMap::new(), false);
682
683 println!("Redacted output:\n{}", result.redacted_string);
684 println!("\nDetected {} secrets:", result.redaction_map.len());
685 for (key, value) in &result.redaction_map {
686 println!(" {} -> {}", key, value);
687 }
688
689 assert!(
691 result.redaction_map.len() >= 5,
692 "Should detect at least 5 secrets, found: {}",
693 result.redaction_map.len()
694 );
695
696 assert!(!result.redacted_string.contains("AKIAIOSFODNN7REALKEY"));
698 assert!(
699 !result
700 .redacted_string
701 .contains("ghp_1234567890abcdef1234567890abcdef12345678")
702 );
703 assert!(!result.redacted_string.contains("abc123def456ghi789jklmnop"));
704
705 assert!(result.redacted_string.contains("DEBUG=true"));
707 assert!(result.redacted_string.contains("PORT=3000"));
708 }
709
710 fn count_rules_that_would_process(input: &str) -> Vec<String> {
712 let config = &*GITLEAKS_CONFIG;
713 let mut rules = Vec::new();
714
715 for rule in &config.rules {
716 if rule.keywords.is_empty() || contains_any_keyword(input, &rule.keywords) {
717 rules.push(rule.id.clone());
718 }
719 }
720
721 rules
722 }
723
    /// Verifies keyword pre-filtering: an `API_KEY` line must contain a
    /// `generic-api-key` keyword, a `DATABASE_URL` line must not, and an AWS
    /// line must contain an `aws-access-token` keyword. Detection counts from
    /// `redact_secrets` are printed for information only.
    #[test]
    fn test_keyword_filtering() {
        println!("=== TESTING KEYWORD FILTERING ===");

        let config = &*GITLEAKS_CONFIG;

        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();
        println!("Generic API Key rule keywords: {:?}", generic_rule.keywords);

        let input_with_keywords = "export API_KEY=abc123def456ghi789jklmnop";
        let result1 = redact_secrets(input_with_keywords, None, &HashMap::new(), false);
        println!("\nTest 1 - Input WITH keywords:");
        println!(" Input: {}", input_with_keywords);
        println!(
            " Keywords present: {}",
            contains_any_keyword(input_with_keywords, &generic_rule.keywords)
        );
        println!(" Secrets detected: {}", result1.redaction_map.len());

        let input_without_keywords = "export DATABASE_URL=postgresql://user:pass@localhost/db";
        let result2 = redact_secrets(input_without_keywords, None, &HashMap::new(), false);
        println!("\nTest 2 - Input WITHOUT generic-api-key keywords:");
        println!(" Input: {}", input_without_keywords);
        println!(
            " Keywords present: {}",
            contains_any_keyword(input_without_keywords, &generic_rule.keywords)
        );
        println!(" Secrets detected: {}", result2.redaction_map.len());

        let aws_rule = config
            .rules
            .iter()
            .find(|r| r.id == "aws-access-token")
            .unwrap();
        let aws_input = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE";
        let result3 = redact_secrets(aws_input, None, &HashMap::new(), false);
        println!("\nTest 3 - AWS input:");
        println!(" Input: {}", aws_input);
        println!(" AWS rule keywords: {:?}", aws_rule.keywords);
        println!(
            " Keywords present: {}",
            contains_any_keyword(aws_input, &aws_rule.keywords)
        );
        println!(" Secrets detected: {}", result3.redaction_map.len());

        // Only the keyword checks are asserted; the detection counts above are
        // informational.
        assert!(
            contains_any_keyword(input_with_keywords, &generic_rule.keywords),
            "API_KEY input should contain generic-api-key keywords"
        );
        assert!(
            !contains_any_keyword(input_without_keywords, &generic_rule.keywords),
            "DATABASE_URL input should NOT contain generic-api-key keywords"
        );
        assert!(
            contains_any_keyword(aws_input, &aws_rule.keywords),
            "AWS input should contain AWS rule keywords"
        );
    }
791
    /// Checks that keyword matching narrows the rule set: an input without
    /// secret keywords must not match every rule's keywords, and a
    /// `GITHUB_TOKEN` input must match at least one GitHub-related rule.
    /// Despite the name, nothing is timed; counts are printed for information.
    #[test]
    fn test_keyword_optimization_performance() {
        println!("=== TESTING KEYWORD OPTIMIZATION PERFORMANCE ===");

        let config = &*GITLEAKS_CONFIG;

        let no_keywords_input = "export DATABASE_CONNECTION=some_long_connection_string_that_has_no_common_secret_keywords";
        println!("Testing input with no secret keywords:");
        println!(" Input: {}", no_keywords_input);

        // Count rules whose keyword list matches the input.
        let mut keyword_matches = 0;
        for rule in &config.rules {
            if contains_any_keyword(no_keywords_input, &rule.keywords) {
                keyword_matches += 1;
                println!(" Rule '{}' keywords match: {:?}", rule.id, rule.keywords);
            }
        }
        println!(
            " Rules with matching keywords: {} out of {}",
            keyword_matches,
            config.rules.len()
        );

        let result = redact_secrets(no_keywords_input, None, &HashMap::new(), false);
        println!(" Secrets detected: {}", result.redaction_map.len());

        let specific_keywords_input = "export GITHUB_TOKEN=ghp_1234567890abcdef";
        println!("\nTesting input with specific keywords (github):");
        println!(" Input: {}", specific_keywords_input);

        let mut matching_rules = Vec::new();
        for rule in &config.rules {
            if contains_any_keyword(specific_keywords_input, &rule.keywords) {
                matching_rules.push(&rule.id);
            }
        }
        println!(" Rules that would be processed: {:?}", matching_rules);

        let result = redact_secrets(specific_keywords_input, None, &HashMap::new(), false);
        println!(" Secrets detected: {}", result.redaction_map.len());

        let rules_without_keywords: Vec<_> = config
            .rules
            .iter()
            .filter(|rule| rule.keywords.is_empty())
            .collect();
        println!(
            "\nRules without keywords (always processed): {}",
            rules_without_keywords.len()
        );
        for rule in &rules_without_keywords {
            println!(" - {}", rule.id);
        }

        assert!(
            keyword_matches < config.rules.len(),
            "Input with no keywords should not match all rules"
        );
        assert!(
            !matching_rules.is_empty(),
            "GitHub token input should match some rules"
        );
        // Passes when any matched rule id contains "github" (which includes
        // the exact id "github-pat").
        assert!(
            matching_rules.contains(&&"github-pat".to_string())
                || matching_rules
                    .iter()
                    .any(|rule_id| rule_id.contains("github")),
            "GitHub token should match GitHub-related rules"
        );
    }
866
    /// Checks that keyword filtering skips at least one rule for a non-secret
    /// input, that a secret-bearing input matches at least one rule's
    /// keywords, and that `redact_secrets` detects something in the latter.
    /// The skipped/processed counts and percentage are printed for information.
    #[test]
    fn test_keyword_filtering_efficiency() {
        println!("=== KEYWORD FILTERING EFFICIENCY TEST ===");

        let config = &*GITLEAKS_CONFIG;
        println!("Total rules in config: {}", config.rules.len());

        let non_secret_input = "export DATABASE_URL=localhost PORT=3000 DEBUG=true TIMEOUT=30";
        println!("\nTesting non-secret input: {}", non_secret_input);

        let mut rules_skipped = 0;
        let mut rules_processed = 0;

        // A rule counts as processed when it has no keywords or one of its
        // keywords occurs in the input.
        for rule in &config.rules {
            if rule.keywords.is_empty() || contains_any_keyword(non_secret_input, &rule.keywords) {
                rules_processed += 1;
            } else {
                rules_skipped += 1;
            }
        }

        println!(
            " Rules skipped due to keyword filtering: {}",
            rules_skipped
        );
        println!(" Rules that would be processed: {}", rules_processed);
        println!(
            " Efficiency gain: {:.1}% of rules skipped",
            (rules_skipped as f64 / config.rules.len() as f64) * 100.0
        );

        let result = redact_secrets(non_secret_input, None, &HashMap::new(), false);
        println!(" Secrets detected: {}", result.redaction_map.len());

        let secret_input =
            "export API_KEY=abc123def456ghi789jklmnop SECRET_TOKEN=xyz789uvw012rst345def678";
        println!("\nTesting input WITH secret keywords:");
        println!(" Input: {}", secret_input);

        let mut rules_with_keywords = 0;
        for rule in &config.rules {
            if contains_any_keyword(secret_input, &rule.keywords) {
                rules_with_keywords += 1;
            }
        }

        println!(" Rules that match keywords: {}", rules_with_keywords);

        // `result` is shadowed: the final assertion refers to the secret input.
        let result = redact_secrets(secret_input, None, &HashMap::new(), false);
        println!(" Secrets detected: {}", result.redaction_map.len());

        assert!(
            rules_skipped > 0,
            "Should skip at least some rules for non-secret input"
        );
        assert!(
            rules_with_keywords > 0,
            "Should find matching rules for secret input"
        );
        assert!(
            !result.redaction_map.is_empty(),
            "Should detect at least one secret"
        );
    }
935
    /// Three-scenario summary using `detect_secrets` directly: a line without
    /// secret keywords must yield no findings, while an `API_KEY` line and an
    /// AWS access key line must each yield at least one. The list of rules
    /// that would be processed is printed for each scenario.
    #[test]
    fn test_keyword_validation_summary() {
        println!("=== KEYWORD VALIDATION SUMMARY ===");

        let config = &*GITLEAKS_CONFIG;
        let total_rules = config.rules.len();
        println!("Total rules in gitleaks config: {}", total_rules);

        // Scenario 1: nothing secret-looking.
        let no_keyword_input = "export DATABASE_URL=localhost PORT=3000";
        println!("\n--- No keywords - should skip all rules ---");
        println!("Input: {}", no_keyword_input);

        let no_keyword_rules = count_rules_that_would_process(no_keyword_input);
        println!(
            "Rules that would be processed: {} out of {}",
            no_keyword_rules.len(),
            total_rules
        );
        println!(" Rules: {:?}", no_keyword_rules);

        let no_keyword_secrets = detect_secrets(no_keyword_input, None, false);
        println!(
            "Secrets detected: {} (expected: 0)",
            no_keyword_secrets.len()
        );
        assert_eq!(no_keyword_secrets.len(), 0, "Should not detect any secrets");
        println!("✅ Test passed");

        // Scenario 2: generic API key.
        let api_input = "export API_KEY=abc123def456ghi789jklmnop";
        println!("\n--- API keyword - should process generic-api-key rule ---");
        println!("Input: {}", api_input);

        let api_rules = count_rules_that_would_process(api_input);
        println!(
            "Rules that would be processed: {} out of {}",
            api_rules.len(),
            total_rules
        );
        println!(" Rules: {:?}", api_rules);

        let api_secrets = detect_secrets(api_input, None, false);
        println!("Secrets detected: {} (expected: 1)", api_secrets.len());
        assert!(!api_secrets.is_empty(), "Should detect at least 1 secrets");
        println!("✅ Test passed");

        // Scenario 3: AWS access key id.
        let aws_input = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7REALKEY";
        println!("\n--- AWS keyword - should process aws-access-token rule ---");
        println!("Input: {}", aws_input);

        let aws_rules = count_rules_that_would_process(aws_input);
        println!(
            "Rules that would be processed: {} out of {}",
            aws_rules.len(),
            total_rules
        );
        println!(" Rules: {:?}", aws_rules);

        let aws_secrets = detect_secrets(aws_input, None, false);
        println!("Secrets detected: {} (expected: 1)", aws_secrets.len());

        assert!(!aws_secrets.is_empty(), "Should detect at least 1 secrets");
        println!("✅ Test passed");
    }
1004
    /// Diagnostic test for two sample assignments: prints the entropy of the
    /// value after `=`, whether the regex from `create_simple_api_key_regex`
    /// matches, what `should_allow_match` says about each match, and how many
    /// secrets `redact_secrets` reports. Panics if the `generic-api-key` rule
    /// is missing when a match is found; makes no other assertions.
    #[test]
    fn test_debug_missing_secrets() {
        println!("=== DEBUGGING MISSING SECRETS ===");

        let test_cases = vec![
            "SECRET_TOKEN=Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA0bD8eF",
            "PASSWORD=supersecretpassword123456",
        ];

        for input in test_cases {
            println!("\nTesting: {}", input);

            // Entropy is only reported when the input has exactly one '='.
            let parts: Vec<&str> = input.split('=').collect();
            if parts.len() == 2 {
                let secret_value = parts[1];
                let entropy = calculate_entropy(secret_value);
                println!(" Secret value: '{}'", secret_value);
                println!(" Entropy: {:.2} (threshold: 3.5)", entropy);

                if entropy >= 3.5 {
                    println!(" ✓ Entropy check PASSED");
                } else {
                    println!(" ✗ Entropy check FAILED - this is why it's not detected");
                }
            }

            // NOTE(review): the regex is rebuilt for every input; an `Err`
            // result is silently skipped.
            if let Ok(regex) = create_simple_api_key_regex() {
                println!(" Testing fallback regex:");
                if regex.is_match(input) {
                    println!(" ✓ Fallback regex MATCHES");
                    for mat in regex.find_iter(input) {
                        println!(" Match: '{}'", mat.as_str());
                        // Captures are re-computed on the matched text alone.
                        if let Some(captures) = regex.captures(mat.as_str()) {
                            for (i, cap) in captures.iter().enumerate() {
                                if let Some(cap) = cap {
                                    println!(" Capture {}: '{}'", i, cap.as_str());
                                }
                            }
                        }

                        let config = &*GITLEAKS_CONFIG;
                        let generic_rule = config
                            .rules
                            .iter()
                            .find(|r| r.id == "generic-api-key")
                            .unwrap();
                        let allowed = should_allow_match(
                            input,
                            None,
                            mat.as_str(),
                            mat.start(),
                            mat.end(),
                            generic_rule,
                            &config.allowlist,
                        );
                        println!(" Allowed by allowlist: {}", allowed);
                        if allowed {
                            println!(
                                " ✗ FILTERED OUT by allowlist - this is why it's not detected"
                            );
                        }
                    }
                } else {
                    println!(" ✗ Fallback regex does NOT match");
                }
            }

            // Same input through the public entry point, print-only.
            let result = redact_secrets(input, None, &HashMap::new(), false);
            println!(
                " Full detection result: {} secrets",
                result.redaction_map.len()
            );
        }
    }
1083
    /// Diagnostic test: for each match of the regex from
    /// `create_simple_api_key_regex` in two sample assignments, prints which
    /// global allowlist regexes/stopwords and which `generic-api-key` rule
    /// allowlist regexes/stopwords would match it. Stopwords are compared
    /// case-insensitively by substring. Panics if the rule is missing; makes
    /// no other assertions.
    #[test]
    fn test_debug_allowlist_filtering() {
        println!("=== DEBUGGING ALLOWLIST FILTERING ===");

        let test_cases = vec![
            "SECRET_TOKEN=Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA0bD8eF",
            "PASSWORD=supersecretpassword123456",
        ];

        let config = &*GITLEAKS_CONFIG;
        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();

        for input in test_cases {
            println!("\nAnalyzing: {}", input);

            // NOTE(review): the regex is rebuilt for every input; an `Err`
            // result is silently skipped.
            if let Ok(regex) = create_simple_api_key_regex() {
                for mat in regex.find_iter(input) {
                    let match_text = mat.as_str();
                    println!(" Match: '{}'", match_text);

                    // Global (config-wide) allowlist.
                    if let Some(global_allowlist) = &config.allowlist {
                        println!(" Checking global allowlist:");

                        if let Some(regexes) = &global_allowlist.regexes {
                            for (i, pattern) in regexes.iter().enumerate() {
                                // Each allowlist pattern is compiled on the
                                // fly; invalid patterns are ignored.
                                if let Ok(regex) = Regex::new(pattern)
                                    && regex.is_match(match_text)
                                {
                                    println!(" ✗ FILTERED by global regex {}: '{}'", i, pattern);
                                }
                            }
                        }

                        if let Some(stopwords) = &global_allowlist.stopwords {
                            for stopword in stopwords {
                                if match_text.to_lowercase().contains(&stopword.to_lowercase()) {
                                    println!(" ✗ FILTERED by global stopword: '{}'", stopword);
                                }
                            }
                        }
                    }

                    // Allowlists attached to the rule itself.
                    if let Some(rule_allowlists) = &generic_rule.allowlists {
                        for (rule_idx, allowlist) in rule_allowlists.iter().enumerate() {
                            println!(" Checking rule allowlist {}:", rule_idx);

                            if let Some(regexes) = &allowlist.regexes {
                                for (i, pattern) in regexes.iter().enumerate() {
                                    if let Ok(regex) = Regex::new(pattern)
                                        && regex.is_match(match_text)
                                    {
                                        println!(
                                            " ✗ FILTERED by rule regex {}: '{}'",
                                            i, pattern
                                        );
                                    }
                                }
                            }

                            if let Some(stopwords) = &allowlist.stopwords {
                                for stopword in stopwords {
                                    if match_text.to_lowercase().contains(&stopword.to_lowercase())
                                    {
                                        println!(" ✗ FILTERED by rule stopword: '{}'", stopword);
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
1167
/// Debugging aid: for every match of the simple API-key fallback regex, prints
/// how a fixed set of sample stopwords relates to the value after the first
/// `=` (exact match, short-value containment, or no filter), followed by the
/// verdict of `is_allowed_by_rule_allowlist` for each `generic-api-key` rule
/// allowlist.
///
/// The test makes no assertions about the verdicts; it only requires that the
/// `generic-api-key` rule is present in the loaded configuration.
#[test]
fn test_debug_new_allowlist_logic() {
    println!("=== DEBUGGING NEW ALLOWLIST LOGIC ===");

    let test_cases = [
        "SECRET_TOKEN=Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA0bD8eF",
        "PASSWORD=supersecretpassword123456",
        "PASSWORD=password123",
        "API_KEY=example_key",
    ];

    let config = &*GITLEAKS_CONFIG;
    let generic_rule = config
        .rules
        .iter()
        .find(|r| r.id == "generic-api-key")
        .expect("gitleaks config should contain the generic-api-key rule");

    for input in test_cases {
        println!("\nTesting: {}", input);

        // Without the fallback regex there is nothing to analyze for this input.
        let Ok(regex) = create_simple_api_key_regex() else {
            continue;
        };

        for mat in regex.find_iter(input) {
            let match_text = mat.as_str();
            println!(" Match: '{}'", match_text);

            // Everything after the first '=' is treated as the candidate value.
            if let Some((_, value)) = match_text.split_once('=') {
                println!(" Value: '{}'", value);

                // Lowercase the value once; it does not change between stopwords.
                let value_lower = value.to_lowercase();

                let test_stopwords = ["token", "password", "super", "word"];
                for stopword in test_stopwords {
                    let stopword_lower = stopword.to_lowercase();

                    if value_lower == stopword_lower {
                        println!(" '{}' - Exact match: YES", stopword);
                    } else if value.len() < 15 && value_lower.contains(&stopword_lower) {
                        // A short value counts as "simple" when only digits and
                        // punctuation remain once the stopword is removed.
                        let without_stopword = value_lower.replace(&stopword_lower, "");
                        let is_simple = without_stopword.chars().all(|c| {
                            c.is_ascii_digit() || "!@#$%^&*()_+-=[]{}|;:,.<>?".contains(c)
                        });
                        println!(
                            " '{}' - Short+contains: len={}, without='{}', simple={}",
                            stopword,
                            value.len(),
                            without_stopword,
                            is_simple
                        );
                    } else {
                        println!(" '{}' - No filter", stopword);
                    }
                }
            }

            if let Some(rule_allowlists) = &generic_rule.allowlists {
                for (rule_idx, allowlist) in rule_allowlists.iter().enumerate() {
                    let allowed = is_allowed_by_rule_allowlist(
                        input,
                        None,
                        match_text,
                        mat.start(),
                        mat.end(),
                        allowlist,
                    );
                    println!(" Rule allowlist {}: allowed = {}", rule_idx, allowed);
                }
            }
        }
    }
}
1243
/// A password that occurs in the content must disappear from the redacted
/// string, be replaced by a `[REDACTED_SECRET:password:` placeholder, and be
/// the single value stored in the redaction map.
#[test]
fn test_redact_password_basic() {
    let secret = "supersecret123";
    let text = "User password is supersecret123 and should be hidden";

    let outcome = redact_password(text, secret, &HashMap::new());
    let redacted = outcome.redacted_string.as_str();

    // The raw password is gone and a password placeholder took its place.
    assert!(!redacted.contains(secret));
    assert!(redacted.contains("[REDACTED_SECRET:password:"));

    // Exactly one mapping exists, and it points back at the original password.
    assert_eq!(outcome.redaction_map.len(), 1);
    let stored = outcome.redaction_map.values().next().map(String::as_str);
    assert_eq!(stored, Some(secret));
}
1263
/// An empty password must leave the content untouched and produce an empty
/// redaction map.
#[test]
fn test_redact_password_empty() {
    let text = "Some content without password";

    let outcome = redact_password(text, "", &HashMap::new());

    assert_eq!(outcome.redacted_string.as_str(), text);
    assert_eq!(outcome.redaction_map.len(), 0);
}
1274
/// Every occurrence of the password must be replaced by the same placeholder:
/// one map entry, and that entry's key appears twice in the redacted string.
#[test]
fn test_redact_password_multiple_occurrences() {
    let secret = "mypass123";
    let text = "Password is mypass123 and again mypass123 appears here";

    let outcome = redact_password(text, secret, &HashMap::new());

    assert!(!outcome.redacted_string.contains(secret));
    assert_eq!(outcome.redaction_map.len(), 1);

    // Both occurrences share the single placeholder key.
    let placeholder = outcome.redaction_map.keys().next().unwrap();
    let occurrences = outcome
        .redacted_string
        .matches(placeholder.as_str())
        .count();
    assert_eq!(occurrences, 2);
}
1290
/// When the supplied redaction map already holds a key for the password, that
/// key must be reused: the result map keeps a single entry under the existing
/// key and the redacted string contains that same key.
#[test]
fn test_redact_password_reuse_existing_key() {
    const EXISTING_KEY: &str = "[REDACTED_SECRET:password:abc123]";
    let secret = "mypassword";

    // Pre-seed the map with a placeholder that already covers the password.
    let prior_map = HashMap::from([(EXISTING_KEY.to_string(), secret.to_string())]);

    let outcome = redact_password(
        "The password mypassword should use existing key",
        secret,
        &prior_map,
    );

    assert_eq!(outcome.redaction_map.len(), 1);
    assert!(outcome.redaction_map.contains_key(EXISTING_KEY));
    assert!(outcome.redacted_string.contains(EXISTING_KEY));
}
1317
/// An unrelated entry in the supplied redaction map must be carried over
/// unchanged, while the password gets exactly one new entry whose key contains
/// `password`.
#[test]
fn test_redact_password_with_existing_different_secrets() {
    const API_KEY_PLACEHOLDER: &str = "[REDACTED_SECRET:api-key:xyz789]";

    let prior_map = HashMap::from([(
        API_KEY_PLACEHOLDER.to_string(),
        "some_api_key".to_string(),
    )]);

    let outcome = redact_password(
        "API key is some_api_key and password is newpassword123",
        "newpassword123",
        &prior_map,
    );
    let map = &outcome.redaction_map;

    assert_eq!(map.len(), 2);

    // The pre-existing API-key entry survives with its original value.
    assert!(map.contains_key(API_KEY_PLACEHOLDER));
    assert!(map.get(API_KEY_PLACEHOLDER).map(String::as_str) == Some("some_api_key"));

    // Exactly one password-flavoured key was added, mapping to the new password.
    let password_entries: Vec<(&String, &String)> = map
        .iter()
        .filter(|(key, _)| key.contains("password"))
        .collect();
    assert_eq!(password_entries.len(), 1);
    assert_eq!(password_entries[0].1, "newpassword123");
}
1359
/// When the password does not occur in the content, the content is returned
/// unchanged, yet the redaction map is still expected to hold one entry whose
/// value is the password.
#[test]
fn test_redact_password_no_match() {
    let text = "This content has no matching password";

    let outcome = redact_password(text, "notfound", &HashMap::new());

    assert_eq!(outcome.redacted_string.as_str(), text);
    assert_eq!(outcome.redaction_map.len(), 1);

    let stored = outcome.redaction_map.values().next().unwrap();
    assert_eq!(stored.as_str(), "notfound");
}
1371
/// Round trip: redacting a password and then restoring with the returned map
/// must reproduce the original content, while non-secret text stays readable
/// in the redacted form.
#[test]
fn test_redact_password_integration_with_restore() {
    let secret = "secret456";
    let text = "Login with username admin and password secret456";

    let outcome = redact_password(text, secret, &HashMap::new());
    let redacted = outcome.redacted_string.as_str();

    // Only the password is hidden; the surrounding text is preserved.
    assert!(!redacted.contains(secret));
    assert!(redacted.contains("username admin"));

    let round_trip = restore_secrets(&outcome.redacted_string, &outcome.redaction_map);
    assert_eq!(round_trip, text);
}
1386
/// A value that `redact_secrets` leaves alone with an empty map must be
/// redacted once the supplied map lists it, using the key from that map, and
/// the key/value pair must be present in the returned map.
#[test]
fn test_redact_secrets_with_existing_redaction_map() {
    const MANUAL_KEY: &str = "[REDACTED_SECRET:manual:abc123]";
    let known_secret = "mysecretvalue123";
    let text = "The secret value is mysecretvalue123 and another is anothersecret456";

    // Baseline: with no prior map the value is left in the output.
    let baseline = redact_secrets(text, None, &HashMap::new(), false);
    assert!(baseline.redacted_string.contains(known_secret));

    // Seed a map that already associates the value with a manual key.
    let prior_map = HashMap::from([(MANUAL_KEY.to_string(), known_secret.to_string())]);
    let outcome = redact_secrets(text, None, &prior_map, false);

    // The seeded key replaces the value in the output.
    assert!(outcome.redacted_string.contains(MANUAL_KEY));
    assert!(!outcome.redacted_string.contains(known_secret));

    // The seeded mapping is carried into the returned map unchanged.
    assert!(outcome.redaction_map.contains_key(MANUAL_KEY));
    let carried_over = outcome.redaction_map.get(MANUAL_KEY).unwrap();
    assert_eq!(carried_over, known_secret);
}
1428}