1use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::sync::LazyLock;
5
6#[derive(Debug, Deserialize, Clone)]
7pub struct GitleaksConfig {
8 #[allow(dead_code)]
9 pub title: Option<String>,
10 pub allowlist: Option<Allowlist>,
11 pub rules: Vec<Rule>,
12}
13
14#[derive(Debug, Deserialize, Clone)]
15pub struct Allowlist {
16 #[allow(dead_code)]
17 pub description: Option<String>,
18 #[allow(dead_code)]
19 pub paths: Option<Vec<String>>,
20 pub regexes: Option<Vec<String>>,
21 pub stopwords: Option<Vec<String>>,
22 #[serde(skip)]
24 pub compiled_regexes: Vec<Regex>,
25}
26
27#[derive(Debug, Deserialize, Clone)]
28pub struct Rule {
29 pub id: String,
30 #[allow(dead_code)]
31 pub description: String,
32 pub regex: Option<String>,
33 pub entropy: Option<f64>,
34 #[serde(default)]
35 pub keywords: Vec<String>,
36 #[allow(dead_code)]
37 pub path: Option<String>,
38 pub allowlists: Option<Vec<RuleAllowlist>>,
39 #[serde(skip)]
41 pub compiled_regex: Option<Regex>,
42}
43
44#[derive(Debug, Deserialize, Clone)]
45pub struct RuleAllowlist {
46 #[allow(dead_code)]
47 pub description: Option<String>,
48 pub condition: Option<String>, pub paths: Option<Vec<String>>,
50 pub regexes: Option<Vec<String>>,
51 pub stopwords: Option<Vec<String>>,
52 #[serde(rename = "regexTarget")]
53 pub regex_target: Option<String>, #[serde(skip)]
56 pub compiled_regexes: Vec<Regex>,
57}
58
/// One finding produced by [`detect_secrets`].
#[derive(Clone, Debug)]
pub struct DetectedSecret {
    /// Identifier of the rule that produced the finding.
    pub rule_id: String,
    /// The secret text: capture group 1 of the rule's regex when it
    /// participated in the match, otherwise the whole match.
    pub value: String,
    /// Offset of the first byte of `value` within the scanned input.
    pub start_pos: usize,
    /// Offset one past the last byte of `value` within the scanned input.
    pub end_pos: usize,
}
71
72#[derive(Debug, Default, Serialize)]
73pub struct CompilationErrors {
74 pub regex_errors: Vec<(String, String)>, pub warnings: Vec<String>,
76}
77
78impl CompilationErrors {
79 pub fn add_regex_error(&mut self, rule_id: String, error: String) {
80 self.regex_errors.push((rule_id, error));
81 }
82
83 pub fn add_warning(&mut self, warning: String) {
84 self.warnings.push(warning);
85 }
86
87 #[allow(dead_code)]
88 pub fn is_empty(&self) -> bool {
89 self.regex_errors.is_empty() && self.warnings.is_empty()
90 }
91}
92
93pub trait RegexCompilable {
95 fn compile_regexes(&mut self) -> CompilationErrors;
96}
97
98impl RegexCompilable for Allowlist {
99 fn compile_regexes(&mut self) -> CompilationErrors {
100 let mut errors = CompilationErrors::default();
101 self.compiled_regexes.clear();
102
103 if let Some(regexes) = &self.regexes {
104 for pattern in regexes {
105 match Regex::new(pattern) {
106 Ok(regex) => self.compiled_regexes.push(regex),
107 Err(e) => errors.add_warning(format!(
108 "Failed to compile allowlist regex '{}': {}",
109 pattern, e
110 )),
111 }
112 }
113 }
114
115 errors
116 }
117}
118
119impl RegexCompilable for RuleAllowlist {
120 fn compile_regexes(&mut self) -> CompilationErrors {
121 let mut errors = CompilationErrors::default();
122 self.compiled_regexes.clear();
123
124 if let Some(regexes) = &self.regexes {
125 for pattern in regexes {
126 match Regex::new(pattern) {
127 Ok(regex) => self.compiled_regexes.push(regex),
128 Err(e) => errors.add_warning(format!(
129 "Failed to compile rule allowlist regex '{}': {}",
130 pattern, e
131 )),
132 }
133 }
134 }
135
136 errors
137 }
138}
139
140impl RegexCompilable for Rule {
141 fn compile_regexes(&mut self) -> CompilationErrors {
142 let mut errors = CompilationErrors::default();
143
144 if let Some(regex_pattern) = &self.regex {
146 match Regex::new(regex_pattern) {
147 Ok(regex) => self.compiled_regex = Some(regex),
148 Err(e) => {
149 match self.id.as_str() {
151 "generic-api-key" | "pypi-upload-token" | "vault-batch-token" => {
152 match create_simple_api_key_regex() {
153 Ok(simple_regex) => {
154 self.compiled_regex = Some(simple_regex);
155 errors.add_warning(format!(
156 "Used fallback regex for rule '{}' due to: {}",
157 self.id, e
158 ));
159 }
160 Err(fallback_err) => {
161 errors.add_regex_error(
162 self.id.clone(),
163 format!(
164 "Failed to compile regex and fallback: {} / {}",
165 e, fallback_err
166 ),
167 );
168 }
169 }
170 }
171 _ => {
172 errors.add_regex_error(self.id.clone(), e.to_string());
173 }
174 }
175 }
176 }
177 } else {
178 self.compiled_regex = None;
181 }
182
183 if let Some(allowlists) = &mut self.allowlists {
185 for allowlist in allowlists {
186 let allowlist_errors = allowlist.compile_regexes();
187 errors.warnings.extend(allowlist_errors.warnings);
188 errors.regex_errors.extend(allowlist_errors.regex_errors);
189 }
190 }
191
192 errors
193 }
194}
195
196impl RegexCompilable for GitleaksConfig {
197 fn compile_regexes(&mut self) -> CompilationErrors {
198 let mut errors = CompilationErrors::default();
199
200 if let Some(allowlist) = &mut self.allowlist {
202 let allowlist_errors = allowlist.compile_regexes();
203 errors.warnings.extend(allowlist_errors.warnings);
204 errors.regex_errors.extend(allowlist_errors.regex_errors);
205 }
206
207 let mut compiled_rules = Vec::new();
209 for mut rule in self.rules.drain(..) {
210 let rule_errors = rule.compile_regexes();
211 errors.warnings.extend(rule_errors.warnings);
212 errors.regex_errors.extend(rule_errors.regex_errors);
213
214 if rule.compiled_regex.is_some() || rule.regex.is_none() {
216 compiled_rules.push(rule);
217 }
218 }
219 self.rules = compiled_rules;
220
221 errors
222 }
223}
224
225pub static GITLEAKS_CONFIG: LazyLock<GitleaksConfig> =
227 LazyLock::new(|| create_gitleaks_config(false));
228
229pub static GITLEAKS_CONFIG_WITH_PRIVACY: LazyLock<GitleaksConfig> =
231 LazyLock::new(|| create_gitleaks_config(true));
232
233fn create_gitleaks_config(include_privacy_rules: bool) -> GitleaksConfig {
235 let config_str = include_str!("gitleaks.toml");
237 let mut config: GitleaksConfig =
238 toml::from_str(config_str).expect("Failed to parse gitleaks.toml");
239
240 let additional_config_str = include_str!("additional_rules.toml");
242 let additional_config: GitleaksConfig =
243 toml::from_str(additional_config_str).expect("Failed to parse additional_rules.toml");
244
245 config.rules.extend(additional_config.rules);
247
248 if let Some(additional_allowlist) = additional_config.allowlist {
250 merge_allowlist(&mut config.allowlist, additional_allowlist);
251 }
252
253 if include_privacy_rules {
255 let privacy_config_str = include_str!("privacy_rules.toml");
256 let privacy_config: GitleaksConfig =
257 toml::from_str(privacy_config_str).expect("Failed to parse privacy_rules.toml");
258
259 config.rules.extend(privacy_config.rules);
261
262 if let Some(privacy_allowlist) = privacy_config.allowlist {
264 merge_allowlist(&mut config.allowlist, privacy_allowlist);
265 }
266 }
267
268 let compilation_errors = config.compile_regexes();
269 if !compilation_errors.regex_errors.is_empty() {
270 const ERROR_LOG_FILE: &str = ".stakpak_mcp_secret_detection_errors";
271 if let Ok(json) = serde_json::to_string(&compilation_errors)
273 && let Err(e) = std::fs::write(ERROR_LOG_FILE, json)
274 {
275 eprintln!("Failed to write errors to log file: {}", e);
276 }
277 }
278 config
279}
280
281fn merge_allowlist(target: &mut Option<Allowlist>, source: Allowlist) {
283 match target {
284 Some(existing_allowlist) => {
285 if let Some(additional_regexes) = source.regexes {
287 match &mut existing_allowlist.regexes {
288 Some(existing_regexes) => existing_regexes.extend(additional_regexes),
289 None => existing_allowlist.regexes = Some(additional_regexes),
290 }
291 }
292
293 if let Some(additional_stopwords) = source.stopwords {
295 match &mut existing_allowlist.stopwords {
296 Some(existing_stopwords) => existing_stopwords.extend(additional_stopwords),
297 None => existing_allowlist.stopwords = Some(additional_stopwords),
298 }
299 }
300 }
301 None => *target = Some(source),
302 }
303}
304
305pub fn create_simple_api_key_regex() -> Result<Regex, regex::Error> {
307 let pattern = r#"(?i)[\w.-]{0,30}?(?:access|auth|api|credential|creds|key|password|passwd|secret|token)[\w.-]{0,15}[\s'"]{0,3}(?:=|>|:{1,2}=|\|\||:|=>|\?=|,)[\s'"=]{0,3}([\w.=-]{10,80}|[a-z0-9][a-z0-9+/]{11,}={0,2})(?:[\s'";]|$)"#;
318 Regex::new(pattern)
319}
320
/// Computes the Shannon entropy of `text`, in bits per character.
///
/// The distribution is taken over Unicode scalar values (`char`s). The result
/// is `0.0` for an empty string and for a string made of one repeated
/// character, and `log2(k)` for a string of `k` distinct characters that all
/// occur equally often.
pub fn calculate_entropy(text: &str) -> f64 {
    if text.is_empty() {
        return 0.0;
    }

    let mut char_counts = std::collections::HashMap::new();
    // Count characters, not bytes: dividing per-character counts by the byte
    // length would make the probabilities of non-ASCII text sum to less
    // than 1 and under-report its entropy.
    let mut total_chars = 0usize;
    for ch in text.chars() {
        *char_counts.entry(ch).or_insert(0usize) += 1;
        total_chars += 1;
    }

    let total = total_chars as f64;
    // Every stored count is at least 1, so each probability is strictly
    // positive and `log2` is always finite.
    char_counts.values().fold(0.0, |entropy, &count| {
        let probability = count as f64 / total;
        entropy - probability * probability.log2()
    })
}
349
350pub fn detect_secrets(input: &str, path: Option<&str>, privacy_mode: bool) -> Vec<DetectedSecret> {
360 let mut detected_secrets = Vec::new();
361 let config = if privacy_mode {
362 &*GITLEAKS_CONFIG_WITH_PRIVACY
363 } else {
364 &*GITLEAKS_CONFIG
365 };
366
367 for rule in &config.rules {
369 let regex = match &rule.compiled_regex {
371 Some(regex) => regex,
372 None => continue,
373 };
374
375 if !rule.keywords.is_empty() && !contains_any_keyword(input, &rule.keywords) {
377 continue;
378 }
379
380 for mat in regex.find_iter(input) {
382 let match_text = mat.as_str();
383 let start_pos = mat.start();
384 let end_pos = mat.end();
385
386 if should_allow_match(
388 input,
389 path,
390 match_text,
391 start_pos,
392 end_pos,
393 rule,
394 &config.allowlist,
395 ) {
396 continue;
397 }
398
399 let (secret_value, secret_start, secret_end) =
401 if let Some(captures) = regex.captures_at(input, start_pos) {
402 if let Some(capture) = captures.get(1) {
404 (capture.as_str().to_string(), capture.start(), capture.end())
406 } else {
407 (match_text.to_string(), start_pos, end_pos)
408 }
409 } else {
410 (match_text.to_string(), start_pos, end_pos)
411 };
412
413 if let Some(entropy_threshold) = rule.entropy {
415 let calculated_entropy = calculate_entropy(&secret_value);
416 if calculated_entropy < entropy_threshold {
417 continue;
418 }
419 }
420
421 detected_secrets.push(DetectedSecret {
422 rule_id: rule.id.clone(),
423 value: secret_value,
424 start_pos: secret_start,
425 end_pos: secret_end,
426 });
427 }
428 }
429
430 detected_secrets
431}
432
433pub fn should_allow_match(
435 input: &str,
436 path: Option<&str>,
437 match_text: &str,
438 start_pos: usize,
439 end_pos: usize,
440 rule: &Rule,
441 global_allowlist: &Option<Allowlist>,
442) -> bool {
443 if let Some(global) = global_allowlist
445 && is_allowed_by_allowlist(input, match_text, start_pos, end_pos, global)
446 {
447 return true;
448 }
449
450 if let Some(rule_allowlists) = &rule.allowlists {
452 for allowlist in rule_allowlists {
453 if is_allowed_by_rule_allowlist(input, path, match_text, start_pos, end_pos, allowlist)
454 {
455 return true;
456 }
457 }
458 }
459
460 false
461}
462
463fn is_allowed_by_allowlist(
464 _input: &str,
465 match_text: &str,
466 _start_pos: usize,
467 _end_pos: usize,
468 allowlist: &Allowlist,
469) -> bool {
470 for regex in &allowlist.compiled_regexes {
472 if regex.is_match(match_text) {
473 return true;
474 }
475 }
476
477 if let Some(stopwords) = &allowlist.stopwords {
479 for stopword in stopwords {
480 if match_text.to_lowercase().contains(&stopword.to_lowercase()) {
481 return true;
482 }
483 }
484 }
485
486 false
487}
488
489pub fn is_allowed_by_rule_allowlist(
490 input: &str,
491 path: Option<&str>,
492 match_text: &str,
493 start_pos: usize,
494 end_pos: usize,
495 allowlist: &RuleAllowlist,
496) -> bool {
497 let mut checks = Vec::new();
498
499 let target_text = match allowlist.regex_target.as_deref() {
501 Some("match") => match_text,
502 Some("line") => {
503 let line_start = input[..start_pos].rfind('\n').map(|i| i + 1).unwrap_or(0);
505 let line_end = input[end_pos..]
506 .find('\n')
507 .map(|i| end_pos + i)
508 .unwrap_or(input.len());
509 &input[line_start..line_end]
510 }
511 _ => match_text, };
513
514 if !allowlist.compiled_regexes.is_empty() {
516 let regex_matches = allowlist
517 .compiled_regexes
518 .iter()
519 .any(|regex| regex.is_match(target_text));
520 checks.push(regex_matches);
521 }
522
523 if let Some(stopwords) = &allowlist.stopwords {
525 let stopword_matches = stopwords.iter().any(|stopword| {
526 if let Some(equals_pos) = target_text.find('=') {
528 let value = &target_text[equals_pos + 1..];
529
530 let value_lower = value.to_lowercase();
533 let stopword_lower = stopword.to_lowercase();
534
535 if value_lower == stopword_lower {
541 true } else if value.len() < 15 && value_lower.contains(&stopword_lower) {
543 let without_stopword = value_lower.replace(&stopword_lower, "");
545 without_stopword
547 .chars()
548 .all(|c| c.is_ascii_digit() || "!@#$%^&*()_+-=[]{}|;:,.<>?".contains(c))
549 } else {
550 false }
552 } else {
553 let obvious_false_positives = ["example", "test", "demo", "sample", "placeholder"];
556 if obvious_false_positives.contains(&stopword.as_str()) {
557 target_text
558 .to_lowercase()
559 .contains(&stopword.to_lowercase())
560 } else {
561 false
562 }
563 }
564 });
565 checks.push(stopword_matches);
566 }
567
568 if let Some(paths) = &allowlist.paths
570 && let Some(path) = path
571 {
572 checks.push(paths.iter().any(|p| path.contains(p)));
573 }
574
575 if checks.is_empty() {
577 return false;
578 }
579
580 match allowlist.condition.as_deref() {
582 Some("AND") => checks.iter().all(|&check| check),
583 _ => checks.iter().any(|&check| check), }
585}
586
/// Returns `true` when `input` contains at least one of `keywords`, comparing
/// case-insensitively. An empty keyword list never matches.
pub fn contains_any_keyword(input: &str, keywords: &[String]) -> bool {
    let haystack = input.to_lowercase();

    for keyword in keywords {
        let needle = keyword.to_lowercase();
        if haystack.contains(needle.as_str()) {
            return true;
        }
    }

    false
}
594
595pub fn initialize_gitleaks_config(privacy_mode: bool) -> usize {
604 let config = if privacy_mode {
606 &*GITLEAKS_CONFIG_WITH_PRIVACY
607 } else {
608 &*GITLEAKS_CONFIG
609 };
610 config.rules.len()
611}
612
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the first finding reported under `rule_id`, if any.
    fn first_for_rule<'a>(
        secrets: &'a [DetectedSecret],
        rule_id: &str,
    ) -> Option<&'a DetectedSecret> {
        secrets.iter().find(|secret| secret.rule_id == rule_id)
    }

    /// Reports whether any finding was reported under `rule_id`.
    fn rule_fired(secrets: &[DetectedSecret], rule_id: &str) -> bool {
        secrets.iter().any(|secret| secret.rule_id == rule_id)
    }

    /// Prints one line per finding, including its position.
    fn print_secrets_with_positions(secrets: &[DetectedSecret]) {
        for secret in secrets {
            println!(
                " Rule: {}, Value: '{}', Pos: {}-{}",
                secret.rule_id, secret.value, secret.start_pos, secret.end_pos
            );
        }
    }

    /// Prints one line per finding, without its position.
    fn print_secret_values(secrets: &[DetectedSecret]) {
        for secret in secrets {
            println!(" Rule: {}, Value: '{}'", secret.rule_id, secret.value);
        }
    }

    /// Prints what `regex` matches and captures in `test_input`, and whether
    /// `test_input` passes the keyword pre-filter for `keywords`.
    fn print_regex_diagnostics(regex: &Regex, keywords: &[String], test_input: &str) {
        let test_matches: Vec<_> = regex.find_iter(test_input).collect();
        println!("Direct regex matches: {}", test_matches.len());
        for mat in test_matches {
            println!(" Match: '{}'", mat.as_str());
        }

        let contains_keywords = contains_any_keyword(test_input, keywords);
        println!("Contains keywords: {}", contains_keywords);

        match regex.captures(test_input) {
            Some(captures) => {
                println!("Capture groups found: {}", captures.len());
                for (i, cap) in captures.iter().enumerate() {
                    if let Some(cap) = cap {
                        println!(" Capture {}: '{}'", i, cap.as_str());
                    }
                }
            }
            None => println!("No capture groups found"),
        }
    }

    /// A varied string must score higher than a repeated character, and the
    /// empty string must score exactly zero.
    #[test]
    fn test_entropy_calculation() {
        let high_entropy = calculate_entropy("Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA");
        let low_entropy = calculate_entropy("aaaaaaaaaa");
        let zero_entropy = calculate_entropy("");

        assert!(high_entropy > low_entropy);
        assert_eq!(zero_entropy, 0.0);

        println!("High entropy: {:.2}", high_entropy);
        println!("Low entropy: {:.2}", low_entropy);
        println!("Zero entropy: {:.2}", zero_entropy);
    }

    /// The Anthropic rule from `additional_rules.toml` must be present, carry
    /// its keyword and have a compiled regex.
    #[test]
    fn test_additional_rules_loaded() {
        let config = &*GITLEAKS_CONFIG;

        let anthropic_rule = config
            .rules
            .iter()
            .find(|rule| rule.id == "anthropic-api-key");
        assert!(
            anthropic_rule.is_some(),
            "Anthropic API key rule should be loaded from additional_rules.toml"
        );

        if let Some(rule) = anthropic_rule {
            assert!(rule.keywords.contains(&"anthropic".to_string()));
            assert!(
                rule.compiled_regex.is_some(),
                "Anthropic rule regex should be compiled"
            );
        }

        println!("Total rules loaded: {}", config.rules.len());
    }

    /// An Anthropic key in an environment-variable assignment is detected.
    #[test]
    fn test_anthropic_api_key_detection() {
        let test_input =
            "ANTHROPIC_API_KEY=sk-ant-api03-Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA9bD2eG5kM8pR1tX4zB7";
        let secrets = detect_secrets(test_input, None, false);

        let anthropic_secret = first_for_rule(&secrets, "anthropic-api-key");
        assert!(
            anthropic_secret.is_some(),
            "Should detect Anthropic API key"
        );

        if let Some(secret) = anthropic_secret {
            assert!(secret.value.starts_with("sk-ant-api03-"));
        }
    }

    /// AWS account IDs are reported in privacy mode only.
    #[test]
    fn test_privacy_mode_aws_account_id() {
        let test_input = "AWS_ACCOUNT_ID=987654321098";

        let secrets = detect_secrets(test_input, None, false);
        assert!(!rule_fired(&secrets, "aws-account-id"));

        let secrets_privacy = detect_secrets(test_input, None, true);
        let aws_secret = first_for_rule(&secrets_privacy, "aws-account-id");
        assert!(
            aws_secret.is_some(),
            "Should detect AWS account ID in privacy mode"
        );

        if let Some(secret) = aws_secret {
            assert_eq!(secret.value, "987654321098");
        }
    }

    /// Public IPv4 addresses are reported in privacy mode only.
    #[test]
    fn test_privacy_mode_public_ip() {
        let test_input = "SERVER_IP=203.0.113.195";

        let secrets = detect_secrets(test_input, None, false);
        assert!(!rule_fired(&secrets, "public-ipv4"));

        let secrets_privacy = detect_secrets(test_input, None, true);
        let ip_secret = first_for_rule(&secrets_privacy, "public-ipv4");
        assert!(
            ip_secret.is_some(),
            "Should detect public IP in privacy mode"
        );

        if let Some(secret) = ip_secret {
            assert_eq!(secret.value, "203.0.113.195");
        }
    }

    /// Private-range addresses are not reported even in privacy mode.
    #[test]
    fn test_privacy_mode_private_ip_excluded() {
        let test_input = "LOCAL_IP=192.168.1.1";

        let secrets_privacy = detect_secrets(test_input, None, true);
        assert!(!rule_fired(&secrets_privacy, "public-ipv4"));
    }

    /// An ARN without an account ID yields nothing; an ARN with one yields the
    /// account ID in privacy mode.
    #[test]
    fn test_privacy_mode_aws_arn() {
        let test_input = "ARN=arn:aws:s3:::my-bucket/object";

        let secrets = detect_secrets(test_input, None, false);
        assert!(!rule_fired(&secrets, "aws-account-id"));

        let secrets_privacy = detect_secrets(test_input, None, true);
        assert!(!rule_fired(&secrets_privacy, "aws-account-id"));

        let test_input_with_account = "ARN=arn:aws:iam::987654321098:role/MyRole";
        let secrets_with_account = detect_secrets(test_input_with_account, None, true);
        let aws_secret = first_for_rule(&secrets_with_account, "aws-account-id");
        assert!(
            aws_secret.is_some(),
            "Should detect AWS account ID in ARN in privacy mode"
        );

        if let Some(secret) = aws_secret {
            assert_eq!(secret.value, "987654321098");
        }
    }

    /// The privacy configuration is a strict superset in rule count.
    #[test]
    fn test_privacy_mode_initialization() {
        let regular_count = initialize_gitleaks_config(false);
        let privacy_count = initialize_gitleaks_config(true);

        assert!(
            privacy_count > regular_count,
            "Privacy mode should have more rules than regular mode"
        );
    }

    /// Diagnostic dump for the AWS account-ID rule; prints only, asserts
    /// nothing.
    #[test]
    fn test_debug_privacy_mode_aws() {
        let test_input = "AWS_ACCOUNT_ID=987654321098";

        let secrets_privacy = detect_secrets(test_input, None, true);
        println!("Privacy mode detected {} secrets", secrets_privacy.len());
        print_secrets_with_positions(&secrets_privacy);

        let secrets_regular = detect_secrets(test_input, None, false);
        println!("Regular mode detected {} secrets", secrets_regular.len());
        print_secrets_with_positions(&secrets_regular);

        let config_with_privacy = &*GITLEAKS_CONFIG_WITH_PRIVACY;
        let aws_rule = config_with_privacy
            .rules
            .iter()
            .find(|rule| rule.id == "aws-account-id");
        println!("AWS rule found: {}", aws_rule.is_some());

        let Some(rule) = aws_rule else {
            return;
        };
        println!("AWS rule keywords: {:?}", rule.keywords);

        match &rule.compiled_regex {
            Some(regex) => {
                println!("AWS rule regex compiled: yes");
                print_regex_diagnostics(regex, &rule.keywords, test_input);

                // Entropy of the extracted secret for every match.
                for mat in regex.find_iter(test_input) {
                    let first_group = regex
                        .captures_at(test_input, mat.start())
                        .and_then(|captures| captures.get(1));
                    if let Some(capture) = first_group {
                        let entropy = calculate_entropy(capture.as_str());
                        println!(
                            " Entropy of first capture '{}': {:.2} (threshold: {:?})",
                            capture.as_str(),
                            entropy,
                            rule.entropy
                        );
                    }
                }
            }
            None => println!("AWS rule regex compiled: no"),
        }
    }

    /// Diagnostic dump for the public-IPv4 rule; prints only, asserts nothing.
    #[test]
    fn test_debug_privacy_mode_ip() {
        let test_input = "SERVER_IP=8.8.8.8";

        let secrets_privacy = detect_secrets(test_input, None, true);
        println!("Privacy mode detected {} secrets", secrets_privacy.len());
        print_secrets_with_positions(&secrets_privacy);

        let config_with_privacy = &*GITLEAKS_CONFIG_WITH_PRIVACY;
        let ip_rule = config_with_privacy
            .rules
            .iter()
            .find(|rule| rule.id == "public-ipv4");
        println!("IP rule found: {}", ip_rule.is_some());

        let Some(rule) = ip_rule else {
            return;
        };
        println!("IP rule keywords: {:?}", rule.keywords);

        match &rule.compiled_regex {
            Some(regex) => {
                println!("IP rule regex compiled: yes");
                print_regex_diagnostics(regex, &rule.keywords, test_input);
            }
            None => println!("IP rule regex compiled: no"),
        }
    }

    /// Bare addresses: the first group must be reported, the second must not.
    /// Addresses embedded in surrounding text must be reported as well.
    #[test]
    fn test_comprehensive_ip_detection() {
        println!("=== COMPREHENSIVE IP DETECTION TEST ===");

        let test_cases = [
            ("16.170.172.114", true),
            ("8.8.8.8", true),
            ("1.1.1.1", true),
            ("203.0.113.195", true),
            ("13.107.42.14", true),
            ("192.168.1.1", false),
            ("10.0.0.1", false),
            ("172.16.0.1", false),
            ("127.0.0.1", false),
            ("169.254.1.1", false),
            ("0.0.0.0", false),
            ("255.255.255.255", false),
        ];

        for (ip, should_detect) in test_cases {
            let secrets = detect_secrets(ip, None, true);
            let detected = rule_fired(&secrets, "public-ipv4");

            println!(
                "IP: {} | Should detect: {} | Detected: {}",
                ip, should_detect, detected
            );

            if should_detect {
                assert!(detected, "Should detect public IP: {}", ip);
            } else {
                assert!(!detected, "Should NOT detect private IP: {}", ip);
            }
        }

        let context_tests = [
            "IP address: 16.170.172.114",
            "Connect to 16.170.172.114",
            "16.170.172.114:8080",
            "ping 16.170.172.114",
            "https://16.170.172.114/api",
        ];

        for context in context_tests {
            let secrets = detect_secrets(context, None, true);
            let detected = rule_fired(&secrets, "public-ipv4");
            println!("Context: '{}' | Detected: {}", context, detected);
            assert!(detected, "Should detect IP in context: {}", context);
        }
    }

    /// Diagnostic dump comparing a bare IP with one in a `KEY=value` context;
    /// prints only, asserts nothing.
    #[test]
    fn test_standalone_ip_detection() {
        println!("=== TESTING STANDALONE IP DETECTION ===");

        let standalone_ip = "16.170.172.114";
        let secrets = detect_secrets(standalone_ip, None, true);
        println!(
            "Standalone IP '{}' detected {} secrets",
            standalone_ip,
            secrets.len()
        );
        print_secret_values(&secrets);

        let ip_with_context = "SERVER_IP=16.170.172.114";
        let secrets_with_context = detect_secrets(ip_with_context, None, true);
        println!(
            "IP with context '{}' detected {} secrets",
            ip_with_context,
            secrets_with_context.len()
        );
        print_secret_values(&secrets_with_context);

        let config = &*GITLEAKS_CONFIG_WITH_PRIVACY;
        if let Some(rule) = config.rules.iter().find(|rule| rule.id == "public-ipv4") {
            println!("IP rule keywords: {:?}", rule.keywords);
            println!(
                "Standalone IP contains keywords: {}",
                contains_any_keyword(standalone_ip, &rule.keywords)
            );
            println!(
                "IP with context contains keywords: {}",
                contains_any_keyword(ip_with_context, &rule.keywords)
            );
        }
    }

    /// The account ID must be found in a JSON identity document.
    #[test]
    fn test_user_provided_json_snippet() {
        println!("=== TESTING USER PROVIDED JSON SNIPPET ===");

        let json_snippet = r#"{
 "UserId": "AIDAX5UI4H55WM6GS6NIJ",
 "Account": "544388841223",
 "Arn": "arn:aws:iam::544388841223:user/terraform-mac"
}"#;

        let secrets = detect_secrets(json_snippet, None, true);
        let aws_secrets: Vec<_> = secrets
            .iter()
            .filter(|secret| secret.rule_id == "aws-account-id")
            .collect();

        println!("Detected {} AWS account ID secrets", aws_secrets.len());
        for secret in &aws_secrets {
            println!(
                " Value: '{}' at position {}-{}",
                secret.value, secret.start_pos, secret.end_pos
            );
        }

        assert!(
            !aws_secrets.is_empty(),
            "Should detect at least one AWS account ID"
        );
        assert!(
            aws_secrets
                .iter()
                .any(|secret| secret.value == "544388841223"),
            "Should detect account ID 544388841223"
        );

        println!("✅ JSON snippet test passed - Account field is now detected");
    }

    /// The account ID must be found in each supported textual shape.
    #[test]
    fn test_aws_account_id_json_field() {
        println!("=== TESTING AWS ACCOUNT ID JSON FIELD DETECTION ===");

        let test_cases = [
            r#""Account": "544388841223""#,
            r#""AccountId": "544388841223""#,
            r#""account": "544388841223""#,
            r#""accountId": "544388841223""#,
            "AWS_ACCOUNT_ID=544388841223",
            "account.id=544388841223",
            "account_id: 544388841223",
            "arn:aws:iam::544388841223:user/test",
            "544388841223 arn:aws:iam::544388841223:user/terraform-mac AIDAX5UI4H55WM6GS6NIJ",
        ];

        for test_case in test_cases {
            let secrets = detect_secrets(test_case, None, true);
            let detected = rule_fired(&secrets, "aws-account-id");

            println!("Test case: '{}' | Detected: {}", test_case, detected);
            assert!(detected, "Should detect AWS account ID in: {}", test_case);

            if let Some(secret) = first_for_rule(&secrets, "aws-account-id") {
                assert_eq!(secret.value, "544388841223");
                println!(" -> Detected value: '{}'", secret.value);
            }
        }
    }
}