1use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::sync::LazyLock;
5
6#[derive(Debug, Deserialize, Clone)]
7pub struct GitleaksConfig {
8 #[allow(dead_code)]
9 pub title: Option<String>,
10 pub allowlist: Option<Allowlist>,
11 pub rules: Vec<Rule>,
12}
13
14#[derive(Debug, Deserialize, Clone)]
15pub struct Allowlist {
16 #[allow(dead_code)]
17 pub description: Option<String>,
18 #[allow(dead_code)]
19 pub paths: Option<Vec<String>>,
20 pub regexes: Option<Vec<String>>,
21 pub stopwords: Option<Vec<String>>,
22 #[serde(skip)]
24 pub compiled_regexes: Vec<Regex>,
25}
26
27#[derive(Debug, Deserialize, Clone)]
28pub struct Rule {
29 pub id: String,
30 #[allow(dead_code)]
31 pub description: String,
32 pub regex: Option<String>,
33 pub entropy: Option<f64>,
34 #[serde(default)]
35 pub keywords: Vec<String>,
36 #[allow(dead_code)]
37 pub path: Option<String>,
38 pub allowlists: Option<Vec<RuleAllowlist>>,
39 #[serde(skip)]
41 pub compiled_regex: Option<Regex>,
42}
43
44#[derive(Debug, Deserialize, Clone)]
45pub struct RuleAllowlist {
46 #[allow(dead_code)]
47 pub description: Option<String>,
48 pub condition: Option<String>, pub paths: Option<Vec<String>>,
50 pub regexes: Option<Vec<String>>,
51 pub stopwords: Option<Vec<String>>,
52 #[serde(rename = "regexTarget")]
53 pub regex_target: Option<String>, #[serde(skip)]
56 pub compiled_regexes: Vec<Regex>,
57}
58
/// One finding produced by secret detection.
#[derive(Clone, Debug)]
pub struct DetectedSecret {
    /// Id of the rule that produced the finding.
    pub rule_id: String,
    /// The secret text itself.
    pub value: String,
    /// Byte offset in the scanned input where `value` starts.
    pub start_pos: usize,
    /// Byte offset in the scanned input just past the end of `value`.
    pub end_pos: usize,
}
71
72#[derive(Debug, Default, Serialize)]
73pub struct CompilationErrors {
74 pub regex_errors: Vec<(String, String)>, pub warnings: Vec<String>,
76}
77
78impl CompilationErrors {
79 pub fn add_regex_error(&mut self, rule_id: String, error: String) {
80 self.regex_errors.push((rule_id, error));
81 }
82
83 pub fn add_warning(&mut self, warning: String) {
84 self.warnings.push(warning);
85 }
86
87 #[allow(dead_code)]
88 pub fn is_empty(&self) -> bool {
89 self.regex_errors.is_empty() && self.warnings.is_empty()
90 }
91}
92
93pub trait RegexCompilable {
95 fn compile_regexes(&mut self) -> CompilationErrors;
96}
97
98impl RegexCompilable for Allowlist {
99 fn compile_regexes(&mut self) -> CompilationErrors {
100 let mut errors = CompilationErrors::default();
101 self.compiled_regexes.clear();
102
103 if let Some(regexes) = &self.regexes {
104 for pattern in regexes {
105 match Regex::new(pattern) {
106 Ok(regex) => self.compiled_regexes.push(regex),
107 Err(e) => errors.add_warning(format!(
108 "Failed to compile allowlist regex '{}': {}",
109 pattern, e
110 )),
111 }
112 }
113 }
114
115 errors
116 }
117}
118
119impl RegexCompilable for RuleAllowlist {
120 fn compile_regexes(&mut self) -> CompilationErrors {
121 let mut errors = CompilationErrors::default();
122 self.compiled_regexes.clear();
123
124 if let Some(regexes) = &self.regexes {
125 for pattern in regexes {
126 match Regex::new(pattern) {
127 Ok(regex) => self.compiled_regexes.push(regex),
128 Err(e) => errors.add_warning(format!(
129 "Failed to compile rule allowlist regex '{}': {}",
130 pattern, e
131 )),
132 }
133 }
134 }
135
136 errors
137 }
138}
139
140impl RegexCompilable for Rule {
141 fn compile_regexes(&mut self) -> CompilationErrors {
142 let mut errors = CompilationErrors::default();
143
144 if let Some(regex_pattern) = &self.regex {
146 match Regex::new(regex_pattern) {
147 Ok(regex) => self.compiled_regex = Some(regex),
148 Err(e) => {
149 match self.id.as_str() {
151 "generic-api-key" | "pypi-upload-token" | "vault-batch-token" => {
152 match create_simple_api_key_regex() {
153 Ok(simple_regex) => {
154 self.compiled_regex = Some(simple_regex);
155 errors.add_warning(format!(
156 "Used fallback regex for rule '{}' due to: {}",
157 self.id, e
158 ));
159 }
160 Err(fallback_err) => {
161 errors.add_regex_error(
162 self.id.clone(),
163 format!(
164 "Failed to compile regex and fallback: {} / {}",
165 e, fallback_err
166 ),
167 );
168 }
169 }
170 }
171 _ => {
172 errors.add_regex_error(self.id.clone(), e.to_string());
173 }
174 }
175 }
176 }
177 } else {
178 self.compiled_regex = None;
181 }
182
183 if let Some(allowlists) = &mut self.allowlists {
185 for allowlist in allowlists {
186 let allowlist_errors = allowlist.compile_regexes();
187 errors.warnings.extend(allowlist_errors.warnings);
188 errors.regex_errors.extend(allowlist_errors.regex_errors);
189 }
190 }
191
192 errors
193 }
194}
195
196impl RegexCompilable for GitleaksConfig {
197 fn compile_regexes(&mut self) -> CompilationErrors {
198 let mut errors = CompilationErrors::default();
199
200 if let Some(allowlist) = &mut self.allowlist {
202 let allowlist_errors = allowlist.compile_regexes();
203 errors.warnings.extend(allowlist_errors.warnings);
204 errors.regex_errors.extend(allowlist_errors.regex_errors);
205 }
206
207 let mut compiled_rules = Vec::new();
209 for mut rule in self.rules.drain(..) {
210 let rule_errors = rule.compile_regexes();
211 errors.warnings.extend(rule_errors.warnings);
212 errors.regex_errors.extend(rule_errors.regex_errors);
213
214 if rule.compiled_regex.is_some() || rule.regex.is_none() {
216 compiled_rules.push(rule);
217 }
218 }
219 self.rules = compiled_rules;
220
221 errors
222 }
223}
224
225pub static GITLEAKS_CONFIG: LazyLock<GitleaksConfig> =
227 LazyLock::new(|| create_gitleaks_config(false));
228
229pub static GITLEAKS_CONFIG_WITH_PRIVACY: LazyLock<GitleaksConfig> =
231 LazyLock::new(|| create_gitleaks_config(true));
232
233fn create_gitleaks_config(include_privacy_rules: bool) -> GitleaksConfig {
235 let config_str = include_str!("gitleaks.toml");
237 let mut config: GitleaksConfig =
238 toml::from_str(config_str).expect("Failed to parse gitleaks.toml");
239
240 let additional_config_str = include_str!("additional_rules.toml");
242 let additional_config: GitleaksConfig =
243 toml::from_str(additional_config_str).expect("Failed to parse additional_rules.toml");
244
245 config.rules.extend(additional_config.rules);
247
248 if let Some(additional_allowlist) = additional_config.allowlist {
250 merge_allowlist(&mut config.allowlist, additional_allowlist);
251 }
252
253 if include_privacy_rules {
255 let privacy_config_str = include_str!("privacy_rules.toml");
256 let privacy_config: GitleaksConfig =
257 toml::from_str(privacy_config_str).expect("Failed to parse privacy_rules.toml");
258
259 config.rules.extend(privacy_config.rules);
261
262 if let Some(privacy_allowlist) = privacy_config.allowlist {
264 merge_allowlist(&mut config.allowlist, privacy_allowlist);
265 }
266 }
267
268 let compilation_errors = config.compile_regexes();
269 if !compilation_errors.regex_errors.is_empty() {
270 const ERROR_LOG_FILE: &str = ".stakpak_mcp_secret_detection_errors";
271 if let Ok(json) = serde_json::to_string(&compilation_errors)
273 && let Err(e) = std::fs::write(ERROR_LOG_FILE, json)
274 {
275 eprintln!("Failed to write errors to log file: {}", e);
276 }
277 }
278 config
279}
280
281fn merge_allowlist(target: &mut Option<Allowlist>, source: Allowlist) {
283 match target {
284 Some(existing_allowlist) => {
285 if let Some(additional_regexes) = source.regexes {
287 match &mut existing_allowlist.regexes {
288 Some(existing_regexes) => existing_regexes.extend(additional_regexes),
289 None => existing_allowlist.regexes = Some(additional_regexes),
290 }
291 }
292
293 if let Some(additional_stopwords) = source.stopwords {
295 match &mut existing_allowlist.stopwords {
296 Some(existing_stopwords) => existing_stopwords.extend(additional_stopwords),
297 None => existing_allowlist.stopwords = Some(additional_stopwords),
298 }
299 }
300 }
301 None => *target = Some(source),
302 }
303}
304
305pub fn create_simple_api_key_regex() -> Result<Regex, regex::Error> {
307 let pattern = r#"(?i)[\w.-]{0,30}?(?:access|auth|api|credential|creds|key|password|passwd|secret|token)[\w.-]{0,15}[\s'"]{0,3}(?:=|>|:{1,2}=|\|\||:|=>|\?=|,)[\s'"=]{0,3}([\w.=-]{10,80}|[a-z0-9][a-z0-9+/]{11,}={0,2})(?:[\s'";]|$)"#;
318 Regex::new(pattern)
319}
320
/// Computes the Shannon entropy of `text`, in bits per character.
///
/// Frequencies are taken over Unicode scalar values (`char`s) and normalised
/// by the number of characters — not the number of bytes — so multi-byte text
/// is measured correctly. Returns `0.0` for an empty string.
pub fn calculate_entropy(text: &str) -> f64 {
    // An ordered map gives a fixed iteration order, and with it a fixed
    // floating-point summation order: equal inputs yield bit-identical results.
    let mut char_counts = std::collections::BTreeMap::new();
    let mut total_chars = 0usize;

    for ch in text.chars() {
        *char_counts.entry(ch).or_insert(0usize) += 1;
        total_chars += 1;
    }

    if total_chars == 0 {
        return 0.0;
    }

    let total = total_chars as f64;
    char_counts.values().fold(0.0, |entropy, &count| {
        // Every stored count is at least 1, so the probability is strictly positive.
        let probability = count as f64 / total;
        entropy - probability * probability.log2()
    })
}
349
350pub fn detect_secrets(input: &str, path: Option<&str>, privacy_mode: bool) -> Vec<DetectedSecret> {
360 let mut detected_secrets = Vec::new();
361 let config = if privacy_mode {
362 &*GITLEAKS_CONFIG_WITH_PRIVACY
363 } else {
364 &*GITLEAKS_CONFIG
365 };
366
367 for rule in &config.rules {
369 let regex = match &rule.compiled_regex {
371 Some(regex) => regex,
372 None => continue,
373 };
374
375 if !rule.keywords.is_empty() && !contains_any_keyword(input, &rule.keywords) {
377 continue;
378 }
379
380 for mat in regex.find_iter(input) {
382 let match_text = mat.as_str();
383 let start_pos = mat.start();
384 let end_pos = mat.end();
385
386 if should_allow_match(
388 input,
389 path,
390 match_text,
391 start_pos,
392 end_pos,
393 rule,
394 &config.allowlist,
395 ) {
396 continue;
397 }
398
399 let (secret_value, secret_start, secret_end) =
401 if let Some(captures) = regex.captures_at(input, start_pos) {
402 if let Some(capture) = captures.get(1) {
404 (capture.as_str().to_string(), capture.start(), capture.end())
406 } else {
407 (match_text.to_string(), start_pos, end_pos)
408 }
409 } else {
410 (match_text.to_string(), start_pos, end_pos)
411 };
412
413 if let Some(entropy_threshold) = rule.entropy {
415 let calculated_entropy = calculate_entropy(&secret_value);
416 if calculated_entropy < entropy_threshold {
417 continue;
418 }
419 }
420
421 detected_secrets.push(DetectedSecret {
422 rule_id: rule.id.clone(),
423 value: secret_value,
424 start_pos: secret_start,
425 end_pos: secret_end,
426 });
427 }
428 }
429
430 detected_secrets
431}
432
433pub fn should_allow_match(
435 input: &str,
436 path: Option<&str>,
437 match_text: &str,
438 start_pos: usize,
439 end_pos: usize,
440 rule: &Rule,
441 global_allowlist: &Option<Allowlist>,
442) -> bool {
443 if let Some(global) = global_allowlist
445 && is_allowed_by_allowlist(input, match_text, start_pos, end_pos, global)
446 {
447 return true;
448 }
449
450 if let Some(rule_allowlists) = &rule.allowlists {
452 for allowlist in rule_allowlists {
453 if is_allowed_by_rule_allowlist(input, path, match_text, start_pos, end_pos, allowlist)
454 {
455 return true;
456 }
457 }
458 }
459
460 false
461}
462
463fn is_allowed_by_allowlist(
464 _input: &str,
465 match_text: &str,
466 _start_pos: usize,
467 _end_pos: usize,
468 allowlist: &Allowlist,
469) -> bool {
470 for regex in &allowlist.compiled_regexes {
472 if regex.is_match(match_text) {
473 return true;
474 }
475 }
476
477 if let Some(stopwords) = &allowlist.stopwords {
479 for stopword in stopwords {
480 if match_text.to_lowercase().contains(&stopword.to_lowercase()) {
481 return true;
482 }
483 }
484 }
485
486 false
487}
488
489#[allow(clippy::string_slice)]
492pub fn is_allowed_by_rule_allowlist(
493 input: &str,
494 path: Option<&str>,
495 match_text: &str,
496 start_pos: usize,
497 end_pos: usize,
498 allowlist: &RuleAllowlist,
499) -> bool {
500 let mut checks = Vec::new();
501
502 if start_pos > input.len()
504 || end_pos > input.len()
505 || !input.is_char_boundary(start_pos)
506 || !input.is_char_boundary(end_pos)
507 {
508 return false;
509 }
510
511 let target_text = match allowlist.regex_target.as_deref() {
513 Some("match") => match_text,
514 Some("line") => {
515 let line_start = input[..start_pos].rfind('\n').map(|i| i + 1).unwrap_or(0);
517 let line_end = input[end_pos..]
518 .find('\n')
519 .map(|i| end_pos + i)
520 .unwrap_or(input.len());
521 &input[line_start..line_end]
522 }
523 _ => match_text, };
525
526 if !allowlist.compiled_regexes.is_empty() {
528 let regex_matches = allowlist
529 .compiled_regexes
530 .iter()
531 .any(|regex| regex.is_match(target_text));
532 checks.push(regex_matches);
533 }
534
535 if let Some(stopwords) = &allowlist.stopwords {
537 let stopword_matches = stopwords.iter().any(|stopword| {
538 if let Some(equals_pos) = target_text.find('=') {
540 let value = &target_text[equals_pos + 1..];
541
542 let value_lower = value.to_lowercase();
545 let stopword_lower = stopword.to_lowercase();
546
547 if value_lower == stopword_lower {
553 true } else if value.len() < 15 && value_lower.contains(&stopword_lower) {
555 let without_stopword = value_lower.replace(&stopword_lower, "");
557 without_stopword
559 .chars()
560 .all(|c| c.is_ascii_digit() || "!@#$%^&*()_+-=[]{}|;:,.<>?".contains(c))
561 } else {
562 false }
564 } else {
565 let obvious_false_positives = ["example", "test", "demo", "sample", "placeholder"];
568 if obvious_false_positives.contains(&stopword.as_str()) {
569 target_text
570 .to_lowercase()
571 .contains(&stopword.to_lowercase())
572 } else {
573 false
574 }
575 }
576 });
577 checks.push(stopword_matches);
578 }
579
580 if let Some(paths) = &allowlist.paths
582 && let Some(path) = path
583 {
584 checks.push(paths.iter().any(|p| path.contains(p)));
585 }
586
587 if checks.is_empty() {
589 return false;
590 }
591
592 match allowlist.condition.as_deref() {
594 Some("AND") => checks.iter().all(|&check| check),
595 _ => checks.iter().any(|&check| check), }
597}
598
/// Returns `true` when `input` contains at least one of `keywords`,
/// ignoring case. An empty keyword list never matches.
pub fn contains_any_keyword(input: &str, keywords: &[String]) -> bool {
    let haystack = input.to_lowercase();

    for keyword in keywords {
        let needle = keyword.to_lowercase();
        if haystack.contains(needle.as_str()) {
            return true;
        }
    }

    false
}
606
607pub fn initialize_gitleaks_config(privacy_mode: bool) -> usize {
616 let config = if privacy_mode {
618 &*GITLEAKS_CONFIG_WITH_PRIVACY
619 } else {
620 &*GITLEAKS_CONFIG
621 };
622 config.rules.len()
623}
624
#[cfg(test)]
mod tests {
    use super::*;

    /// Finds the first reported secret that was produced by `rule_id`.
    fn secret_for<'a>(secrets: &'a [DetectedSecret], rule_id: &str) -> Option<&'a DetectedSecret> {
        secrets.iter().find(|secret| secret.rule_id == rule_id)
    }

    /// Tells whether any reported secret was produced by `rule_id`.
    fn flagged_by(secrets: &[DetectedSecret], rule_id: &str) -> bool {
        secrets.iter().any(|secret| secret.rule_id == rule_id)
    }

    /// Looks up a rule of the privacy-enabled configuration by id.
    fn privacy_rule(rule_id: &str) -> Option<&'static Rule> {
        LazyLock::force(&GITLEAKS_CONFIG_WITH_PRIVACY)
            .rules
            .iter()
            .find(|rule| rule.id == rule_id)
    }

    /// Prints every secret together with its position.
    fn print_with_positions(secrets: &[DetectedSecret]) {
        for secret in secrets {
            println!(
                " Rule: {}, Value: '{}', Pos: {}-{}",
                secret.rule_id, secret.value, secret.start_pos, secret.end_pos
            );
        }
    }

    // A varied string must score higher than a repeated character; empty input scores zero.
    #[test]
    fn test_entropy_calculation() {
        let high_entropy = calculate_entropy("Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA");
        let low_entropy = calculate_entropy("aaaaaaaaaa");
        let zero_entropy = calculate_entropy("");

        assert!(high_entropy > low_entropy);
        assert_eq!(zero_entropy, 0.0);

        println!("High entropy: {:.2}", high_entropy);
        println!("Low entropy: {:.2}", low_entropy);
        println!("Zero entropy: {:.2}", zero_entropy);
    }

    // The rules of additional_rules.toml are part of the default configuration.
    #[test]
    fn test_additional_rules_loaded() {
        let config = LazyLock::force(&GITLEAKS_CONFIG);

        let anthropic_rule = config
            .rules
            .iter()
            .find(|rule| rule.id == "anthropic-api-key");
        assert!(
            anthropic_rule.is_some(),
            "Anthropic API key rule should be loaded from additional_rules.toml"
        );

        if let Some(rule) = anthropic_rule {
            assert!(rule.keywords.contains(&"anthropic".to_string()));
            assert!(
                rule.compiled_regex.is_some(),
                "Anthropic rule regex should be compiled"
            );
        }

        println!("Total rules loaded: {}", config.rules.len());
    }

    #[test]
    fn test_anthropic_api_key_detection() {
        let test_input =
            "ANTHROPIC_API_KEY=sk-ant-api03-Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA9bD2eG5kM8pR1tX4zB7";
        let secrets = detect_secrets(test_input, None, false);

        let anthropic_secret = secret_for(&secrets, "anthropic-api-key");
        assert!(
            anthropic_secret.is_some(),
            "Should detect Anthropic API key"
        );

        if let Some(secret) = anthropic_secret {
            assert!(secret.value.starts_with("sk-ant-api03-"));
        }
    }

    // Account ids are only reported when privacy mode is on.
    #[test]
    fn test_privacy_mode_aws_account_id() {
        let test_input = "AWS_ACCOUNT_ID=987654321098";

        let secrets = detect_secrets(test_input, None, false);
        assert!(!flagged_by(&secrets, "aws-account-id"));

        let secrets_privacy = detect_secrets(test_input, None, true);
        let aws_secret = secret_for(&secrets_privacy, "aws-account-id");
        assert!(
            aws_secret.is_some(),
            "Should detect AWS account ID in privacy mode"
        );

        if let Some(secret) = aws_secret {
            assert_eq!(secret.value, "987654321098");
        }
    }

    // Public IPv4 addresses are only reported when privacy mode is on.
    #[test]
    fn test_privacy_mode_public_ip() {
        let test_input = "SERVER_IP=203.0.113.195";

        let secrets = detect_secrets(test_input, None, false);
        assert!(!flagged_by(&secrets, "public-ipv4"));

        let secrets_privacy = detect_secrets(test_input, None, true);
        let ip_secret = secret_for(&secrets_privacy, "public-ipv4");
        assert!(
            ip_secret.is_some(),
            "Should detect public IP in privacy mode"
        );

        if let Some(secret) = ip_secret {
            assert_eq!(secret.value, "203.0.113.195");
        }
    }

    #[test]
    fn test_privacy_mode_private_ip_excluded() {
        let test_input = "LOCAL_IP=192.168.1.1";

        let secrets_privacy = detect_secrets(test_input, None, true);
        assert!(!flagged_by(&secrets_privacy, "public-ipv4"));
    }

    // ARNs without an account id are ignored; ARNs with one expose it in privacy mode.
    #[test]
    fn test_privacy_mode_aws_arn() {
        let test_input = "ARN=arn:aws:s3:::my-bucket/object";

        let secrets = detect_secrets(test_input, None, false);
        assert!(!flagged_by(&secrets, "aws-account-id"));

        let secrets_privacy = detect_secrets(test_input, None, true);
        assert!(!flagged_by(&secrets_privacy, "aws-account-id"));

        let test_input_with_account = "ARN=arn:aws:iam::987654321098:role/MyRole";
        let secrets_with_account = detect_secrets(test_input_with_account, None, true);
        let aws_secret = secret_for(&secrets_with_account, "aws-account-id");
        assert!(
            aws_secret.is_some(),
            "Should detect AWS account ID in ARN in privacy mode"
        );

        if let Some(secret) = aws_secret {
            assert_eq!(secret.value, "987654321098");
        }
    }

    #[test]
    fn test_privacy_mode_initialization() {
        let regular_count = initialize_gitleaks_config(false);
        let privacy_count = initialize_gitleaks_config(true);

        assert!(
            privacy_count > regular_count,
            "Privacy mode should have more rules than regular mode"
        );
    }

    // Diagnostic dump for the AWS account id rule; prints only, asserts nothing.
    #[test]
    fn test_debug_privacy_mode_aws() {
        let test_input = "AWS_ACCOUNT_ID=987654321098";

        let secrets_privacy = detect_secrets(test_input, None, true);
        println!("Privacy mode detected {} secrets", secrets_privacy.len());
        print_with_positions(&secrets_privacy);

        let secrets_regular = detect_secrets(test_input, None, false);
        println!("Regular mode detected {} secrets", secrets_regular.len());
        print_with_positions(&secrets_regular);

        let aws_rule = privacy_rule("aws-account-id");
        println!("AWS rule found: {}", aws_rule.is_some());
        let Some(rule) = aws_rule else {
            return;
        };

        println!("AWS rule keywords: {:?}", rule.keywords);
        let Some(regex) = rule.compiled_regex.as_ref() else {
            println!("AWS rule regex compiled: no");
            return;
        };

        println!("AWS rule regex compiled: yes");
        let test_matches: Vec<_> = regex.find_iter(test_input).collect();
        println!("Direct regex matches: {}", test_matches.len());
        for mat in test_matches {
            println!(" Match: '{}'", mat.as_str());
        }

        let contains_keywords = contains_any_keyword(test_input, &rule.keywords);
        println!("Contains keywords: {}", contains_keywords);

        match regex.captures(test_input) {
            Some(captures) => {
                println!("Capture groups found: {}", captures.len());
                for (i, cap) in captures.iter().enumerate() {
                    if let Some(cap) = cap {
                        println!(" Capture {}: '{}'", i, cap.as_str());
                    }
                }
            }
            None => println!("No capture groups found"),
        }

        for mat in regex.find_iter(test_input) {
            let first_group = regex
                .captures_at(test_input, mat.start())
                .and_then(|captures| captures.get(1));
            if let Some(capture) = first_group {
                let entropy = calculate_entropy(capture.as_str());
                println!(
                    " Entropy of first capture '{}': {:.2} (threshold: {:?})",
                    capture.as_str(),
                    entropy,
                    rule.entropy
                );
            }
        }
    }

    // Diagnostic dump for the public IPv4 rule; prints only, asserts nothing.
    #[test]
    fn test_debug_privacy_mode_ip() {
        let test_input = "SERVER_IP=8.8.8.8";

        let secrets_privacy = detect_secrets(test_input, None, true);
        println!("Privacy mode detected {} secrets", secrets_privacy.len());
        print_with_positions(&secrets_privacy);

        let ip_rule = privacy_rule("public-ipv4");
        println!("IP rule found: {}", ip_rule.is_some());
        let Some(rule) = ip_rule else {
            return;
        };

        println!("IP rule keywords: {:?}", rule.keywords);
        let Some(regex) = rule.compiled_regex.as_ref() else {
            println!("IP rule regex compiled: no");
            return;
        };

        println!("IP rule regex compiled: yes");
        let test_matches: Vec<_> = regex.find_iter(test_input).collect();
        println!("Direct regex matches: {}", test_matches.len());
        for mat in test_matches {
            println!(" Match: '{}'", mat.as_str());
        }

        let contains_keywords = contains_any_keyword(test_input, &rule.keywords);
        println!("Contains keywords: {}", contains_keywords);

        match regex.captures(test_input) {
            Some(captures) => {
                println!("Capture groups found: {}", captures.len());
                for (i, cap) in captures.iter().enumerate() {
                    if let Some(cap) = cap {
                        println!(" Capture {}: '{}'", i, cap.as_str());
                    }
                }
            }
            None => println!("No capture groups found"),
        }
    }

    // Public addresses are reported, reserved/private ranges are not, and
    // surrounding text does not prevent detection.
    #[test]
    fn test_comprehensive_ip_detection() {
        println!("=== COMPREHENSIVE IP DETECTION TEST ===");

        let test_cases = [
            ("16.170.172.114", true),
            ("8.8.8.8", true),
            ("1.1.1.1", true),
            ("203.0.113.195", true),
            ("13.107.42.14", true),
            ("192.168.1.1", false),
            ("10.0.0.1", false),
            ("172.16.0.1", false),
            ("127.0.0.1", false),
            ("169.254.1.1", false),
            ("0.0.0.0", false),
            ("255.255.255.255", false),
        ];

        for &(ip, should_detect) in test_cases.iter() {
            let secrets = detect_secrets(ip, None, true);
            let detected = flagged_by(&secrets, "public-ipv4");

            println!(
                "IP: {} | Should detect: {} | Detected: {}",
                ip, should_detect, detected
            );

            if should_detect {
                assert!(detected, "Should detect public IP: {}", ip);
            } else {
                assert!(!detected, "Should NOT detect private IP: {}", ip);
            }
        }

        let context_tests = [
            "IP address: 16.170.172.114",
            "Connect to 16.170.172.114",
            "16.170.172.114:8080",
            "ping 16.170.172.114",
            "https://16.170.172.114/api",
        ];

        for &context in context_tests.iter() {
            let secrets = detect_secrets(context, None, true);
            let detected = flagged_by(&secrets, "public-ipv4");
            println!("Context: '{}' | Detected: {}", context, detected);
            assert!(detected, "Should detect IP in context: {}", context);
        }
    }

    // Diagnostic dump comparing a bare IP with one in an assignment; asserts nothing.
    #[test]
    fn test_standalone_ip_detection() {
        println!("=== TESTING STANDALONE IP DETECTION ===");

        let standalone_ip = "16.170.172.114";
        let secrets = detect_secrets(standalone_ip, None, true);

        println!(
            "Standalone IP '{}' detected {} secrets",
            standalone_ip,
            secrets.len()
        );
        for secret in &secrets {
            println!(" Rule: {}, Value: '{}'", secret.rule_id, secret.value);
        }

        let ip_with_context = "SERVER_IP=16.170.172.114";
        let secrets_with_context = detect_secrets(ip_with_context, None, true);

        println!(
            "IP with context '{}' detected {} secrets",
            ip_with_context,
            secrets_with_context.len()
        );
        for secret in &secrets_with_context {
            println!(" Rule: {}, Value: '{}'", secret.rule_id, secret.value);
        }

        if let Some(rule) = privacy_rule("public-ipv4") {
            println!("IP rule keywords: {:?}", rule.keywords);
            println!(
                "Standalone IP contains keywords: {}",
                contains_any_keyword(standalone_ip, &rule.keywords)
            );
            println!(
                "IP with context contains keywords: {}",
                contains_any_keyword(ip_with_context, &rule.keywords)
            );
        }
    }

    #[test]
    fn test_user_provided_json_snippet() {
        println!("=== TESTING USER PROVIDED JSON SNIPPET ===");

        let json_snippet = r#"{
 "UserId": "AIDAX5UI4H55WM6GS6NIJ",
 "Account": "544388841223",
 "Arn": "arn:aws:iam::544388841223:user/terraform-mac"
}"#;

        let secrets = detect_secrets(json_snippet, None, true);
        let aws_secrets: Vec<&DetectedSecret> = secrets
            .iter()
            .filter(|secret| secret.rule_id == "aws-account-id")
            .collect();

        println!("Detected {} AWS account ID secrets", aws_secrets.len());
        for secret in &aws_secrets {
            println!(
                " Value: '{}' at position {}-{}",
                secret.value, secret.start_pos, secret.end_pos
            );
        }

        assert!(
            !aws_secrets.is_empty(),
            "Should detect at least one AWS account ID"
        );
        assert!(
            aws_secrets
                .iter()
                .any(|secret| secret.value == "544388841223"),
            "Should detect account ID 544388841223"
        );

        println!("✅ JSON snippet test passed - Account field is now detected");
    }

    // The account id must be found in JSON fields, assignments and ARNs alike.
    #[test]
    fn test_aws_account_id_json_field() {
        println!("=== TESTING AWS ACCOUNT ID JSON FIELD DETECTION ===");

        let test_cases = [
            r#""Account": "544388841223""#,
            r#""AccountId": "544388841223""#,
            r#""account": "544388841223""#,
            r#""accountId": "544388841223""#,
            "AWS_ACCOUNT_ID=544388841223",
            "account.id=544388841223",
            "account_id: 544388841223",
            "arn:aws:iam::544388841223:user/test",
            "544388841223 arn:aws:iam::544388841223:user/terraform-mac AIDAX5UI4H55WM6GS6NIJ",
        ];

        for &test_case in test_cases.iter() {
            let secrets = detect_secrets(test_case, None, true);
            let detected = flagged_by(&secrets, "aws-account-id");

            println!("Test case: '{}' | Detected: {}", test_case, detected);
            assert!(detected, "Should detect AWS account ID in: {}", test_case);

            if let Some(secret) = secret_for(&secrets, "aws-account-id") {
                assert_eq!(secret.value, "544388841223");
                println!(" -> Detected value: '{}'", secret.value);
            }
        }
    }
}