Skip to main content

stakpak_shared/secrets/
gitleaks.rs

1// Secret redaction implementation based on gitleaks (https://github.com/gitleaks/gitleaks)
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::sync::LazyLock;
5
/// Top-level gitleaks configuration as deserialized from a TOML document.
#[derive(Debug, Deserialize, Clone)]
pub struct GitleaksConfig {
    /// Optional title of the configuration; not read by the code in this file.
    #[allow(dead_code)]
    pub title: Option<String>,
    /// Global allowlist checked against every match, regardless of the rule that produced it.
    pub allowlist: Option<Allowlist>,
    /// Detection rules. `compile_regexes` drops rules whose regex failed to compile.
    pub rules: Vec<Rule>,
}
13
/// Global allowlist: a match that satisfies it is not reported as a secret.
#[derive(Debug, Deserialize, Clone)]
pub struct Allowlist {
    /// Optional description; not read by the code in this file.
    #[allow(dead_code)]
    pub description: Option<String>,
    /// Path entries; deserialized but not consulted by the global allowlist check.
    #[allow(dead_code)]
    pub paths: Option<Vec<String>>,
    /// Regex sources that `compile_regexes` turns into `compiled_regexes`.
    pub regexes: Option<Vec<String>>,
    /// Words whose case-insensitive presence in the matched text allowlists the match.
    pub stopwords: Option<Vec<String>>,
    /// Pre-compiled regexes (not serialized)
    #[serde(skip)]
    pub compiled_regexes: Vec<Regex>,
}
26
/// A single detection rule.
#[derive(Debug, Deserialize, Clone)]
pub struct Rule {
    /// Rule identifier; copied into `DetectedSecret::rule_id` for every finding.
    pub id: String,
    /// Human-readable description; not read by the code in this file.
    #[allow(dead_code)]
    pub description: String,
    /// Source of the detection regex. `None` for rules without a pattern (e.g. path-only rules).
    pub regex: Option<String>,
    /// Minimum Shannon entropy the extracted secret value must reach to be reported.
    pub entropy: Option<f64>,
    /// Case-insensitive pre-filter: when non-empty, the rule only runs if the input
    /// contains at least one of these keywords.
    #[serde(default)]
    pub keywords: Vec<String>,
    /// Path pattern of the rule; not read by the code in this file.
    #[allow(dead_code)]
    pub path: Option<String>,
    /// Rule-specific allowlists; a match is suppressed when any one of them applies.
    pub allowlists: Option<Vec<RuleAllowlist>>,
    /// Pre-compiled regex (not serialized)
    #[serde(skip)]
    pub compiled_regex: Option<Regex>,
}
43
/// Allowlist attached to a single rule.
///
/// Up to three kinds of checks (regexes, stopwords, paths) are evaluated and combined
/// according to `condition`.
#[derive(Debug, Deserialize, Clone)]
pub struct RuleAllowlist {
    /// Optional description; not read by the code in this file.
    #[allow(dead_code)]
    pub description: Option<String>,
    /// How the individual checks are combined.
    pub condition: Option<String>, // "AND" or default "OR"
    /// Path fragments; a check passes when the scanned path contains one of them.
    pub paths: Option<Vec<String>>,
    /// Regex sources that `compile_regexes` turns into `compiled_regexes`.
    pub regexes: Option<Vec<String>>,
    /// Stopwords used to recognise placeholder/test values.
    pub stopwords: Option<Vec<String>>,
    /// Which text the regexes and stopwords are applied to; anything other than
    /// "line" is treated as "match".
    #[serde(rename = "regexTarget")]
    pub regex_target: Option<String>, // "match", "line", etc.
    /// Pre-compiled regexes (not serialized)
    #[serde(skip)]
    pub compiled_regexes: Vec<Regex>,
}
58
/// Represents a detected secret with its position and value
///
/// When the rule's regex has a participating first capture group, `value` and the
/// positions describe that group; otherwise they describe the whole regex match.
#[derive(Debug, Clone)]
pub struct DetectedSecret {
    /// Detection rule id
    pub rule_id: String,
    /// The secret value
    pub value: String,
    /// Start position in the original string, as reported by the regex match
    pub start_pos: usize,
    /// End position in the original string, as reported by the regex match
    pub end_pos: usize,
}
71
/// Problems collected while compiling the regexes of a configuration.
///
/// Serializable so that it can be dumped as JSON into an error log.
#[derive(Debug, Default, Serialize)]
pub struct CompilationErrors {
    /// Rule regexes that could not be compiled (not even through a fallback).
    pub regex_errors: Vec<(String, String)>, // (rule_id, error_message)
    /// Non-fatal problems, e.g. a skipped allowlist regex or a fallback regex being used.
    pub warnings: Vec<String>,
}
77
78impl CompilationErrors {
79    pub fn add_regex_error(&mut self, rule_id: String, error: String) {
80        self.regex_errors.push((rule_id, error));
81    }
82
83    pub fn add_warning(&mut self, warning: String) {
84        self.warnings.push(warning);
85    }
86
87    #[allow(dead_code)]
88    pub fn is_empty(&self) -> bool {
89        self.regex_errors.is_empty() && self.warnings.is_empty()
90    }
91}
92
/// Trait for compiling regex patterns in configuration structures
///
/// Implementors store the compiled regexes on themselves and report every pattern
/// that could not be compiled through the returned [`CompilationErrors`].
pub trait RegexCompilable {
    /// Compiles the regex sources held by `self` and returns the problems encountered.
    fn compile_regexes(&mut self) -> CompilationErrors;
}
97
98impl RegexCompilable for Allowlist {
99    fn compile_regexes(&mut self) -> CompilationErrors {
100        let mut errors = CompilationErrors::default();
101        self.compiled_regexes.clear();
102
103        if let Some(regexes) = &self.regexes {
104            for pattern in regexes {
105                match Regex::new(pattern) {
106                    Ok(regex) => self.compiled_regexes.push(regex),
107                    Err(e) => errors.add_warning(format!(
108                        "Failed to compile allowlist regex '{}': {}",
109                        pattern, e
110                    )),
111                }
112            }
113        }
114
115        errors
116    }
117}
118
119impl RegexCompilable for RuleAllowlist {
120    fn compile_regexes(&mut self) -> CompilationErrors {
121        let mut errors = CompilationErrors::default();
122        self.compiled_regexes.clear();
123
124        if let Some(regexes) = &self.regexes {
125            for pattern in regexes {
126                match Regex::new(pattern) {
127                    Ok(regex) => self.compiled_regexes.push(regex),
128                    Err(e) => errors.add_warning(format!(
129                        "Failed to compile rule allowlist regex '{}': {}",
130                        pattern, e
131                    )),
132                }
133            }
134        }
135
136        errors
137    }
138}
139
140impl RegexCompilable for Rule {
141    fn compile_regexes(&mut self) -> CompilationErrors {
142        let mut errors = CompilationErrors::default();
143
144        // Compile main regex with fallback handling
145        if let Some(regex_pattern) = &self.regex {
146            match Regex::new(regex_pattern) {
147                Ok(regex) => self.compiled_regex = Some(regex),
148                Err(e) => {
149                    // Handle regex compilation errors with specific fallbacks
150                    match self.id.as_str() {
151                        "generic-api-key" | "pypi-upload-token" | "vault-batch-token" => {
152                            match create_simple_api_key_regex() {
153                                Ok(simple_regex) => {
154                                    self.compiled_regex = Some(simple_regex);
155                                    errors.add_warning(format!(
156                                        "Used fallback regex for rule '{}' due to: {}",
157                                        self.id, e
158                                    ));
159                                }
160                                Err(fallback_err) => {
161                                    errors.add_regex_error(
162                                        self.id.clone(),
163                                        format!(
164                                            "Failed to compile regex and fallback: {} / {}",
165                                            e, fallback_err
166                                        ),
167                                    );
168                                }
169                            }
170                        }
171                        _ => {
172                            errors.add_regex_error(self.id.clone(), e.to_string());
173                        }
174                    }
175                }
176            }
177        } else {
178            // Rule has no regex pattern (e.g., path-only rules like pkcs12-file)
179            // This is valid for certain types of rules, so no error
180            self.compiled_regex = None;
181        }
182
183        // Compile allowlist regexes
184        if let Some(allowlists) = &mut self.allowlists {
185            for allowlist in allowlists {
186                let allowlist_errors = allowlist.compile_regexes();
187                errors.warnings.extend(allowlist_errors.warnings);
188                errors.regex_errors.extend(allowlist_errors.regex_errors);
189            }
190        }
191
192        errors
193    }
194}
195
196impl RegexCompilable for GitleaksConfig {
197    fn compile_regexes(&mut self) -> CompilationErrors {
198        let mut errors = CompilationErrors::default();
199
200        // Compile global allowlist
201        if let Some(allowlist) = &mut self.allowlist {
202            let allowlist_errors = allowlist.compile_regexes();
203            errors.warnings.extend(allowlist_errors.warnings);
204            errors.regex_errors.extend(allowlist_errors.regex_errors);
205        }
206
207        // Compile rules (keeping only successfully compiled ones)
208        let mut compiled_rules = Vec::new();
209        for mut rule in self.rules.drain(..) {
210            let rule_errors = rule.compile_regexes();
211            errors.warnings.extend(rule_errors.warnings);
212            errors.regex_errors.extend(rule_errors.regex_errors);
213
214            // Keep rules that either compiled successfully or don't have regex patterns (e.g., path-only rules)
215            if rule.compiled_regex.is_some() || rule.regex.is_none() {
216                compiled_rules.push(rule);
217            }
218        }
219        self.rules = compiled_rules;
220
221        errors
222    }
223}
224
/// Lazy-loaded gitleaks configuration
///
/// Built by `create_gitleaks_config(false)` (no privacy rules) the first time it is
/// dereferenced; later accesses reuse the same value.
pub static GITLEAKS_CONFIG: LazyLock<GitleaksConfig> =
    LazyLock::new(|| create_gitleaks_config(false));
228
/// Lazy-loaded gitleaks configuration with privacy rules
///
/// Built by `create_gitleaks_config(true)` the first time it is dereferenced; later
/// accesses reuse the same value.
pub static GITLEAKS_CONFIG_WITH_PRIVACY: LazyLock<GitleaksConfig> =
    LazyLock::new(|| create_gitleaks_config(true));
232
233/// Creates a gitleaks configuration with optional privacy rules
234fn create_gitleaks_config(include_privacy_rules: bool) -> GitleaksConfig {
235    // Load main gitleaks configuration
236    let config_str = include_str!("gitleaks.toml");
237    let mut config: GitleaksConfig =
238        toml::from_str(config_str).expect("Failed to parse gitleaks.toml");
239
240    // Load additional rules configuration
241    let additional_config_str = include_str!("additional_rules.toml");
242    let additional_config: GitleaksConfig =
243        toml::from_str(additional_config_str).expect("Failed to parse additional_rules.toml");
244
245    // Merge additional rules into the main configuration
246    config.rules.extend(additional_config.rules);
247
248    // Merge additional allowlist if present
249    if let Some(additional_allowlist) = additional_config.allowlist {
250        merge_allowlist(&mut config.allowlist, additional_allowlist);
251    }
252
253    // Load privacy rules if enabled
254    if include_privacy_rules {
255        let privacy_config_str = include_str!("privacy_rules.toml");
256        let privacy_config: GitleaksConfig =
257            toml::from_str(privacy_config_str).expect("Failed to parse privacy_rules.toml");
258
259        // Merge privacy rules into the main configuration
260        config.rules.extend(privacy_config.rules);
261
262        // Merge privacy allowlist if present
263        if let Some(privacy_allowlist) = privacy_config.allowlist {
264            merge_allowlist(&mut config.allowlist, privacy_allowlist);
265        }
266    }
267
268    let compilation_errors = config.compile_regexes();
269    if !compilation_errors.regex_errors.is_empty() {
270        const ERROR_LOG_FILE: &str = ".stakpak_mcp_secret_detection_errors";
271        // Write errors to log file
272        if let Ok(json) = serde_json::to_string(&compilation_errors)
273            && let Err(e) = std::fs::write(ERROR_LOG_FILE, json)
274        {
275            eprintln!("Failed to write errors to log file: {}", e);
276        }
277    }
278    config
279}
280
281/// Helper function to merge allowlists
282fn merge_allowlist(target: &mut Option<Allowlist>, source: Allowlist) {
283    match target {
284        Some(existing_allowlist) => {
285            // Merge regexes
286            if let Some(additional_regexes) = source.regexes {
287                match &mut existing_allowlist.regexes {
288                    Some(existing_regexes) => existing_regexes.extend(additional_regexes),
289                    None => existing_allowlist.regexes = Some(additional_regexes),
290                }
291            }
292
293            // Merge stopwords
294            if let Some(additional_stopwords) = source.stopwords {
295                match &mut existing_allowlist.stopwords {
296                    Some(existing_stopwords) => existing_stopwords.extend(additional_stopwords),
297                    None => existing_allowlist.stopwords = Some(additional_stopwords),
298                }
299            }
300        }
301        None => *target = Some(source),
302    }
303}
304
305/// Creates a simplified API key regex that works within Rust's regex engine limits
306pub fn create_simple_api_key_regex() -> Result<Regex, regex::Error> {
307    // The original Gitleaks generic pattern is too complex for Rust's regex engine.
308    // We'll use a simpler but still effective pattern that captures the essence:
309    // 1. Optional prefix (identifier)
310    // 2. Keywords (access, auth, api, etc.)
311    // 3. Optional suffix
312    // 4. Assignment operators
313    // 5. Optional quotes/spaces
314    // 6. The actual secret value (captured)
315    // 7. Terminator
316
317    let pattern = r#"(?i)[\w.-]{0,30}?(?:access|auth|api|credential|creds|key|password|passwd|secret|token)[\w.-]{0,15}[\s'"]{0,3}(?:=|>|:{1,2}=|\|\||:|=>|\?=|,)[\s'"=]{0,3}([\w.=-]{10,80}|[a-z0-9][a-z0-9+/]{11,}={0,2})(?:[\s'";]|$)"#;
318    Regex::new(pattern)
319}
320
/// Calculate Shannon entropy for a string
///
/// Entropy measures the randomness/unpredictability of characters in a string.
/// Higher entropy suggests more randomness, which is characteristic of secrets.
///
/// The result is `H = -Σ p(c) * log2(p(c))` over the distinct characters `c` of
/// `text`, where `p(c)` is the share of `c` among all characters. Frequencies are
/// measured in characters (Unicode scalar values), not bytes, so multi-byte text is
/// handled correctly. Returns `0.0` for the empty string.
pub fn calculate_entropy(text: &str) -> f64 {
    let mut char_counts = std::collections::HashMap::new();
    let mut total_chars = 0usize;

    // Count character frequencies and the total number of characters in one pass.
    for ch in text.chars() {
        *char_counts.entry(ch).or_insert(0usize) += 1;
        total_chars += 1;
    }

    if total_chars == 0 {
        return 0.0;
    }

    // The total must be a character count: dividing by the byte length would make
    // the probabilities of non-ASCII text sum to less than 1.
    let total = total_chars as f64;

    let mut entropy = 0.0;
    for &count in char_counts.values() {
        let probability = count as f64 / total;
        entropy -= probability * probability.log2();
    }

    entropy
}
349
350/// Detects secrets in the input string using gitleaks configuration
351///
352/// This implementation follows the gitleaks methodology:
353/// 1. Apply regex rules to find potential secrets
354/// 2. Check entropy thresholds to filter out low-entropy matches
355/// 3. Apply allowlists to exclude known false positives
356/// 4. Check keywords to ensure relevance
357///
358/// When privacy_mode is enabled, also detects private data like IP addresses and AWS account IDs
359pub fn detect_secrets(input: &str, path: Option<&str>, privacy_mode: bool) -> Vec<DetectedSecret> {
360    let mut detected_secrets = Vec::new();
361    let config = if privacy_mode {
362        &*GITLEAKS_CONFIG_WITH_PRIVACY
363    } else {
364        &*GITLEAKS_CONFIG
365    };
366
367    // Apply each compiled rule from the configuration
368    for rule in &config.rules {
369        // Skip rules that don't have regex patterns (e.g., path-only rules)
370        let regex = match &rule.compiled_regex {
371            Some(regex) => regex,
372            None => continue,
373        };
374
375        // Pre-filter: Skip rule if none of its keywords are present in the input
376        if !rule.keywords.is_empty() && !contains_any_keyword(input, &rule.keywords) {
377            continue;
378        }
379
380        // Find all matches for this rule using the pre-compiled regex
381        for mat in regex.find_iter(input) {
382            let match_text = mat.as_str();
383            let start_pos = mat.start();
384            let end_pos = mat.end();
385
386            // Check if this match should be filtered out
387            if should_allow_match(
388                input,
389                path,
390                match_text,
391                start_pos,
392                end_pos,
393                rule,
394                &config.allowlist,
395            ) {
396                continue;
397            }
398
399            // Extract the captured secret value and its position
400            let (secret_value, secret_start, secret_end) =
401                if let Some(captures) = regex.captures_at(input, start_pos) {
402                    // Try to get the first capture group, fallback to full match
403                    if let Some(capture) = captures.get(1) {
404                        // Capture positions are already relative to the full input
405                        (capture.as_str().to_string(), capture.start(), capture.end())
406                    } else {
407                        (match_text.to_string(), start_pos, end_pos)
408                    }
409                } else {
410                    (match_text.to_string(), start_pos, end_pos)
411                };
412
413            // Check entropy if specified - apply to the captured secret value, not the full match
414            if let Some(entropy_threshold) = rule.entropy {
415                let calculated_entropy = calculate_entropy(&secret_value);
416                if calculated_entropy < entropy_threshold {
417                    continue;
418                }
419            }
420
421            detected_secrets.push(DetectedSecret {
422                rule_id: rule.id.clone(),
423                value: secret_value,
424                start_pos: secret_start,
425                end_pos: secret_end,
426            });
427        }
428    }
429
430    detected_secrets
431}
432
433/// Check if a match should be allowed (filtered out) based on allowlists
434pub fn should_allow_match(
435    input: &str,
436    path: Option<&str>,
437    match_text: &str,
438    start_pos: usize,
439    end_pos: usize,
440    rule: &Rule,
441    global_allowlist: &Option<Allowlist>,
442) -> bool {
443    // Check global allowlist first
444    if let Some(global) = global_allowlist
445        && is_allowed_by_allowlist(input, match_text, start_pos, end_pos, global)
446    {
447        return true;
448    }
449
450    // Check rule-specific allowlists
451    if let Some(rule_allowlists) = &rule.allowlists {
452        for allowlist in rule_allowlists {
453            if is_allowed_by_rule_allowlist(input, path, match_text, start_pos, end_pos, allowlist)
454            {
455                return true;
456            }
457        }
458    }
459
460    false
461}
462
463fn is_allowed_by_allowlist(
464    _input: &str,
465    match_text: &str,
466    _start_pos: usize,
467    _end_pos: usize,
468    allowlist: &Allowlist,
469) -> bool {
470    // Check regex patterns
471    for regex in &allowlist.compiled_regexes {
472        if regex.is_match(match_text) {
473            return true;
474        }
475    }
476
477    // Check stopwords
478    if let Some(stopwords) = &allowlist.stopwords {
479        for stopword in stopwords {
480            if match_text.to_lowercase().contains(&stopword.to_lowercase()) {
481                return true;
482            }
483        }
484    }
485
486    false
487}
488
489pub fn is_allowed_by_rule_allowlist(
490    input: &str,
491    path: Option<&str>,
492    match_text: &str,
493    start_pos: usize,
494    end_pos: usize,
495    allowlist: &RuleAllowlist,
496) -> bool {
497    let mut checks = Vec::new();
498
499    // Determine the target text based on regex_target
500    let target_text = match allowlist.regex_target.as_deref() {
501        Some("match") => match_text,
502        Some("line") => {
503            // Extract the line containing the match
504            let line_start = input[..start_pos].rfind('\n').map(|i| i + 1).unwrap_or(0);
505            let line_end = input[end_pos..]
506                .find('\n')
507                .map(|i| end_pos + i)
508                .unwrap_or(input.len());
509            &input[line_start..line_end]
510        }
511        _ => match_text, // Default to match
512    };
513
514    // Check regex patterns using pre-compiled regexes
515    if !allowlist.compiled_regexes.is_empty() {
516        let regex_matches = allowlist
517            .compiled_regexes
518            .iter()
519            .any(|regex| regex.is_match(target_text));
520        checks.push(regex_matches);
521    }
522
523    // Check stopwords with configuration-aware logic
524    if let Some(stopwords) = &allowlist.stopwords {
525        let stopword_matches = stopwords.iter().any(|stopword| {
526            // For configuration-style patterns (KEY=VALUE), be more permissive
527            if let Some(equals_pos) = target_text.find('=') {
528                let value = &target_text[equals_pos + 1..];
529
530                // Only filter if the value itself is obviously a placeholder/test value
531                // Check if the entire value is just the stopword or a simple variation
532                let value_lower = value.to_lowercase();
533                let stopword_lower = stopword.to_lowercase();
534
535                // Filter only if:
536                // 1. The value is exactly the stopword (e.g., "password")
537                // 2. The value is a simple variation like "password123" or "secretkey"
538                // 3. The value contains the stopword and is very short/simple
539
540                if value_lower == stopword_lower {
541                    true // Exact match: PASSWORD=password
542                } else if value.len() < 15 && value_lower.contains(&stopword_lower) {
543                    // Short values containing stopwords: PASSWORD=password123
544                    let without_stopword = value_lower.replace(&stopword_lower, "");
545                    // If removing the stopword leaves only numbers/simple chars, it's likely a test value
546                    without_stopword
547                        .chars()
548                        .all(|c| c.is_ascii_digit() || "!@#$%^&*()_+-=[]{}|;:,.<>?".contains(c))
549                } else {
550                    false // Don't filter longer/complex values
551                }
552            } else {
553                // For non-KEY=VALUE patterns, use original logic but be more restrictive
554                // Only filter on very obvious stopwords
555                let obvious_false_positives = ["example", "test", "demo", "sample", "placeholder"];
556                if obvious_false_positives.contains(&stopword.as_str()) {
557                    target_text
558                        .to_lowercase()
559                        .contains(&stopword.to_lowercase())
560                } else {
561                    false
562                }
563            }
564        });
565        checks.push(stopword_matches);
566    }
567
568    // Check paths
569    if let Some(paths) = &allowlist.paths
570        && let Some(path) = path
571    {
572        checks.push(paths.iter().any(|p| path.contains(p)));
573    }
574
575    // If no checks were added, this allowlist doesn't apply
576    if checks.is_empty() {
577        return false;
578    }
579
580    // Apply condition logic (AND vs OR)
581    match allowlist.condition.as_deref() {
582        Some("AND") => checks.iter().all(|&check| check),
583        _ => checks.iter().any(|&check| check), // Default to OR
584    }
585}
586
/// Reports whether `input` contains at least one of `keywords`, ignoring case.
///
/// Returns `false` for an empty keyword list.
pub fn contains_any_keyword(input: &str, keywords: &[String]) -> bool {
    let haystack = input.to_lowercase();
    for keyword in keywords {
        let needle = keyword.to_lowercase();
        if haystack.contains(needle.as_str()) {
            return true;
        }
    }
    false
}
594
595/// Forces initialization of the gitleaks configuration
596///
597/// This function should be called during application startup to preload and compile
598/// the gitleaks rules, avoiding delays on the first call to detect_secrets.
599///
600/// When privacy_mode is enabled, also loads privacy rules for detecting IP addresses and AWS account IDs
601///
602/// Returns the number of successfully compiled rules.
603pub fn initialize_gitleaks_config(privacy_mode: bool) -> usize {
604    // Force evaluation of the lazy static
605    let config = if privacy_mode {
606        &*GITLEAKS_CONFIG_WITH_PRIVACY
607    } else {
608        &*GITLEAKS_CONFIG
609    };
610    config.rules.len()
611}
612
613#[cfg(test)]
614mod tests {
615    use super::*;
616
617    #[test]
618    fn test_entropy_calculation() {
619        // Test high entropy (random-like) string
620        let high_entropy = calculate_entropy("Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA");
621
622        // Test low entropy (repetitive) string
623        let low_entropy = calculate_entropy("aaaaaaaaaa");
624
625        // Test empty string
626        let zero_entropy = calculate_entropy("");
627
628        assert!(high_entropy > low_entropy);
629        assert_eq!(zero_entropy, 0.0);
630
631        println!("High entropy: {:.2}", high_entropy);
632        println!("Low entropy: {:.2}", low_entropy);
633        println!("Zero entropy: {:.2}", zero_entropy);
634    }
635
636    #[test]
637    fn test_additional_rules_loaded() {
638        let config = &*GITLEAKS_CONFIG;
639
640        // Check that the Anthropic API key rule from additional_rules.toml is loaded
641        let anthropic_rule = config.rules.iter().find(|r| r.id == "anthropic-api-key");
642        assert!(
643            anthropic_rule.is_some(),
644            "Anthropic API key rule should be loaded from additional_rules.toml"
645        );
646
647        if let Some(rule) = anthropic_rule {
648            assert!(rule.keywords.contains(&"anthropic".to_string()));
649            assert!(
650                rule.compiled_regex.is_some(),
651                "Anthropic rule regex should be compiled"
652            );
653        }
654
655        println!("Total rules loaded: {}", config.rules.len());
656    }
657
658    #[test]
659    fn test_anthropic_api_key_detection() {
660        // Use a more realistic API key that doesn't contain alphabet sequences
661        let test_input =
662            "ANTHROPIC_API_KEY=sk-ant-api03-Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA9bD2eG5kM8pR1tX4zB7";
663        let secrets = detect_secrets(test_input, None, false);
664
665        // Should detect the Anthropic API key
666        let anthropic_secret = secrets.iter().find(|s| s.rule_id == "anthropic-api-key");
667        assert!(
668            anthropic_secret.is_some(),
669            "Should detect Anthropic API key"
670        );
671
672        if let Some(secret) = anthropic_secret {
673            assert!(secret.value.starts_with("sk-ant-api03-"));
674        }
675    }
676
677    #[test]
678    fn test_privacy_mode_aws_account_id() {
679        let test_input = "AWS_ACCOUNT_ID=987654321098";
680
681        // Should not detect AWS account ID in regular mode
682        let secrets = detect_secrets(test_input, None, false);
683        assert!(!secrets.iter().any(|s| s.rule_id == "aws-account-id"));
684
685        // Should detect AWS account ID in privacy mode
686        let secrets_privacy = detect_secrets(test_input, None, true);
687        let aws_secret = secrets_privacy
688            .iter()
689            .find(|s| s.rule_id == "aws-account-id");
690        assert!(
691            aws_secret.is_some(),
692            "Should detect AWS account ID in privacy mode"
693        );
694
695        if let Some(secret) = aws_secret {
696            assert_eq!(secret.value, "987654321098");
697        }
698    }
699
700    #[test]
701    fn test_privacy_mode_public_ip() {
702        let test_input = "SERVER_IP=203.0.113.195";
703
704        // Should not detect public IP in regular mode
705        let secrets = detect_secrets(test_input, None, false);
706        assert!(!secrets.iter().any(|s| s.rule_id == "public-ipv4"));
707
708        // Should detect public IP in privacy mode
709        let secrets_privacy = detect_secrets(test_input, None, true);
710        let ip_secret = secrets_privacy.iter().find(|s| s.rule_id == "public-ipv4");
711        assert!(
712            ip_secret.is_some(),
713            "Should detect public IP in privacy mode"
714        );
715
716        if let Some(secret) = ip_secret {
717            assert_eq!(secret.value, "203.0.113.195");
718        }
719    }
720
721    #[test]
722    fn test_privacy_mode_private_ip_excluded() {
723        let test_input = "LOCAL_IP=192.168.1.1";
724
725        // Should not detect private IP even in privacy mode
726        let secrets_privacy = detect_secrets(test_input, None, true);
727        assert!(!secrets_privacy.iter().any(|s| s.rule_id == "public-ipv4"));
728    }
729
730    #[test]
731    fn test_privacy_mode_aws_arn() {
732        let test_input = "ARN=arn:aws:s3:::my-bucket/object";
733
734        // Should not detect AWS account ID in regular mode
735        let secrets = detect_secrets(test_input, None, false);
736        assert!(!secrets.iter().any(|s| s.rule_id == "aws-account-id"));
737
738        // Should detect AWS account ID in ARN in privacy mode
739        let secrets_privacy = detect_secrets(test_input, None, true);
740        // This specific ARN doesn't contain an account ID, so it shouldn't be detected
741        assert!(
742            !secrets_privacy
743                .iter()
744                .any(|s| s.rule_id == "aws-account-id")
745        );
746
747        // Test with an ARN that contains an account ID
748        let test_input_with_account = "ARN=arn:aws:iam::987654321098:role/MyRole";
749        let secrets_with_account = detect_secrets(test_input_with_account, None, true);
750        let aws_secret = secrets_with_account
751            .iter()
752            .find(|s| s.rule_id == "aws-account-id");
753        assert!(
754            aws_secret.is_some(),
755            "Should detect AWS account ID in ARN in privacy mode"
756        );
757
758        if let Some(secret) = aws_secret {
759            assert_eq!(secret.value, "987654321098");
760        }
761    }
762
763    #[test]
764    fn test_privacy_mode_initialization() {
765        // Test that privacy mode initialization works
766        let regular_count = initialize_gitleaks_config(false);
767        let privacy_count = initialize_gitleaks_config(true);
768
769        // Privacy mode should have more rules
770        assert!(
771            privacy_count > regular_count,
772            "Privacy mode should have more rules than regular mode"
773        );
774    }
775
    /// Diagnostic trace for the `aws-account-id` privacy rule.
    ///
    /// This test contains no assertions: it only prints what each detection stage
    /// (regex, keyword pre-filter, capture groups, entropy) yields for a sample
    /// input, so it fails only if one of the calls panics.
    #[test]
    fn test_debug_privacy_mode_aws() {
        let test_input = "AWS_ACCOUNT_ID=987654321098"; // Different from allowlist

        // Test with privacy mode
        let secrets_privacy = detect_secrets(test_input, None, true);
        println!("Privacy mode detected {} secrets", secrets_privacy.len());
        for secret in &secrets_privacy {
            println!(
                "  Rule: {}, Value: '{}', Pos: {}-{}",
                secret.rule_id, secret.value, secret.start_pos, secret.end_pos
            );
        }

        // Test without privacy mode
        let secrets_regular = detect_secrets(test_input, None, false);
        println!("Regular mode detected {} secrets", secrets_regular.len());
        for secret in &secrets_regular {
            println!(
                "  Rule: {}, Value: '{}', Pos: {}-{}",
                secret.rule_id, secret.value, secret.start_pos, secret.end_pos
            );
        }

        // Check if privacy config loaded properly
        let config_with_privacy = &*GITLEAKS_CONFIG_WITH_PRIVACY;
        let aws_rule = config_with_privacy
            .rules
            .iter()
            .find(|r| r.id == "aws-account-id");
        println!("AWS rule found: {}", aws_rule.is_some());
        if let Some(rule) = aws_rule {
            println!("AWS rule keywords: {:?}", rule.keywords);
            if let Some(regex) = &rule.compiled_regex {
                println!("AWS rule regex compiled: yes");
                // Run the rule's regex directly, bypassing keyword and allowlist filtering
                let test_matches: Vec<_> = regex.find_iter(test_input).collect();
                println!("Direct regex matches: {}", test_matches.len());
                for mat in test_matches {
                    println!("  Match: '{}'", mat.as_str());
                }

                // Test keyword filtering
                let contains_keywords = contains_any_keyword(test_input, &rule.keywords);
                println!("Contains keywords: {}", contains_keywords);

                // Test capture groups
                if let Some(captures) = regex.captures(test_input) {
                    println!("Capture groups found: {}", captures.len());
                    for (i, cap) in captures.iter().enumerate() {
                        if let Some(cap) = cap {
                            println!("  Capture {}: '{}'", i, cap.as_str());
                        }
                    }
                } else {
                    println!("No capture groups found");
                }

                // Test entropy if there are captures
                for mat in regex.find_iter(test_input) {
                    if let Some(captures) = regex.captures_at(test_input, mat.start())
                        && let Some(capture) = captures.get(1)
                    {
                        let entropy = calculate_entropy(capture.as_str());
                        println!(
                            "  Entropy of first capture '{}': {:.2} (threshold: {:?})",
                            capture.as_str(),
                            entropy,
                            rule.entropy
                        );
                    }
                }
            } else {
                println!("AWS rule regex compiled: no");
            }
        }
    }
852
853    #[test]
854    fn test_debug_privacy_mode_ip() {
855        let test_input = "SERVER_IP=8.8.8.8";
856
857        // Test with privacy mode
858        let secrets_privacy = detect_secrets(test_input, None, true);
859        println!("Privacy mode detected {} secrets", secrets_privacy.len());
860        for secret in &secrets_privacy {
861            println!(
862                "  Rule: {}, Value: '{}', Pos: {}-{}",
863                secret.rule_id, secret.value, secret.start_pos, secret.end_pos
864            );
865        }
866
867        // Check if privacy config loaded properly
868        let config_with_privacy = &*GITLEAKS_CONFIG_WITH_PRIVACY;
869        let ip_rule = config_with_privacy
870            .rules
871            .iter()
872            .find(|r| r.id == "public-ipv4");
873        println!("IP rule found: {}", ip_rule.is_some());
874        if let Some(rule) = ip_rule {
875            println!("IP rule keywords: {:?}", rule.keywords);
876            if let Some(regex) = &rule.compiled_regex {
877                println!("IP rule regex compiled: yes");
878                let test_matches: Vec<_> = regex.find_iter(test_input).collect();
879                println!("Direct regex matches: {}", test_matches.len());
880                for mat in test_matches {
881                    println!("  Match: '{}'", mat.as_str());
882                }
883
884                // Test keyword filtering
885                let contains_keywords = contains_any_keyword(test_input, &rule.keywords);
886                println!("Contains keywords: {}", contains_keywords);
887
888                // Test capture groups
889                if let Some(captures) = regex.captures(test_input) {
890                    println!("Capture groups found: {}", captures.len());
891                    for (i, cap) in captures.iter().enumerate() {
892                        if let Some(cap) = cap {
893                            println!("  Capture {}: '{}'", i, cap.as_str());
894                        }
895                    }
896                } else {
897                    println!("No capture groups found");
898                }
899            } else {
900                println!("IP rule regex compiled: no");
901            }
902        }
903    }
904
905    #[test]
906    fn test_comprehensive_ip_detection() {
907        println!("=== COMPREHENSIVE IP DETECTION TEST ===");
908
909        let test_cases = vec![
910            // Public IPs that should be detected
911            ("16.170.172.114", true),
912            ("8.8.8.8", true),
913            ("1.1.1.1", true),
914            ("203.0.113.195", true),
915            ("13.107.42.14", true),
916            // Private IPs that should NOT be detected
917            ("192.168.1.1", false),
918            ("10.0.0.1", false),
919            ("172.16.0.1", false),
920            ("127.0.0.1", false),
921            ("169.254.1.1", false),
922            ("0.0.0.0", false),
923            ("255.255.255.255", false),
924        ];
925
926        for (ip, should_detect) in test_cases {
927            let secrets = detect_secrets(ip, None, true);
928            let detected = secrets.iter().any(|s| s.rule_id == "public-ipv4");
929
930            println!(
931                "IP: {} | Should detect: {} | Detected: {}",
932                ip, should_detect, detected
933            );
934
935            if should_detect {
936                assert!(detected, "Should detect public IP: {}", ip);
937            } else {
938                assert!(!detected, "Should NOT detect private IP: {}", ip);
939            }
940        }
941
942        // Test IP in various contexts
943        let context_tests = vec![
944            "IP address: 16.170.172.114",
945            "Connect to 16.170.172.114",
946            "16.170.172.114:8080",
947            "ping 16.170.172.114",
948            "https://16.170.172.114/api",
949        ];
950
951        for context in context_tests {
952            let secrets = detect_secrets(context, None, true);
953            let detected = secrets.iter().any(|s| s.rule_id == "public-ipv4");
954            println!("Context: '{}' | Detected: {}", context, detected);
955            assert!(detected, "Should detect IP in context: {}", context);
956        }
957    }
958
959    #[test]
960    fn test_standalone_ip_detection() {
961        println!("=== TESTING STANDALONE IP DETECTION ===");
962
963        // Test standalone IP that should be detected
964        let standalone_ip = "16.170.172.114";
965        let secrets = detect_secrets(standalone_ip, None, true);
966
967        println!(
968            "Standalone IP '{}' detected {} secrets",
969            standalone_ip,
970            secrets.len()
971        );
972        for secret in &secrets {
973            println!("  Rule: {}, Value: '{}'", secret.rule_id, secret.value);
974        }
975
976        // Test IP with context that should be detected
977        let ip_with_context = "SERVER_IP=16.170.172.114";
978        let secrets_with_context = detect_secrets(ip_with_context, None, true);
979
980        println!(
981            "IP with context '{}' detected {} secrets",
982            ip_with_context,
983            secrets_with_context.len()
984        );
985        for secret in &secrets_with_context {
986            println!("  Rule: {}, Value: '{}'", secret.rule_id, secret.value);
987        }
988
989        // Test keyword filtering
990        let config = &*GITLEAKS_CONFIG_WITH_PRIVACY;
991        let ip_rule = config.rules.iter().find(|r| r.id == "public-ipv4");
992        if let Some(rule) = ip_rule {
993            println!("IP rule keywords: {:?}", rule.keywords);
994            println!(
995                "Standalone IP contains keywords: {}",
996                contains_any_keyword(standalone_ip, &rule.keywords)
997            );
998            println!(
999                "IP with context contains keywords: {}",
1000                contains_any_keyword(ip_with_context, &rule.keywords)
1001            );
1002        }
1003    }
1004
1005    #[test]
1006    fn test_user_provided_json_snippet() {
1007        println!("=== TESTING USER PROVIDED JSON SNIPPET ===");
1008
1009        let json_snippet = r#"{
1010    "UserId": "AIDAX5UI4H55WM6GS6NIJ",
1011    "Account": "544388841223",
1012    "Arn": "arn:aws:iam::544388841223:user/terraform-mac"
1013}"#;
1014
1015        let secrets = detect_secrets(json_snippet, None, true);
1016        let aws_secrets: Vec<_> = secrets
1017            .iter()
1018            .filter(|s| s.rule_id == "aws-account-id")
1019            .collect();
1020
1021        println!("Detected {} AWS account ID secrets", aws_secrets.len());
1022        for secret in &aws_secrets {
1023            println!(
1024                "  Value: '{}' at position {}-{}",
1025                secret.value, secret.start_pos, secret.end_pos
1026            );
1027        }
1028
1029        // Should detect the account ID in the "Account" field
1030        assert!(
1031            !aws_secrets.is_empty(),
1032            "Should detect at least one AWS account ID"
1033        );
1034        assert!(
1035            aws_secrets.iter().any(|s| s.value == "544388841223"),
1036            "Should detect account ID 544388841223"
1037        );
1038
1039        // The ARN might also contain a redacted reference but that's already handled
1040        println!("✅ JSON snippet test passed - Account field is now detected");
1041    }
1042
1043    #[test]
1044    fn test_aws_account_id_json_field() {
1045        println!("=== TESTING AWS ACCOUNT ID JSON FIELD DETECTION ===");
1046
1047        let test_cases = vec![
1048            // JSON field patterns that should be detected
1049            r#""Account": "544388841223""#,
1050            r#""AccountId": "544388841223""#,
1051            r#""account": "544388841223""#,
1052            r#""accountId": "544388841223""#,
1053            // Other patterns that should still work
1054            "AWS_ACCOUNT_ID=544388841223",
1055            "account.id=544388841223",
1056            "account_id: 544388841223",
1057            "arn:aws:iam::544388841223:user/test",
1058            "544388841223    arn:aws:iam::544388841223:user/terraform-mac    AIDAX5UI4H55WM6GS6NIJ",
1059        ];
1060
1061        for test_case in test_cases {
1062            let secrets = detect_secrets(test_case, None, true);
1063            let detected = secrets.iter().any(|s| s.rule_id == "aws-account-id");
1064
1065            println!("Test case: '{}' | Detected: {}", test_case, detected);
1066            assert!(detected, "Should detect AWS account ID in: {}", test_case);
1067
1068            // Check that the detected value is the expected account ID
1069            if let Some(secret) = secrets.iter().find(|s| s.rule_id == "aws-account-id") {
1070                assert_eq!(secret.value, "544388841223");
1071                println!("  -> Detected value: '{}'", secret.value);
1072            }
1073        }
1074    }
1075}