Skip to main content

stakpak_shared/secrets/
gitleaks.rs

1// Secret redaction implementation based on gitleaks (https://github.com/gitleaks/gitleaks)
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::sync::LazyLock;
5
6#[derive(Debug, Deserialize, Clone)]
7pub struct GitleaksConfig {
8    #[allow(dead_code)]
9    pub title: Option<String>,
10    pub allowlist: Option<Allowlist>,
11    pub rules: Vec<Rule>,
12}
13
14#[derive(Debug, Deserialize, Clone)]
15pub struct Allowlist {
16    #[allow(dead_code)]
17    pub description: Option<String>,
18    #[allow(dead_code)]
19    pub paths: Option<Vec<String>>,
20    pub regexes: Option<Vec<String>>,
21    pub stopwords: Option<Vec<String>>,
22    /// Pre-compiled regexes (not serialized)
23    #[serde(skip)]
24    pub compiled_regexes: Vec<Regex>,
25}
26
27#[derive(Debug, Deserialize, Clone)]
28pub struct Rule {
29    pub id: String,
30    #[allow(dead_code)]
31    pub description: String,
32    pub regex: Option<String>,
33    pub entropy: Option<f64>,
34    #[serde(default)]
35    pub keywords: Vec<String>,
36    #[allow(dead_code)]
37    pub path: Option<String>,
38    pub allowlists: Option<Vec<RuleAllowlist>>,
39    /// Pre-compiled regex (not serialized)
40    #[serde(skip)]
41    pub compiled_regex: Option<Regex>,
42}
43
44#[derive(Debug, Deserialize, Clone)]
45pub struct RuleAllowlist {
46    #[allow(dead_code)]
47    pub description: Option<String>,
48    pub condition: Option<String>, // "AND" or default "OR"
49    pub paths: Option<Vec<String>>,
50    pub regexes: Option<Vec<String>>,
51    pub stopwords: Option<Vec<String>>,
52    #[serde(rename = "regexTarget")]
53    pub regex_target: Option<String>, // "match", "line", etc.
54    /// Pre-compiled regexes (not serialized)
55    #[serde(skip)]
56    pub compiled_regexes: Vec<Regex>,
57}
58
/// A secret found in scanned text, together with where it was found.
#[derive(Clone, Debug)]
pub struct DetectedSecret {
    /// Identifier of the rule that produced this finding.
    pub rule_id: String,
    /// The secret text itself.
    pub value: String,
    /// Byte offset in the scanned string where the secret starts.
    pub start_pos: usize,
    /// Byte offset in the scanned string where the secret ends.
    pub end_pos: usize,
}
71
72#[derive(Debug, Default, Serialize)]
73pub struct CompilationErrors {
74    pub regex_errors: Vec<(String, String)>, // (rule_id, error_message)
75    pub warnings: Vec<String>,
76}
77
78impl CompilationErrors {
79    pub fn add_regex_error(&mut self, rule_id: String, error: String) {
80        self.regex_errors.push((rule_id, error));
81    }
82
83    pub fn add_warning(&mut self, warning: String) {
84        self.warnings.push(warning);
85    }
86
87    #[allow(dead_code)]
88    pub fn is_empty(&self) -> bool {
89        self.regex_errors.is_empty() && self.warnings.is_empty()
90    }
91}
92
93/// Trait for compiling regex patterns in configuration structures
94pub trait RegexCompilable {
95    fn compile_regexes(&mut self) -> CompilationErrors;
96}
97
98impl RegexCompilable for Allowlist {
99    fn compile_regexes(&mut self) -> CompilationErrors {
100        let mut errors = CompilationErrors::default();
101        self.compiled_regexes.clear();
102
103        if let Some(regexes) = &self.regexes {
104            for pattern in regexes {
105                match Regex::new(pattern) {
106                    Ok(regex) => self.compiled_regexes.push(regex),
107                    Err(e) => errors.add_warning(format!(
108                        "Failed to compile allowlist regex '{}': {}",
109                        pattern, e
110                    )),
111                }
112            }
113        }
114
115        errors
116    }
117}
118
119impl RegexCompilable for RuleAllowlist {
120    fn compile_regexes(&mut self) -> CompilationErrors {
121        let mut errors = CompilationErrors::default();
122        self.compiled_regexes.clear();
123
124        if let Some(regexes) = &self.regexes {
125            for pattern in regexes {
126                match Regex::new(pattern) {
127                    Ok(regex) => self.compiled_regexes.push(regex),
128                    Err(e) => errors.add_warning(format!(
129                        "Failed to compile rule allowlist regex '{}': {}",
130                        pattern, e
131                    )),
132                }
133            }
134        }
135
136        errors
137    }
138}
139
140impl RegexCompilable for Rule {
141    fn compile_regexes(&mut self) -> CompilationErrors {
142        let mut errors = CompilationErrors::default();
143
144        // Compile main regex with fallback handling
145        if let Some(regex_pattern) = &self.regex {
146            match Regex::new(regex_pattern) {
147                Ok(regex) => self.compiled_regex = Some(regex),
148                Err(e) => {
149                    // Handle regex compilation errors with specific fallbacks
150                    match self.id.as_str() {
151                        "generic-api-key" | "pypi-upload-token" | "vault-batch-token" => {
152                            match create_simple_api_key_regex() {
153                                Ok(simple_regex) => {
154                                    self.compiled_regex = Some(simple_regex);
155                                    errors.add_warning(format!(
156                                        "Used fallback regex for rule '{}' due to: {}",
157                                        self.id, e
158                                    ));
159                                }
160                                Err(fallback_err) => {
161                                    errors.add_regex_error(
162                                        self.id.clone(),
163                                        format!(
164                                            "Failed to compile regex and fallback: {} / {}",
165                                            e, fallback_err
166                                        ),
167                                    );
168                                }
169                            }
170                        }
171                        _ => {
172                            errors.add_regex_error(self.id.clone(), e.to_string());
173                        }
174                    }
175                }
176            }
177        } else {
178            // Rule has no regex pattern (e.g., path-only rules like pkcs12-file)
179            // This is valid for certain types of rules, so no error
180            self.compiled_regex = None;
181        }
182
183        // Compile allowlist regexes
184        if let Some(allowlists) = &mut self.allowlists {
185            for allowlist in allowlists {
186                let allowlist_errors = allowlist.compile_regexes();
187                errors.warnings.extend(allowlist_errors.warnings);
188                errors.regex_errors.extend(allowlist_errors.regex_errors);
189            }
190        }
191
192        errors
193    }
194}
195
196impl RegexCompilable for GitleaksConfig {
197    fn compile_regexes(&mut self) -> CompilationErrors {
198        let mut errors = CompilationErrors::default();
199
200        // Compile global allowlist
201        if let Some(allowlist) = &mut self.allowlist {
202            let allowlist_errors = allowlist.compile_regexes();
203            errors.warnings.extend(allowlist_errors.warnings);
204            errors.regex_errors.extend(allowlist_errors.regex_errors);
205        }
206
207        // Compile rules (keeping only successfully compiled ones)
208        let mut compiled_rules = Vec::new();
209        for mut rule in self.rules.drain(..) {
210            let rule_errors = rule.compile_regexes();
211            errors.warnings.extend(rule_errors.warnings);
212            errors.regex_errors.extend(rule_errors.regex_errors);
213
214            // Keep rules that either compiled successfully or don't have regex patterns (e.g., path-only rules)
215            if rule.compiled_regex.is_some() || rule.regex.is_none() {
216                compiled_rules.push(rule);
217            }
218        }
219        self.rules = compiled_rules;
220
221        errors
222    }
223}
224
225/// Lazy-loaded gitleaks configuration
226pub static GITLEAKS_CONFIG: LazyLock<GitleaksConfig> =
227    LazyLock::new(|| create_gitleaks_config(false));
228
229/// Lazy-loaded gitleaks configuration with privacy rules
230pub static GITLEAKS_CONFIG_WITH_PRIVACY: LazyLock<GitleaksConfig> =
231    LazyLock::new(|| create_gitleaks_config(true));
232
233/// Creates a gitleaks configuration with optional privacy rules
234fn create_gitleaks_config(include_privacy_rules: bool) -> GitleaksConfig {
235    // Load main gitleaks configuration
236    let config_str = include_str!("gitleaks.toml");
237    let mut config: GitleaksConfig =
238        toml::from_str(config_str).expect("Failed to parse gitleaks.toml");
239
240    // Load additional rules configuration
241    let additional_config_str = include_str!("additional_rules.toml");
242    let additional_config: GitleaksConfig =
243        toml::from_str(additional_config_str).expect("Failed to parse additional_rules.toml");
244
245    // Merge additional rules into the main configuration
246    config.rules.extend(additional_config.rules);
247
248    // Merge additional allowlist if present
249    if let Some(additional_allowlist) = additional_config.allowlist {
250        merge_allowlist(&mut config.allowlist, additional_allowlist);
251    }
252
253    // Load privacy rules if enabled
254    if include_privacy_rules {
255        let privacy_config_str = include_str!("privacy_rules.toml");
256        let privacy_config: GitleaksConfig =
257            toml::from_str(privacy_config_str).expect("Failed to parse privacy_rules.toml");
258
259        // Merge privacy rules into the main configuration
260        config.rules.extend(privacy_config.rules);
261
262        // Merge privacy allowlist if present
263        if let Some(privacy_allowlist) = privacy_config.allowlist {
264            merge_allowlist(&mut config.allowlist, privacy_allowlist);
265        }
266    }
267
268    let compilation_errors = config.compile_regexes();
269    if !compilation_errors.regex_errors.is_empty() {
270        const ERROR_LOG_FILE: &str = ".stakpak_mcp_secret_detection_errors";
271        // Write errors to log file
272        if let Ok(json) = serde_json::to_string(&compilation_errors)
273            && let Err(e) = std::fs::write(ERROR_LOG_FILE, json)
274        {
275            eprintln!("Failed to write errors to log file: {}", e);
276        }
277    }
278    config
279}
280
281/// Helper function to merge allowlists
282fn merge_allowlist(target: &mut Option<Allowlist>, source: Allowlist) {
283    match target {
284        Some(existing_allowlist) => {
285            // Merge regexes
286            if let Some(additional_regexes) = source.regexes {
287                match &mut existing_allowlist.regexes {
288                    Some(existing_regexes) => existing_regexes.extend(additional_regexes),
289                    None => existing_allowlist.regexes = Some(additional_regexes),
290                }
291            }
292
293            // Merge stopwords
294            if let Some(additional_stopwords) = source.stopwords {
295                match &mut existing_allowlist.stopwords {
296                    Some(existing_stopwords) => existing_stopwords.extend(additional_stopwords),
297                    None => existing_allowlist.stopwords = Some(additional_stopwords),
298                }
299            }
300        }
301        None => *target = Some(source),
302    }
303}
304
305/// Creates a simplified API key regex that works within Rust's regex engine limits
306pub fn create_simple_api_key_regex() -> Result<Regex, regex::Error> {
307    // The original Gitleaks generic pattern is too complex for Rust's regex engine.
308    // We'll use a simpler but still effective pattern that captures the essence:
309    // 1. Optional prefix (identifier)
310    // 2. Keywords (access, auth, api, etc.)
311    // 3. Optional suffix
312    // 4. Assignment operators
313    // 5. Optional quotes/spaces
314    // 6. The actual secret value (captured)
315    // 7. Terminator
316
317    let pattern = r#"(?i)[\w.-]{0,30}?(?:access|auth|api|credential|creds|key|password|passwd|secret|token)[\w.-]{0,15}[\s'"]{0,3}(?:=|>|:{1,2}=|\|\||:|=>|\?=|,)[\s'"=]{0,3}([\w.=-]{10,80}|[a-z0-9][a-z0-9+/]{11,}={0,2})(?:[\s'";]|$)"#;
318    Regex::new(pattern)
319}
320
/// Calculates the Shannon entropy (in bits per character) of `text`.
///
/// Entropy measures how unpredictable the characters of a string are; higher values
/// suggest more randomness, which is characteristic of secrets.
///
/// Frequencies are computed over Unicode scalar values (`char`s) and divided by the
/// number of `char`s, so multi-byte characters are weighted like any other
/// character. Returns `0.0` for an empty string.
pub fn calculate_entropy(text: &str) -> f64 {
    if text.is_empty() {
        return 0.0;
    }

    // Count occurrences and the total in the same unit. `text.len()` is a byte
    // length and would skew the probabilities for non-ASCII input.
    let mut char_counts = std::collections::HashMap::new();
    let mut total_chars = 0usize;
    for ch in text.chars() {
        *char_counts.entry(ch).or_insert(0usize) += 1;
        total_chars += 1;
    }
    let total_chars = total_chars as f64;

    // Shannon entropy: H = -Σ p(x) * log2(p(x)); every counted char has p(x) > 0.
    char_counts.values().fold(0.0, |entropy, &count| {
        let probability = count as f64 / total_chars;
        entropy - probability * probability.log2()
    })
}
349
350/// Detects secrets in the input string using gitleaks configuration
351///
352/// This implementation follows the gitleaks methodology:
353/// 1. Apply regex rules to find potential secrets
354/// 2. Check entropy thresholds to filter out low-entropy matches
355/// 3. Apply allowlists to exclude known false positives
356/// 4. Check keywords to ensure relevance
357///
358/// When privacy_mode is enabled, also detects private data like IP addresses and AWS account IDs
359pub fn detect_secrets(input: &str, path: Option<&str>, privacy_mode: bool) -> Vec<DetectedSecret> {
360    let mut detected_secrets = Vec::new();
361    let config = if privacy_mode {
362        &*GITLEAKS_CONFIG_WITH_PRIVACY
363    } else {
364        &*GITLEAKS_CONFIG
365    };
366
367    // Apply each compiled rule from the configuration
368    for rule in &config.rules {
369        // Skip rules that don't have regex patterns (e.g., path-only rules)
370        let regex = match &rule.compiled_regex {
371            Some(regex) => regex,
372            None => continue,
373        };
374
375        // Pre-filter: Skip rule if none of its keywords are present in the input
376        if !rule.keywords.is_empty() && !contains_any_keyword(input, &rule.keywords) {
377            continue;
378        }
379
380        // Find all matches for this rule using the pre-compiled regex
381        for mat in regex.find_iter(input) {
382            let match_text = mat.as_str();
383            let start_pos = mat.start();
384            let end_pos = mat.end();
385
386            // Check if this match should be filtered out
387            if should_allow_match(
388                input,
389                path,
390                match_text,
391                start_pos,
392                end_pos,
393                rule,
394                &config.allowlist,
395            ) {
396                continue;
397            }
398
399            // Extract the captured secret value and its position
400            let (secret_value, secret_start, secret_end) =
401                if let Some(captures) = regex.captures_at(input, start_pos) {
402                    // Try to get the first capture group, fallback to full match
403                    if let Some(capture) = captures.get(1) {
404                        // Capture positions are already relative to the full input
405                        (capture.as_str().to_string(), capture.start(), capture.end())
406                    } else {
407                        (match_text.to_string(), start_pos, end_pos)
408                    }
409                } else {
410                    (match_text.to_string(), start_pos, end_pos)
411                };
412
413            // Check entropy if specified - apply to the captured secret value, not the full match
414            if let Some(entropy_threshold) = rule.entropy {
415                let calculated_entropy = calculate_entropy(&secret_value);
416                if calculated_entropy < entropy_threshold {
417                    continue;
418                }
419            }
420
421            detected_secrets.push(DetectedSecret {
422                rule_id: rule.id.clone(),
423                value: secret_value,
424                start_pos: secret_start,
425                end_pos: secret_end,
426            });
427        }
428    }
429
430    detected_secrets
431}
432
433/// Check if a match should be allowed (filtered out) based on allowlists
434pub fn should_allow_match(
435    input: &str,
436    path: Option<&str>,
437    match_text: &str,
438    start_pos: usize,
439    end_pos: usize,
440    rule: &Rule,
441    global_allowlist: &Option<Allowlist>,
442) -> bool {
443    // Check global allowlist first
444    if let Some(global) = global_allowlist
445        && is_allowed_by_allowlist(input, match_text, start_pos, end_pos, global)
446    {
447        return true;
448    }
449
450    // Check rule-specific allowlists
451    if let Some(rule_allowlists) = &rule.allowlists {
452        for allowlist in rule_allowlists {
453            if is_allowed_by_rule_allowlist(input, path, match_text, start_pos, end_pos, allowlist)
454            {
455                return true;
456            }
457        }
458    }
459
460    false
461}
462
463fn is_allowed_by_allowlist(
464    _input: &str,
465    match_text: &str,
466    _start_pos: usize,
467    _end_pos: usize,
468    allowlist: &Allowlist,
469) -> bool {
470    // Check regex patterns
471    for regex in &allowlist.compiled_regexes {
472        if regex.is_match(match_text) {
473            return true;
474        }
475    }
476
477    // Check stopwords
478    if let Some(stopwords) = &allowlist.stopwords {
479        for stopword in stopwords {
480            if match_text.to_lowercase().contains(&stopword.to_lowercase()) {
481                return true;
482            }
483        }
484    }
485
486    false
487}
488
489/// All internal indices from rfind/find of ASCII chars ('\n', '=') on same string.
490/// start_pos/end_pos are validated as char boundaries before use.
491#[allow(clippy::string_slice)]
492pub fn is_allowed_by_rule_allowlist(
493    input: &str,
494    path: Option<&str>,
495    match_text: &str,
496    start_pos: usize,
497    end_pos: usize,
498    allowlist: &RuleAllowlist,
499) -> bool {
500    let mut checks = Vec::new();
501
502    // Validate caller-provided indices are valid char boundaries
503    if start_pos > input.len()
504        || end_pos > input.len()
505        || !input.is_char_boundary(start_pos)
506        || !input.is_char_boundary(end_pos)
507    {
508        return false;
509    }
510
511    // Determine the target text based on regex_target
512    let target_text = match allowlist.regex_target.as_deref() {
513        Some("match") => match_text,
514        Some("line") => {
515            // Extract the line containing the match
516            let line_start = input[..start_pos].rfind('\n').map(|i| i + 1).unwrap_or(0);
517            let line_end = input[end_pos..]
518                .find('\n')
519                .map(|i| end_pos + i)
520                .unwrap_or(input.len());
521            &input[line_start..line_end]
522        }
523        _ => match_text, // Default to match
524    };
525
526    // Check regex patterns using pre-compiled regexes
527    if !allowlist.compiled_regexes.is_empty() {
528        let regex_matches = allowlist
529            .compiled_regexes
530            .iter()
531            .any(|regex| regex.is_match(target_text));
532        checks.push(regex_matches);
533    }
534
535    // Check stopwords with configuration-aware logic
536    if let Some(stopwords) = &allowlist.stopwords {
537        let stopword_matches = stopwords.iter().any(|stopword| {
538            // For configuration-style patterns (KEY=VALUE), be more permissive
539            if let Some(equals_pos) = target_text.find('=') {
540                let value = &target_text[equals_pos + 1..];
541
542                // Only filter if the value itself is obviously a placeholder/test value
543                // Check if the entire value is just the stopword or a simple variation
544                let value_lower = value.to_lowercase();
545                let stopword_lower = stopword.to_lowercase();
546
547                // Filter only if:
548                // 1. The value is exactly the stopword (e.g., "password")
549                // 2. The value is a simple variation like "password123" or "secretkey"
550                // 3. The value contains the stopword and is very short/simple
551
552                if value_lower == stopword_lower {
553                    true // Exact match: PASSWORD=password
554                } else if value.len() < 15 && value_lower.contains(&stopword_lower) {
555                    // Short values containing stopwords: PASSWORD=password123
556                    let without_stopword = value_lower.replace(&stopword_lower, "");
557                    // If removing the stopword leaves only numbers/simple chars, it's likely a test value
558                    without_stopword
559                        .chars()
560                        .all(|c| c.is_ascii_digit() || "!@#$%^&*()_+-=[]{}|;:,.<>?".contains(c))
561                } else {
562                    false // Don't filter longer/complex values
563                }
564            } else {
565                // For non-KEY=VALUE patterns, use original logic but be more restrictive
566                // Only filter on very obvious stopwords
567                let obvious_false_positives = ["example", "test", "demo", "sample", "placeholder"];
568                if obvious_false_positives.contains(&stopword.as_str()) {
569                    target_text
570                        .to_lowercase()
571                        .contains(&stopword.to_lowercase())
572                } else {
573                    false
574                }
575            }
576        });
577        checks.push(stopword_matches);
578    }
579
580    // Check paths
581    if let Some(paths) = &allowlist.paths
582        && let Some(path) = path
583    {
584        checks.push(paths.iter().any(|p| path.contains(p)));
585    }
586
587    // If no checks were added, this allowlist doesn't apply
588    if checks.is_empty() {
589        return false;
590    }
591
592    // Apply condition logic (AND vs OR)
593    match allowlist.condition.as_deref() {
594        Some("AND") => checks.iter().all(|&check| check),
595        _ => checks.iter().any(|&check| check), // Default to OR
596    }
597}
598
/// Returns `true` when `input` contains at least one of `keywords`, ignoring case.
///
/// An empty `keywords` slice yields `false`.
pub fn contains_any_keyword(input: &str, keywords: &[String]) -> bool {
    let haystack = input.to_lowercase();
    for keyword in keywords {
        let needle = keyword.to_lowercase();
        if haystack.contains(needle.as_str()) {
            return true;
        }
    }
    false
}
606
607/// Forces initialization of the gitleaks configuration
608///
609/// This function should be called during application startup to preload and compile
610/// the gitleaks rules, avoiding delays on the first call to detect_secrets.
611///
612/// When privacy_mode is enabled, also loads privacy rules for detecting IP addresses and AWS account IDs
613///
614/// Returns the number of successfully compiled rules.
615pub fn initialize_gitleaks_config(privacy_mode: bool) -> usize {
616    // Force evaluation of the lazy static
617    let config = if privacy_mode {
618        &*GITLEAKS_CONFIG_WITH_PRIVACY
619    } else {
620        &*GITLEAKS_CONFIG
621    };
622    config.rules.len()
623}
624
625#[cfg(test)]
626mod tests {
627    use super::*;
628
629    #[test]
630    fn test_entropy_calculation() {
631        // Test high entropy (random-like) string
632        let high_entropy = calculate_entropy("Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA");
633
634        // Test low entropy (repetitive) string
635        let low_entropy = calculate_entropy("aaaaaaaaaa");
636
637        // Test empty string
638        let zero_entropy = calculate_entropy("");
639
640        assert!(high_entropy > low_entropy);
641        assert_eq!(zero_entropy, 0.0);
642
643        println!("High entropy: {:.2}", high_entropy);
644        println!("Low entropy: {:.2}", low_entropy);
645        println!("Zero entropy: {:.2}", zero_entropy);
646    }
647
648    #[test]
649    fn test_additional_rules_loaded() {
650        let config = &*GITLEAKS_CONFIG;
651
652        // Check that the Anthropic API key rule from additional_rules.toml is loaded
653        let anthropic_rule = config.rules.iter().find(|r| r.id == "anthropic-api-key");
654        assert!(
655            anthropic_rule.is_some(),
656            "Anthropic API key rule should be loaded from additional_rules.toml"
657        );
658
659        if let Some(rule) = anthropic_rule {
660            assert!(rule.keywords.contains(&"anthropic".to_string()));
661            assert!(
662                rule.compiled_regex.is_some(),
663                "Anthropic rule regex should be compiled"
664            );
665        }
666
667        println!("Total rules loaded: {}", config.rules.len());
668    }
669
670    #[test]
671    fn test_anthropic_api_key_detection() {
672        // Use a more realistic API key that doesn't contain alphabet sequences
673        let test_input =
674            "ANTHROPIC_API_KEY=sk-ant-api03-Kx9mP2nQ8rT4vW7yZ3cF6hJ1lN5sA9bD2eG5kM8pR1tX4zB7";
675        let secrets = detect_secrets(test_input, None, false);
676
677        // Should detect the Anthropic API key
678        let anthropic_secret = secrets.iter().find(|s| s.rule_id == "anthropic-api-key");
679        assert!(
680            anthropic_secret.is_some(),
681            "Should detect Anthropic API key"
682        );
683
684        if let Some(secret) = anthropic_secret {
685            assert!(secret.value.starts_with("sk-ant-api03-"));
686        }
687    }
688
689    #[test]
690    fn test_privacy_mode_aws_account_id() {
691        let test_input = "AWS_ACCOUNT_ID=987654321098";
692
693        // Should not detect AWS account ID in regular mode
694        let secrets = detect_secrets(test_input, None, false);
695        assert!(!secrets.iter().any(|s| s.rule_id == "aws-account-id"));
696
697        // Should detect AWS account ID in privacy mode
698        let secrets_privacy = detect_secrets(test_input, None, true);
699        let aws_secret = secrets_privacy
700            .iter()
701            .find(|s| s.rule_id == "aws-account-id");
702        assert!(
703            aws_secret.is_some(),
704            "Should detect AWS account ID in privacy mode"
705        );
706
707        if let Some(secret) = aws_secret {
708            assert_eq!(secret.value, "987654321098");
709        }
710    }
711
712    #[test]
713    fn test_privacy_mode_public_ip() {
714        let test_input = "SERVER_IP=203.0.113.195";
715
716        // Should not detect public IP in regular mode
717        let secrets = detect_secrets(test_input, None, false);
718        assert!(!secrets.iter().any(|s| s.rule_id == "public-ipv4"));
719
720        // Should detect public IP in privacy mode
721        let secrets_privacy = detect_secrets(test_input, None, true);
722        let ip_secret = secrets_privacy.iter().find(|s| s.rule_id == "public-ipv4");
723        assert!(
724            ip_secret.is_some(),
725            "Should detect public IP in privacy mode"
726        );
727
728        if let Some(secret) = ip_secret {
729            assert_eq!(secret.value, "203.0.113.195");
730        }
731    }
732
733    #[test]
734    fn test_privacy_mode_private_ip_excluded() {
735        let test_input = "LOCAL_IP=192.168.1.1";
736
737        // Should not detect private IP even in privacy mode
738        let secrets_privacy = detect_secrets(test_input, None, true);
739        assert!(!secrets_privacy.iter().any(|s| s.rule_id == "public-ipv4"));
740    }
741
742    #[test]
743    fn test_privacy_mode_aws_arn() {
744        let test_input = "ARN=arn:aws:s3:::my-bucket/object";
745
746        // Should not detect AWS account ID in regular mode
747        let secrets = detect_secrets(test_input, None, false);
748        assert!(!secrets.iter().any(|s| s.rule_id == "aws-account-id"));
749
750        // Should detect AWS account ID in ARN in privacy mode
751        let secrets_privacy = detect_secrets(test_input, None, true);
752        // This specific ARN doesn't contain an account ID, so it shouldn't be detected
753        assert!(
754            !secrets_privacy
755                .iter()
756                .any(|s| s.rule_id == "aws-account-id")
757        );
758
759        // Test with an ARN that contains an account ID
760        let test_input_with_account = "ARN=arn:aws:iam::987654321098:role/MyRole";
761        let secrets_with_account = detect_secrets(test_input_with_account, None, true);
762        let aws_secret = secrets_with_account
763            .iter()
764            .find(|s| s.rule_id == "aws-account-id");
765        assert!(
766            aws_secret.is_some(),
767            "Should detect AWS account ID in ARN in privacy mode"
768        );
769
770        if let Some(secret) = aws_secret {
771            assert_eq!(secret.value, "987654321098");
772        }
773    }
774
775    #[test]
776    fn test_privacy_mode_initialization() {
777        // Test that privacy mode initialization works
778        let regular_count = initialize_gitleaks_config(false);
779        let privacy_count = initialize_gitleaks_config(true);
780
781        // Privacy mode should have more rules
782        assert!(
783            privacy_count > regular_count,
784            "Privacy mode should have more rules than regular mode"
785        );
786    }
787
788    #[test]
789    fn test_debug_privacy_mode_aws() {
790        let test_input = "AWS_ACCOUNT_ID=987654321098"; // Different from allowlist
791
792        // Test with privacy mode
793        let secrets_privacy = detect_secrets(test_input, None, true);
794        println!("Privacy mode detected {} secrets", secrets_privacy.len());
795        for secret in &secrets_privacy {
796            println!(
797                "  Rule: {}, Value: '{}', Pos: {}-{}",
798                secret.rule_id, secret.value, secret.start_pos, secret.end_pos
799            );
800        }
801
802        // Test without privacy mode
803        let secrets_regular = detect_secrets(test_input, None, false);
804        println!("Regular mode detected {} secrets", secrets_regular.len());
805        for secret in &secrets_regular {
806            println!(
807                "  Rule: {}, Value: '{}', Pos: {}-{}",
808                secret.rule_id, secret.value, secret.start_pos, secret.end_pos
809            );
810        }
811
812        // Check if privacy config loaded properly
813        let config_with_privacy = &*GITLEAKS_CONFIG_WITH_PRIVACY;
814        let aws_rule = config_with_privacy
815            .rules
816            .iter()
817            .find(|r| r.id == "aws-account-id");
818        println!("AWS rule found: {}", aws_rule.is_some());
819        if let Some(rule) = aws_rule {
820            println!("AWS rule keywords: {:?}", rule.keywords);
821            if let Some(regex) = &rule.compiled_regex {
822                println!("AWS rule regex compiled: yes");
823                let test_matches: Vec<_> = regex.find_iter(test_input).collect();
824                println!("Direct regex matches: {}", test_matches.len());
825                for mat in test_matches {
826                    println!("  Match: '{}'", mat.as_str());
827                }
828
829                // Test keyword filtering
830                let contains_keywords = contains_any_keyword(test_input, &rule.keywords);
831                println!("Contains keywords: {}", contains_keywords);
832
833                // Test capture groups
834                if let Some(captures) = regex.captures(test_input) {
835                    println!("Capture groups found: {}", captures.len());
836                    for (i, cap) in captures.iter().enumerate() {
837                        if let Some(cap) = cap {
838                            println!("  Capture {}: '{}'", i, cap.as_str());
839                        }
840                    }
841                } else {
842                    println!("No capture groups found");
843                }
844
845                // Test entropy if there are captures
846                for mat in regex.find_iter(test_input) {
847                    if let Some(captures) = regex.captures_at(test_input, mat.start())
848                        && let Some(capture) = captures.get(1)
849                    {
850                        let entropy = calculate_entropy(capture.as_str());
851                        println!(
852                            "  Entropy of first capture '{}': {:.2} (threshold: {:?})",
853                            capture.as_str(),
854                            entropy,
855                            rule.entropy
856                        );
857                    }
858                }
859            } else {
860                println!("AWS rule regex compiled: no");
861            }
862        }
863    }
864
865    #[test]
866    fn test_debug_privacy_mode_ip() {
867        let test_input = "SERVER_IP=8.8.8.8";
868
869        // Test with privacy mode
870        let secrets_privacy = detect_secrets(test_input, None, true);
871        println!("Privacy mode detected {} secrets", secrets_privacy.len());
872        for secret in &secrets_privacy {
873            println!(
874                "  Rule: {}, Value: '{}', Pos: {}-{}",
875                secret.rule_id, secret.value, secret.start_pos, secret.end_pos
876            );
877        }
878
879        // Check if privacy config loaded properly
880        let config_with_privacy = &*GITLEAKS_CONFIG_WITH_PRIVACY;
881        let ip_rule = config_with_privacy
882            .rules
883            .iter()
884            .find(|r| r.id == "public-ipv4");
885        println!("IP rule found: {}", ip_rule.is_some());
886        if let Some(rule) = ip_rule {
887            println!("IP rule keywords: {:?}", rule.keywords);
888            if let Some(regex) = &rule.compiled_regex {
889                println!("IP rule regex compiled: yes");
890                let test_matches: Vec<_> = regex.find_iter(test_input).collect();
891                println!("Direct regex matches: {}", test_matches.len());
892                for mat in test_matches {
893                    println!("  Match: '{}'", mat.as_str());
894                }
895
896                // Test keyword filtering
897                let contains_keywords = contains_any_keyword(test_input, &rule.keywords);
898                println!("Contains keywords: {}", contains_keywords);
899
900                // Test capture groups
901                if let Some(captures) = regex.captures(test_input) {
902                    println!("Capture groups found: {}", captures.len());
903                    for (i, cap) in captures.iter().enumerate() {
904                        if let Some(cap) = cap {
905                            println!("  Capture {}: '{}'", i, cap.as_str());
906                        }
907                    }
908                } else {
909                    println!("No capture groups found");
910                }
911            } else {
912                println!("IP rule regex compiled: no");
913            }
914        }
915    }
916
917    #[test]
918    fn test_comprehensive_ip_detection() {
919        println!("=== COMPREHENSIVE IP DETECTION TEST ===");
920
921        let test_cases = vec![
922            // Public IPs that should be detected
923            ("16.170.172.114", true),
924            ("8.8.8.8", true),
925            ("1.1.1.1", true),
926            ("203.0.113.195", true),
927            ("13.107.42.14", true),
928            // Private IPs that should NOT be detected
929            ("192.168.1.1", false),
930            ("10.0.0.1", false),
931            ("172.16.0.1", false),
932            ("127.0.0.1", false),
933            ("169.254.1.1", false),
934            ("0.0.0.0", false),
935            ("255.255.255.255", false),
936        ];
937
938        for (ip, should_detect) in test_cases {
939            let secrets = detect_secrets(ip, None, true);
940            let detected = secrets.iter().any(|s| s.rule_id == "public-ipv4");
941
942            println!(
943                "IP: {} | Should detect: {} | Detected: {}",
944                ip, should_detect, detected
945            );
946
947            if should_detect {
948                assert!(detected, "Should detect public IP: {}", ip);
949            } else {
950                assert!(!detected, "Should NOT detect private IP: {}", ip);
951            }
952        }
953
954        // Test IP in various contexts
955        let context_tests = vec![
956            "IP address: 16.170.172.114",
957            "Connect to 16.170.172.114",
958            "16.170.172.114:8080",
959            "ping 16.170.172.114",
960            "https://16.170.172.114/api",
961        ];
962
963        for context in context_tests {
964            let secrets = detect_secrets(context, None, true);
965            let detected = secrets.iter().any(|s| s.rule_id == "public-ipv4");
966            println!("Context: '{}' | Detected: {}", context, detected);
967            assert!(detected, "Should detect IP in context: {}", context);
968        }
969    }
970
971    #[test]
972    fn test_standalone_ip_detection() {
973        println!("=== TESTING STANDALONE IP DETECTION ===");
974
975        // Test standalone IP that should be detected
976        let standalone_ip = "16.170.172.114";
977        let secrets = detect_secrets(standalone_ip, None, true);
978
979        println!(
980            "Standalone IP '{}' detected {} secrets",
981            standalone_ip,
982            secrets.len()
983        );
984        for secret in &secrets {
985            println!("  Rule: {}, Value: '{}'", secret.rule_id, secret.value);
986        }
987
988        // Test IP with context that should be detected
989        let ip_with_context = "SERVER_IP=16.170.172.114";
990        let secrets_with_context = detect_secrets(ip_with_context, None, true);
991
992        println!(
993            "IP with context '{}' detected {} secrets",
994            ip_with_context,
995            secrets_with_context.len()
996        );
997        for secret in &secrets_with_context {
998            println!("  Rule: {}, Value: '{}'", secret.rule_id, secret.value);
999        }
1000
1001        // Test keyword filtering
1002        let config = &*GITLEAKS_CONFIG_WITH_PRIVACY;
1003        let ip_rule = config.rules.iter().find(|r| r.id == "public-ipv4");
1004        if let Some(rule) = ip_rule {
1005            println!("IP rule keywords: {:?}", rule.keywords);
1006            println!(
1007                "Standalone IP contains keywords: {}",
1008                contains_any_keyword(standalone_ip, &rule.keywords)
1009            );
1010            println!(
1011                "IP with context contains keywords: {}",
1012                contains_any_keyword(ip_with_context, &rule.keywords)
1013            );
1014        }
1015    }
1016
1017    #[test]
1018    fn test_user_provided_json_snippet() {
1019        println!("=== TESTING USER PROVIDED JSON SNIPPET ===");
1020
1021        let json_snippet = r#"{
1022    "UserId": "AIDAX5UI4H55WM6GS6NIJ",
1023    "Account": "544388841223",
1024    "Arn": "arn:aws:iam::544388841223:user/terraform-mac"
1025}"#;
1026
1027        let secrets = detect_secrets(json_snippet, None, true);
1028        let aws_secrets: Vec<_> = secrets
1029            .iter()
1030            .filter(|s| s.rule_id == "aws-account-id")
1031            .collect();
1032
1033        println!("Detected {} AWS account ID secrets", aws_secrets.len());
1034        for secret in &aws_secrets {
1035            println!(
1036                "  Value: '{}' at position {}-{}",
1037                secret.value, secret.start_pos, secret.end_pos
1038            );
1039        }
1040
1041        // Should detect the account ID in the "Account" field
1042        assert!(
1043            !aws_secrets.is_empty(),
1044            "Should detect at least one AWS account ID"
1045        );
1046        assert!(
1047            aws_secrets.iter().any(|s| s.value == "544388841223"),
1048            "Should detect account ID 544388841223"
1049        );
1050
1051        // The ARN might also contain a redacted reference but that's already handled
1052        println!("✅ JSON snippet test passed - Account field is now detected");
1053    }
1054
1055    #[test]
1056    fn test_aws_account_id_json_field() {
1057        println!("=== TESTING AWS ACCOUNT ID JSON FIELD DETECTION ===");
1058
1059        let test_cases = vec![
1060            // JSON field patterns that should be detected
1061            r#""Account": "544388841223""#,
1062            r#""AccountId": "544388841223""#,
1063            r#""account": "544388841223""#,
1064            r#""accountId": "544388841223""#,
1065            // Other patterns that should still work
1066            "AWS_ACCOUNT_ID=544388841223",
1067            "account.id=544388841223",
1068            "account_id: 544388841223",
1069            "arn:aws:iam::544388841223:user/test",
1070            "544388841223    arn:aws:iam::544388841223:user/terraform-mac    AIDAX5UI4H55WM6GS6NIJ",
1071        ];
1072
1073        for test_case in test_cases {
1074            let secrets = detect_secrets(test_case, None, true);
1075            let detected = secrets.iter().any(|s| s.rule_id == "aws-account-id");
1076
1077            println!("Test case: '{}' | Detected: {}", test_case, detected);
1078            assert!(detected, "Should detect AWS account ID in: {}", test_case);
1079
1080            // Check that the detected value is the expected account ID
1081            if let Some(secret) = secrets.iter().find(|s| s.rule_id == "aws-account-id") {
1082                assert_eq!(secret.value, "544388841223");
1083                println!("  -> Detected value: '{}'", secret.value);
1084            }
1085        }
1086    }
1087}