Skip to main content

stakpak_shared/secrets/
mod.rs

1pub mod gitleaks;
2use crate::helper::generate_simple_id;
3/// Re-export the gitleaks initialization function for external access
4pub use gitleaks::initialize_gitleaks_config;
5use gitleaks::{DetectedSecret, detect_secrets};
6use regex::Regex;
7use std::collections::HashMap;
8use std::fmt;
9use std::sync::LazyLock;
10
11static REDACTED_SECRET_MARKER_RE: LazyLock<Regex> =
12    LazyLock::new(
13        || match Regex::new(r"\[REDACTED_SECRET:[^:\]]+:[^:\]]+\]") {
14            Ok(regex) => regex,
15            Err(error) => panic!("invalid redacted-secret marker regex: {error}"),
16        },
17    );
18
19fn find_protected_spans(content: &str) -> Vec<(usize, usize)> {
20    REDACTED_SECRET_MARKER_RE
21        .find_iter(content)
22        .map(|marker_match| (marker_match.start(), marker_match.end()))
23        .collect()
24}
25
/// Reports whether the range `start..end` intersects any of `protected_spans`.
///
/// Ranges that merely touch (one ends exactly where the other begins) do not count as
/// overlapping.
fn overlaps_protected_span(start: usize, end: usize, protected_spans: &[(usize, usize)]) -> bool {
    for &(protected_start, protected_end) in protected_spans {
        let disjoint = start >= protected_end || end <= protected_start;
        if !disjoint {
            return true;
        }
    }
    false
}
31
/// The outcome of a redaction pass: the redacted text together with the mapping from
/// redaction keys back to the original secrets.
#[derive(Debug, Clone)]
pub struct RedactionResult {
    /// The input string with secrets replaced by redaction keys
    pub redacted_string: String,
    /// Mapping from redaction key to the original secret value
    pub redaction_map: HashMap<String, String>,
}

impl RedactionResult {
    /// Bundles an already-redacted string with its redaction map.
    pub fn new(redacted_string: String, redaction_map: HashMap<String, String>) -> Self {
        RedactionResult {
            redacted_string,
            redaction_map,
        }
    }
}

/// Displays only the redacted text; the redaction map is never written out.
impl fmt::Display for RedactionResult {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.redacted_string)
    }
}
55
56/// Redacts secrets from the input string and returns both the redacted string and redaction mapping
57///
58/// When privacy_mode is enabled, also detects and redacts private data like IP addresses and AWS account IDs
59pub fn redact_secrets(
60    content: &str,
61    path: Option<&str>,
62    old_redaction_map: &HashMap<String, String>,
63    privacy_mode: bool,
64) -> RedactionResult {
65    let protected_spans = find_protected_spans(content);
66    let mut secrets = detect_secrets(content, path, privacy_mode)
67        .into_iter()
68        .filter(|secret| {
69            !overlaps_protected_span(secret.start_pos, secret.end_pos, &protected_spans)
70        })
71        .collect::<Vec<_>>();
72
73    let mut redaction_map = old_redaction_map.clone();
74    let mut reverse_redaction_map: HashMap<String, String> = old_redaction_map
75        .clone()
76        .into_iter()
77        .map(|(key, value)| (value, key))
78        .collect();
79
80    for (original_secret, redaction_key) in &reverse_redaction_map {
81        if original_secret.is_empty() {
82            continue;
83        }
84
85        let key_parts = redaction_key.split(':').collect::<Vec<&str>>();
86        if key_parts.len() != 3 {
87            continue;
88        }
89
90        let rule_id = key_parts[1].to_string();
91        for (start_pos, _) in content.match_indices(original_secret) {
92            let end_pos = start_pos + original_secret.len();
93            if overlaps_protected_span(start_pos, end_pos, &protected_spans) {
94                continue;
95            }
96
97            secrets.push(DetectedSecret {
98                rule_id: rule_id.clone(),
99                value: original_secret.clone(),
100                start_pos,
101                end_pos,
102            });
103        }
104    }
105
106    if secrets.is_empty() {
107        return RedactionResult::new(content.to_string(), HashMap::new());
108    }
109
110    let mut redacted_string = content.to_string();
111
112    // Deduplicate overlapping secrets - keep the longest one
113    let mut deduplicated_secrets: Vec<DetectedSecret> = Vec::new();
114    let mut sorted_by_start = secrets;
115    sorted_by_start.sort_by(|left, right| left.start_pos.cmp(&right.start_pos));
116
117    for secret in sorted_by_start {
118        let mut should_add = true;
119        let mut to_remove = Vec::new();
120
121        for (index, existing) in deduplicated_secrets.iter().enumerate() {
122            let overlaps =
123                secret.start_pos < existing.end_pos && secret.end_pos > existing.start_pos;
124
125            if overlaps {
126                if secret.value.len() > existing.value.len() {
127                    to_remove.push(index);
128                } else {
129                    should_add = false;
130                    break;
131                }
132            }
133        }
134
135        for &index in to_remove.iter().rev() {
136            deduplicated_secrets.remove(index);
137        }
138
139        if should_add {
140            deduplicated_secrets.push(secret);
141        }
142    }
143
144    deduplicated_secrets.sort_by(|left, right| right.start_pos.cmp(&left.start_pos));
145
146    for secret in deduplicated_secrets {
147        if !content.is_char_boundary(secret.start_pos) || !content.is_char_boundary(secret.end_pos)
148        {
149            continue;
150        }
151
152        if secret.start_pos >= redacted_string.len() || secret.end_pos > redacted_string.len() {
153            continue;
154        }
155
156        let redaction_key = if let Some(existing_key) = reverse_redaction_map.get(&secret.value) {
157            existing_key.clone()
158        } else {
159            let key = generate_redaction_key(&secret.rule_id);
160            redaction_map.insert(key.clone(), secret.value.clone());
161            reverse_redaction_map.insert(secret.value, key.clone());
162            key
163        };
164
165        redacted_string.replace_range(secret.start_pos..secret.end_pos, &redaction_key);
166    }
167
168    RedactionResult::new(redacted_string, redaction_map)
169}
170
171/// Restores secrets in a redacted string using the provided redaction map
172pub fn restore_secrets(redacted_string: &str, redaction_map: &HashMap<String, String>) -> String {
173    let mut restored = String::with_capacity(redacted_string.len());
174    let mut cursor = 0;
175
176    for marker_match in REDACTED_SECRET_MARKER_RE.find_iter(redacted_string) {
177        let Some(prefix) = redacted_string.get(cursor..marker_match.start()) else {
178            return redacted_string.to_string();
179        };
180        restored.push_str(prefix);
181
182        let marker = marker_match.as_str();
183        if let Some(original_value) = redaction_map.get(marker) {
184            restored.push_str(original_value);
185        } else {
186            restored.push_str(marker);
187        }
188
189        cursor = marker_match.end();
190    }
191
192    let Some(suffix) = redacted_string.get(cursor..) else {
193        return redacted_string.to_string();
194    };
195    restored.push_str(suffix);
196    restored
197}
198
199/// Redacts a specific password value from the content without running secret detection
200pub fn redact_password(
201    content: &str,
202    password: &str,
203    old_redaction_map: &HashMap<String, String>,
204) -> RedactionResult {
205    if password.is_empty() {
206        return RedactionResult::new(content.to_string(), HashMap::new());
207    }
208
209    let protected_spans = find_protected_spans(content);
210    let occurrences = content
211        .match_indices(password)
212        .map(|(start_pos, _)| (start_pos, start_pos + password.len()))
213        .filter(|(start_pos, end_pos)| {
214            !overlaps_protected_span(*start_pos, *end_pos, &protected_spans)
215        })
216        .collect::<Vec<_>>();
217
218    if occurrences.is_empty() && !protected_spans.is_empty() {
219        return RedactionResult::new(content.to_string(), HashMap::new());
220    }
221
222    let mut redacted_string = content.to_string();
223    let mut redaction_map = old_redaction_map.clone();
224    let mut reverse_redaction_map: HashMap<String, String> = old_redaction_map
225        .clone()
226        .into_iter()
227        .map(|(key, value)| (value, key))
228        .collect();
229
230    let redaction_key = if let Some(existing_key) = reverse_redaction_map.get(password) {
231        existing_key.clone()
232    } else {
233        let key = generate_redaction_key("password");
234        redaction_map.insert(key.clone(), password.to_string());
235        reverse_redaction_map.insert(password.to_string(), key.clone());
236        key
237    };
238
239    for (start_pos, end_pos) in occurrences.iter().rev().copied() {
240        redacted_string.replace_range(start_pos..end_pos, &redaction_key);
241    }
242
243    RedactionResult::new(redacted_string, redaction_map)
244}
245
246/// Generates a random redaction key
247fn generate_redaction_key(rule_id: &str) -> String {
248    let id = generate_simple_id(6);
249    format!("[REDACTED_SECRET:{rule_id}:{id}]")
250}
251
252#[cfg(test)]
253mod tests {
254    use regex::Regex;
255
256    use crate::secrets::gitleaks::{
257        GITLEAKS_CONFIG, calculate_entropy, contains_any_keyword, create_simple_api_key_regex,
258        is_allowed_by_rule_allowlist, should_allow_match,
259    };
260
261    use super::*;
262
263    fn fake_aws_access_key() -> String {
264        ["AKIA", "IOSFODNN7EX23PLE"].concat()
265    }
266
267    fn fake_aws_access_key_alt() -> String {
268        ["AKIA", "IOSFODNN7REALKEY"].concat()
269    }
270
271    fn fake_aws_access_key_example() -> String {
272        ["AKIA", "IOSFODNN7EXAMPLE"].concat()
273    }
274
275    fn fake_github_token() -> String {
276        ["ghp", "_1234567890abcdef", "1234567890abcdef", "12345678"].concat()
277    }
278
279    fn fake_github_token_short() -> String {
280        ["ghp", "_1234567890abcdef"].concat()
281    }
282
283    fn fake_api_key_long() -> String {
284        ["abc123def456", "ghi789jkl012", "mno345pqr678"].concat()
285    }
286
287    fn fake_api_key() -> String {
288        ["abc123def456", "ghi789jklmnop"].concat()
289    }
290
291    fn fake_secret_token() -> String {
292        ["Kx9mP2nQ8rT4", "vW7yZ3cF6hJ1", "lN5sA0bD8eF"].concat()
293    }
294
295    fn fake_secret_token_long() -> String {
296        ["Kx9mP2nQ8rT4", "vW7yZ3cF6hJ1", "lN5sA0bD8eF2gH5jK"].concat()
297    }
298
299    fn fake_password_secret() -> String {
300        ["super", "secret", "password", "123456"].concat()
301    }
302
303    #[test]
304    fn test_redaction_key_generation() {
305        let key1 = generate_redaction_key("test");
306        let key2 = generate_redaction_key("my-rule");
307
308        // Keys should be different
309        assert_ne!(key1, key2);
310
311        // Keys should follow the expected format
312        assert!(key1.starts_with("[REDACTED_SECRET:test:"));
313        assert!(key1.ends_with("]"));
314        assert!(key2.starts_with("[REDACTED_SECRET:my-rule:"));
315        assert!(key2.ends_with("]"));
316    }
317
318    #[test]
319    fn test_empty_input() {
320        let result = redact_secrets("", None, &HashMap::new(), false);
321        assert_eq!(result.redacted_string, "");
322        assert!(result.redaction_map.is_empty());
323    }
324
325    #[test]
326    fn test_restore_secrets() {
327        let mut redaction_map = HashMap::new();
328        redaction_map.insert(
329            "[REDACTED_SECRET:test:abc123]".to_string(),
330            "secret123".to_string(),
331        );
332        redaction_map.insert(
333            "[REDACTED_SECRET:test:def456]".to_string(),
334            "api_key_xyz".to_string(),
335        );
336
337        let redacted =
338            "Password is [REDACTED_SECRET:test:abc123] and key is [REDACTED_SECRET:test:def456]";
339        let restored = restore_secrets(redacted, &redaction_map);
340
341        assert_eq!(restored, "Password is secret123 and key is api_key_xyz");
342    }
343
344    #[test]
345    fn test_redaction_result_display() {
346        let mut redaction_map = HashMap::new();
347        redaction_map.insert("[REDACTED_test]".to_string(), "secret".to_string());
348
349        let result = RedactionResult::new("Hello [REDACTED_test]".to_string(), redaction_map);
350        assert_eq!(format!("{}", result), "Hello [REDACTED_test]");
351    }
352
353    #[test]
354    fn test_redact_secrets_with_api_key() {
355        // Use a pattern that matches the generic-api-key rule
356        let input = format!("export API_KEY={}", fake_api_key_long());
357        let result = redact_secrets(&input, None, &HashMap::new(), false);
358
359        // Should detect the API key and redact it
360        assert!(!result.redaction_map.is_empty());
361        assert!(result.redacted_string.contains("[REDACTED_"));
362        println!("Input: {}", input);
363        println!("Redacted: {}", result.redacted_string);
364        println!("Mapping: {:?}", result.redaction_map);
365    }
366
367    #[test]
368    fn test_redact_secrets_with_aws_key() {
369        let input = format!("AWS_ACCESS_KEY_ID={}", fake_aws_access_key());
370        let result = redact_secrets(&input, None, &HashMap::new(), false);
371
372        // Should detect the AWS access key
373        assert!(!result.redaction_map.is_empty());
374        println!("Input: {}", input);
375        println!("Redacted: {}", result.redacted_string);
376        println!("Mapping: {:?}", result.redaction_map);
377    }
378
379    #[test]
380    fn test_redaction_identical_secrets() {
381        let aws_key = fake_aws_access_key();
382        let input = format!(
383            "\n        export AWS_ACCESS_KEY_ID={aws_key}\n        export AWS_ACCESS_KEY_ID_2={aws_key}\n        "
384        );
385        let result = redact_secrets(&input, None, &HashMap::new(), false);
386
387        assert_eq!(result.redaction_map.len(), 1);
388    }
389
390    #[test]
391    fn test_redaction_identical_secrets_different_contexts() {
392        let aws_key = fake_aws_access_key();
393        let input_1 = format!("\n        export AWS_ACCESS_KEY_ID={aws_key}\n        ");
394        let input_2 = format!("\n        export SOME_OTHER_SECRET={aws_key}\n        ");
395        let result_1 = redact_secrets(&input_1, None, &HashMap::new(), false);
396        let result_2 = redact_secrets(&input_2, None, &result_1.redaction_map, false);
397
398        assert_eq!(result_1.redaction_map, result_2.redaction_map);
399    }
400
401    #[test]
402    fn test_redact_secrets_with_github_token() {
403        let input = format!("GITHUB_TOKEN={}", fake_github_token());
404        let result = redact_secrets(&input, None, &HashMap::new(), false);
405
406        // Should detect the GitHub PAT
407        assert!(!result.redaction_map.is_empty());
408        println!("Input: {}", input);
409        println!("Redacted: {}", result.redacted_string);
410        println!("Mapping: {:?}", result.redaction_map);
411    }
412
413    #[test]
414    fn test_no_secrets() {
415        let input = "This is just a normal string with no secrets";
416        let result = redact_secrets(input, None, &HashMap::new(), false);
417
418        // Should not detect any secrets
419        assert_eq!(result.redaction_map.len(), 0);
420        assert_eq!(result.redacted_string, input);
421    }
422
    /// Debug aid for the `generic-api-key` rule: prints the rule's regex, entropy setting
    /// and keywords, shows how the regex matches a sample `API_KEY=` input, and then runs
    /// `redact_secrets` over several sample inputs.
    ///
    /// Makes no assertions; a missing rule is only reported on stdout.
    #[test]
    fn test_debug_generic_api_key() {
        let config = &*GITLEAKS_CONFIG;

        // Find the generic-api-key rule
        let generic_rule = config.rules.iter().find(|r| r.id == "generic-api-key");
        if let Some(rule) = generic_rule {
            println!("Generic API Key Rule:");
            println!("  Regex: {:?}", rule.regex);
            println!("  Entropy: {:?}", rule.entropy);
            println!("  Keywords: {:?}", rule.keywords);

            // Test the regex directly first
            if let Some(regex_pattern) = &rule.regex {
                // A pattern that fails to compile is silently skipped.
                if let Ok(regex) = Regex::new(regex_pattern) {
                    let test_input = format!("API_KEY={}", fake_api_key_long());
                    println!("\nTesting regex directly:");
                    println!("  Input: {}", test_input);

                    for mat in regex.find_iter(&test_input) {
                        println!("  Raw match: '{}'", mat.as_str());
                        println!("  Match position: {}-{}", mat.start(), mat.end());

                        // Check captures. Note: the regex is re-run on the matched
                        // substring only, not on the full input.
                        if let Some(captures) = regex.captures(mat.as_str()) {
                            for (i, cap) in captures.iter().enumerate() {
                                if let Some(cap) = cap {
                                    println!("  Capture {}: '{}'", i, cap.as_str());
                                    // Entropy is reported for capture group 1 only.
                                    if i == 1 {
                                        let entropy = calculate_entropy(cap.as_str());
                                        println!("  Entropy of capture 1: {:.2}", entropy);
                                    }
                                }
                            }
                        }
                    }
                }
            } else {
                println!("  No regex pattern (path-based rule)");
            }

            // Test various input patterns through the full redaction pipeline
            let test_inputs = vec![
                format!("API_KEY={}", fake_api_key_long()),
                "api_key=RaNd0mH1ghEnTr0pyV4luE567890abcdef".to_string(),
                format!("access_key={}", fake_secret_token_long()),
                "secret_token=1234567890abcdef1234567890abcdef".to_string(),
                "password=9k2L8pMvB3nQ7rX1ZdF5GhJwY4AsPo6C".to_string(),
            ];

            for input in test_inputs {
                println!("\nTesting input: {}", input);
                let result = redact_secrets(&input, None, &HashMap::new(), false);
                println!("  Detected secrets: {}", result.redaction_map.len());
                if !result.redaction_map.is_empty() {
                    println!("  Redacted: {}", result.redacted_string);
                }
            }
        } else {
            println!("Generic API key rule not found!");
        }
    }
485
    /// Debug aid: prints whether the `generic-api-key` regex matches the minimal input
    /// `key=abcdefghijklmnop`, along with its capture groups, and then how many secrets
    /// `redact_secrets` reports for the same input.
    ///
    /// Makes no assertions; it only panics if the rule is missing from the config.
    #[test]
    fn test_simple_regex_match() {
        // Test a very simple case that should definitely match
        let input = "key=abcdefghijklmnop";
        println!("Testing simple input: {}", input);

        let config = &*GITLEAKS_CONFIG;
        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();

        if let Some(regex_pattern) = &generic_rule.regex {
            // A pattern that fails to compile is silently skipped.
            if let Ok(regex) = Regex::new(regex_pattern) {
                println!("Regex pattern: {}", regex_pattern);

                if regex.is_match(input) {
                    println!("✓ Regex MATCHES the input!");

                    for mat in regex.find_iter(input) {
                        println!("Match found: '{}'", mat.as_str());

                        // Captures are taken from the matched substring, not the full input.
                        if let Some(captures) = regex.captures(mat.as_str()) {
                            println!("Full capture groups:");
                            for (i, cap) in captures.iter().enumerate() {
                                if let Some(cap) = cap {
                                    println!("  Group {}: '{}'", i, cap.as_str());
                                    // Entropy is reported for capture group 1 only; the
                                    // "3.5" in the message is a hard-coded label, not read
                                    // from the rule.
                                    if i == 1 {
                                        let entropy = calculate_entropy(cap.as_str());
                                        println!("  Entropy: {:.2} (threshold: 3.5)", entropy);
                                    }
                                }
                            }
                        }
                    }
                } else {
                    println!("✗ Regex does NOT match the input");
                }
            }
        } else {
            println!("Rule has no regex pattern (path-based rule)");
        }

        // Also test the full redact_secrets function
        let result = redact_secrets(input, None, &HashMap::new(), false);
        println!(
            "Full function result: {} secrets detected",
            result.redaction_map.len()
        );
    }
537
538    #[test]
539    fn test_regex_breakdown() {
540        let config = &*GITLEAKS_CONFIG;
541        let generic_rule = config
542            .rules
543            .iter()
544            .find(|r| r.id == "generic-api-key")
545            .unwrap();
546
547        if let Some(regex_pattern) = &generic_rule.regex {
548            println!("Full regex: {}", regex_pattern);
549
550            // Let's break down the regex and test each part
551            let test_inputs = vec![
552                "key=abcdefghijklmnop",
553                "api_key=abcdefghijklmnop",
554                "secret=abcdefghijklmnop",
555                "token=abcdefghijklmnop",
556                "password=abcdefghijklmnop",
557                "access_key=abcdefghijklmnop",
558            ];
559
560            for input in test_inputs {
561                println!("\nTesting: '{}'", input);
562
563                // Test if the regex matches at all
564                if let Ok(regex) = Regex::new(regex_pattern) {
565                    let matches: Vec<_> = regex.find_iter(input).collect();
566                    println!("  Matches found: {}", matches.len());
567
568                    for (i, mat) in matches.iter().enumerate() {
569                        println!("  Match {}: '{}'", i, mat.as_str());
570
571                        // Test captures
572                        if let Some(captures) = regex.captures(mat.as_str()) {
573                            for (j, cap) in captures.iter().enumerate() {
574                                if let Some(cap) = cap {
575                                    println!("    Capture {}: '{}'", j, cap.as_str());
576                                    if j == 1 {
577                                        let entropy = calculate_entropy(cap.as_str());
578                                        println!("    Entropy: {:.2} (threshold: 3.5)", entropy);
579                                        if entropy >= 3.5 {
580                                            println!("    ✓ Entropy check PASSED");
581                                        } else {
582                                            println!("    ✗ Entropy check FAILED");
583                                        }
584                                    }
585                                }
586                            }
587                        }
588                    }
589                }
590            }
591        } else {
592            println!("Rule has no regex pattern (path-based rule)");
593        }
594
595        // Also test with a known working pattern from AWS
596        println!("\nTesting AWS pattern that we know works:");
597        let aws_input = format!("AWS_ACCESS_KEY_ID={}", fake_aws_access_key_example());
598        println!("Input: {}", aws_input);
599
600        let aws_rule = config
601            .rules
602            .iter()
603            .find(|r| r.id == "aws-access-token")
604            .unwrap();
605        if let Some(aws_regex_pattern) = &aws_rule.regex {
606            if let Ok(regex) = Regex::new(aws_regex_pattern) {
607                for mat in regex.find_iter(&aws_input) {
608                    println!("AWS Match: '{}'", mat.as_str());
609                    if let Some(captures) = regex.captures(mat.as_str()) {
610                        for (i, cap) in captures.iter().enumerate() {
611                            if let Some(cap) = cap {
612                                println!("  AWS Capture {}: '{}'", i, cap.as_str());
613                            }
614                        }
615                    }
616                }
617            }
618        } else {
619            println!("AWS rule has no regex pattern");
620        }
621    }
622
    /// Debug aid: runs the pre-compiled `generic-api-key` regex over several sample
    /// assignments and prints, for each match, its capture groups, the entropy of capture
    /// group 1, and the result of `should_allow_match`; then prints what `redact_secrets`
    /// reports for the same input.
    ///
    /// Makes no assertions; it panics only if the rule is missing or has no compiled regex.
    #[test]
    fn test_working_api_key_patterns() {
        let config = &*GITLEAKS_CONFIG;
        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();

        // Get the compiled regex
        let regex = generic_rule
            .compiled_regex
            .as_ref()
            .expect("Regex should be compiled");

        // Create test patterns that should match the regex structure
        let test_inputs = vec![
            // Pattern: prefix + keyword + separator + value + terminator
            format!("myapp_api_key = \"{}\"", fake_api_key()),
            format!("export SECRET_TOKEN={}", fake_secret_token()),
            "app.auth.password: 9k2L8pMvB3nQ7rX1ZdF5GhJwY4AsPo6C8mN".to_string(),
            "config.access_key=\"RaNd0mH1ghEnTr0pyV4luE567890abcdef\";".to_string(),
            "DB_CREDENTIALS=xy9mP2nQ8rT4vW7yZ3cF6hJ1lN5sAdefghij".to_string(),
        ];

        for input in test_inputs {
            println!("\nTesting: '{}'", input);

            let matches: Vec<_> = regex.find_iter(&input).collect();
            println!("  Matches found: {}", matches.len());

            for (i, mat) in matches.iter().enumerate() {
                println!("  Match {}: '{}'", i, mat.as_str());

                // Captures are taken from the matched substring, not the full input.
                if let Some(captures) = regex.captures(mat.as_str()) {
                    for (j, cap) in captures.iter().enumerate() {
                        if let Some(cap) = cap {
                            println!("    Capture {}: '{}'", j, cap.as_str());
                            // Entropy and allowlist status are reported for capture group 1
                            // only; the "3.5" in the message is a hard-coded label.
                            if j == 1 {
                                let entropy = calculate_entropy(cap.as_str());
                                println!("    Entropy: {:.2} (threshold: 3.5)", entropy);

                                // Also check if it would be allowed by allowlists.
                                // The whole match (not the capture) and its offsets in
                                // `input` are what get passed in.
                                let allowed = should_allow_match(
                                    &input,
                                    None,
                                    mat.as_str(),
                                    mat.start(),
                                    mat.end(),
                                    generic_rule,
                                    &config.allowlist,
                                );
                                println!("    Allowed by allowlist: {}", allowed);
                            }
                        }
                    }
                }
            }

            // Test the full redact_secrets function
            let result = redact_secrets(&input, None, &HashMap::new(), false);
            println!(
                "  Full function detected: {} secrets",
                result.redaction_map.len()
            );
            if !result.redaction_map.is_empty() {
                println!("  Redacted result: {}", result.redacted_string);
            }
        }
    }
693
    /// Debug aid: matches a sample `export API_KEY=<token>` line against a series of
    /// increasingly specific hand-written patterns, printing matches and captures for
    /// each, and then checks whether the configured `generic-api-key` regex compiles and
    /// matches the same line.
    ///
    /// Makes no assertions; it only panics if the rule is missing from the config.
    #[test]
    fn test_regex_components() {
        // Test individual components of the generic API key regex
        let test_input = format!("export API_KEY={}", fake_secret_token());
        println!("Testing input: {}", test_input);

        // Test simpler regex patterns step by step; each entry is (pattern, description)
        let test_patterns = vec![
            (r"API_KEY", "Simple keyword match"),
            (r"(?i)api_key", "Case insensitive keyword"),
            (r"(?i).*key.*", "Any text with 'key'"),
            (r"(?i).*key\s*=", "Key with equals"),
            (r"(?i).*key\s*=\s*\w+", "Key with value"),
            (
                r"(?i)[\w.-]*(?:key).*?=.*?(\w{10,})",
                "Complex pattern with capture",
            ),
        ];

        for (pattern, description) in test_patterns {
            println!("\nTesting pattern: {} ({})", pattern, description);

            match Regex::new(pattern) {
                Ok(regex) => {
                    if regex.is_match(&test_input) {
                        println!("  ✓ MATCHES");
                        for mat in regex.find_iter(&test_input) {
                            println!("    Full match: '{}'", mat.as_str());
                        }
                        // Unlike the per-match loops in the sibling tests, captures here
                        // come from the first match against the full input.
                        if let Some(captures) = regex.captures(&test_input) {
                            for (i, cap) in captures.iter().enumerate() {
                                if let Some(cap) = cap {
                                    println!("    Capture {}: '{}'", i, cap.as_str());
                                }
                            }
                        }
                    } else {
                        println!("  ✗ NO MATCH");
                    }
                }
                Err(e) => println!("  Error: {}", e),
            }
        }

        // Test if there's an issue with the actual gitleaks regex compilation
        let config = &*GITLEAKS_CONFIG;
        let generic_rule = config
            .rules
            .iter()
            .find(|r| r.id == "generic-api-key")
            .unwrap();

        println!("\nTesting actual gitleaks regex:");
        if let Some(regex_pattern) = &generic_rule.regex {
            match Regex::new(regex_pattern) {
                Ok(regex) => {
                    println!("  ✓ Regex compiles successfully");
                    println!("  Testing against: {}", test_input);
                    if regex.is_match(&test_input) {
                        println!("  ✓ MATCHES");
                    } else {
                        println!("  ✗ NO MATCH");
                    }
                }
                Err(e) => println!("  ✗ Regex compilation error: {}", e),
            }
        } else {
            println!("  Rule has no regex pattern (path-based rule)");
        }
    }
764
765    #[test]
766    fn test_comprehensive_secrets_redaction() {
767        let aws_key = fake_aws_access_key_alt();
768        let github_token = fake_github_token();
769        let api_key = fake_api_key();
770        let secret_token = fake_secret_token();
771        let password = fake_password_secret();
772        let input = format!(
773            "\n# Configuration file with various secrets\nexport AWS_ACCESS_KEY_ID={aws_key}\nexport GITHUB_TOKEN={github_token}\nexport API_KEY={api_key}\nexport SECRET_TOKEN={secret_token}\nexport PASSWORD={password}\n\n# Some normal configuration\nexport DEBUG=true\nexport PORT=3000\n"
774        );
775
776        println!("Original input:\n{}", input);
777
778        let result = redact_secrets(&input, None, &HashMap::new(), false);
779
780        println!("Redacted output:\n{}", result.redacted_string);
781        println!("\nDetected {} secrets:", result.redaction_map.len());
782        for (key, value) in &result.redaction_map {
783            println!("  {} -> {}", key, value);
784        }
785
786        // Should detect at least 5 secrets: AWS key, GitHub token, API key, secret token, password
787        assert!(
788            result.redaction_map.len() >= 5,
789            "Should detect at least 5 secrets, found: {}",
790            result.redaction_map.len()
791        );
792
793        // Verify specific secrets are redacted
794        assert!(!result.redacted_string.contains(&aws_key));
795        assert!(!result.redacted_string.contains(&github_token));
796        assert!(!result.redacted_string.contains(&api_key));
797
798        // Verify normal config is preserved
799        assert!(result.redacted_string.contains("DEBUG=true"));
800        assert!(result.redacted_string.contains("PORT=3000"));
801    }
802
803    // Helper function for keyword validation tests
804    fn count_rules_that_would_process(input: &str) -> Vec<String> {
805        let config = &*GITLEAKS_CONFIG;
806        let mut rules = Vec::new();
807
808        for rule in &config.rules {
809            if rule.keywords.is_empty() || contains_any_keyword(input, &rule.keywords) {
810                rules.push(rule.id.clone());
811            }
812        }
813
814        rules
815    }
816
817    #[test]
818    fn test_keyword_filtering() {
819        println!("=== TESTING KEYWORD FILTERING ===");
820
821        let config = &*GITLEAKS_CONFIG;
822
823        // Find a rule that has keywords (like generic-api-key)
824        let generic_rule = config
825            .rules
826            .iter()
827            .find(|r| r.id == "generic-api-key")
828            .unwrap();
829        println!("Generic API Key rule keywords: {:?}", generic_rule.keywords);
830
831        // Test 1: Input with keywords should be processed
832        let input_with_keywords = format!("export API_KEY={}", fake_api_key());
833        let result1 = redact_secrets(&input_with_keywords, None, &HashMap::new(), false);
834        println!("\nTest 1 - Input WITH keywords:");
835        println!("  Input: {}", input_with_keywords);
836        println!(
837            "  Keywords present: {}",
838            contains_any_keyword(&input_with_keywords, &generic_rule.keywords)
839        );
840        println!("  Secrets detected: {}", result1.redaction_map.len());
841
842        // Test 2: Input without any keywords should NOT be processed for that rule
843        let input_without_keywords = "export DATABASE_URL=postgresql://user:pass@localhost/db";
844        let result2 = redact_secrets(input_without_keywords, None, &HashMap::new(), false);
845        println!("\nTest 2 - Input WITHOUT generic-api-key keywords:");
846        println!("  Input: {}", input_without_keywords);
847        println!(
848            "  Keywords present: {}",
849            contains_any_keyword(input_without_keywords, &generic_rule.keywords)
850        );
851        println!("  Secrets detected: {}", result2.redaction_map.len());
852
853        // Test 3: Input with different rule's keywords (AWS)
854        let aws_rule = config
855            .rules
856            .iter()
857            .find(|r| r.id == "aws-access-token")
858            .unwrap();
859        let aws_input = format!("AWS_ACCESS_KEY_ID={}", fake_aws_access_key_example());
860        let result3 = redact_secrets(&aws_input, None, &HashMap::new(), false);
861        println!("\nTest 3 - AWS input:");
862        println!("  Input: {}", aws_input);
863        println!("  AWS rule keywords: {:?}", aws_rule.keywords);
864        println!(
865            "  Keywords present: {}",
866            contains_any_keyword(&aws_input, &aws_rule.keywords)
867        );
868        println!("  Secrets detected: {}", result3.redaction_map.len());
869
870        // Validate that keyword filtering is working
871        assert!(
872            contains_any_keyword(&input_with_keywords, &generic_rule.keywords),
873            "API_KEY input should contain generic-api-key keywords"
874        );
875        assert!(
876            !contains_any_keyword(input_without_keywords, &generic_rule.keywords),
877            "DATABASE_URL input should NOT contain generic-api-key keywords"
878        );
879        assert!(
880            contains_any_keyword(&aws_input, &aws_rule.keywords),
881            "AWS input should contain AWS rule keywords"
882        );
883    }
884
885    #[test]
886    fn test_keyword_optimization_performance() {
887        println!("=== TESTING KEYWORD OPTIMIZATION PERFORMANCE ===");
888
889        let config = &*GITLEAKS_CONFIG;
890
891        // Test case 1: Input with NO keywords for any rule should be very fast
892        let no_keywords_input = "export DATABASE_CONNECTION=some_long_connection_string_that_has_no_common_secret_keywords";
893        println!("Testing input with no secret keywords:");
894        println!("  Input: {}", no_keywords_input);
895
896        let mut keyword_matches = 0;
897        for rule in &config.rules {
898            if contains_any_keyword(no_keywords_input, &rule.keywords) {
899                keyword_matches += 1;
900                println!("  Rule '{}' keywords match: {:?}", rule.id, rule.keywords);
901            }
902        }
903        println!(
904            "  Rules with matching keywords: {} out of {}",
905            keyword_matches,
906            config.rules.len()
907        );
908
909        let result = redact_secrets(no_keywords_input, None, &HashMap::new(), false);
910        println!("  Secrets detected: {}", result.redaction_map.len());
911
912        // Test case 2: Input with specific keywords should only process relevant rules
913        let specific_keywords_input = format!("export GITHUB_TOKEN={}", fake_github_token_short());
914        println!("\nTesting input with specific keywords (github):");
915        println!("  Input: {}", specific_keywords_input);
916
917        let mut matching_rules = Vec::new();
918        for rule in &config.rules {
919            if contains_any_keyword(&specific_keywords_input, &rule.keywords) {
920                matching_rules.push(&rule.id);
921            }
922        }
923        println!("  Rules that would be processed: {:?}", matching_rules);
924
925        let result = redact_secrets(&specific_keywords_input, None, &HashMap::new(), false);
926        println!("  Secrets detected: {}", result.redaction_map.len());
927
928        // Test case 3: Verify that rules without keywords are always processed
929        let rules_without_keywords: Vec<_> = config
930            .rules
931            .iter()
932            .filter(|rule| rule.keywords.is_empty())
933            .collect();
934        println!(
935            "\nRules without keywords (always processed): {}",
936            rules_without_keywords.len()
937        );
938        for rule in &rules_without_keywords {
939            println!("  - {}", rule.id);
940        }
941
942        // Assertions
943        assert!(
944            keyword_matches < config.rules.len(),
945            "Input with no keywords should not match all rules"
946        );
947        assert!(
948            !matching_rules.is_empty(),
949            "GitHub token input should match some rules"
950        );
951        assert!(
952            matching_rules.contains(&&"github-pat".to_string())
953                || matching_rules
954                    .iter()
955                    .any(|rule_id| rule_id.contains("github")),
956            "GitHub token should match GitHub-related rules"
957        );
958    }
959
960    #[test]
961    fn test_keyword_filtering_efficiency() {
962        println!("=== KEYWORD FILTERING EFFICIENCY TEST ===");
963
964        let config = &*GITLEAKS_CONFIG;
965        println!("Total rules in config: {}", config.rules.len());
966
967        // Test with input that has NO matching keywords
968        let non_secret_input = "export DATABASE_URL=localhost PORT=3000 DEBUG=true TIMEOUT=30";
969        println!("\nTesting non-secret input: {}", non_secret_input);
970
971        let mut rules_skipped = 0;
972        let mut rules_processed = 0;
973
974        for rule in &config.rules {
975            if rule.keywords.is_empty() || contains_any_keyword(non_secret_input, &rule.keywords) {
976                rules_processed += 1;
977            } else {
978                rules_skipped += 1;
979            }
980        }
981
982        println!(
983            "  Rules skipped due to keyword filtering: {}",
984            rules_skipped
985        );
986        println!("  Rules that would be processed: {}", rules_processed);
987        println!(
988            "  Efficiency gain: {:.1}% of rules skipped",
989            (rules_skipped as f64 / config.rules.len() as f64) * 100.0
990        );
991
992        // Verify no secrets are detected
993        let result = redact_secrets(non_secret_input, None, &HashMap::new(), false);
994        println!("  Secrets detected: {}", result.redaction_map.len());
995
996        // Now test with input that has relevant keywords
997        let secret_input = format!(
998            "export API_KEY={} SECRET_TOKEN=xyz789uvw012rst345def678",
999            fake_api_key()
1000        );
1001        println!("\nTesting input WITH secret keywords:");
1002        println!("  Input: {}", secret_input);
1003
1004        let mut rules_with_keywords = 0;
1005        for rule in &config.rules {
1006            if contains_any_keyword(&secret_input, &rule.keywords) {
1007                rules_with_keywords += 1;
1008            }
1009        }
1010
1011        println!("  Rules that match keywords: {}", rules_with_keywords);
1012
1013        let result = redact_secrets(&secret_input, None, &HashMap::new(), false);
1014        println!("  Secrets detected: {}", result.redaction_map.len());
1015
1016        // Assertions
1017        assert!(
1018            rules_skipped > 0,
1019            "Should skip at least some rules for non-secret input"
1020        );
1021        assert!(
1022            rules_with_keywords > 0,
1023            "Should find matching rules for secret input"
1024        );
1025        assert!(
1026            !result.redaction_map.is_empty(),
1027            "Should detect at least one secret"
1028        );
1029    }
1030
1031    #[test]
1032    fn test_keyword_validation_summary() {
1033        println!("=== KEYWORD VALIDATION SUMMARY ===");
1034
1035        let config = &*GITLEAKS_CONFIG;
1036        let total_rules = config.rules.len();
1037        println!("Total rules in gitleaks config: {}", total_rules);
1038
1039        // Test no keywords - should skip most rules
1040        let no_keyword_input = "export DATABASE_URL=localhost PORT=3000";
1041        println!("\n--- No keywords - should skip all rules ---");
1042        println!("Input: {}", no_keyword_input);
1043
1044        let no_keyword_rules = count_rules_that_would_process(no_keyword_input);
1045        println!(
1046            "Rules that would be processed: {} out of {}",
1047            no_keyword_rules.len(),
1048            total_rules
1049        );
1050        println!("  Rules: {:?}", no_keyword_rules);
1051
1052        let no_keyword_secrets = detect_secrets(no_keyword_input, None, false);
1053        println!(
1054            "Secrets detected: {} (expected: 0)",
1055            no_keyword_secrets.len()
1056        );
1057        assert_eq!(no_keyword_secrets.len(), 0, "Should not detect any secrets");
1058        println!("✅ Test passed");
1059
1060        // Test API keyword - should process generic-api-key rule
1061        let api_input = format!("export API_KEY={}", fake_api_key());
1062        println!("\n--- API keyword - should process generic-api-key rule ---");
1063        println!("Input: {}", api_input);
1064
1065        let api_rules = count_rules_that_would_process(&api_input);
1066        println!(
1067            "Rules that would be processed: {} out of {}",
1068            api_rules.len(),
1069            total_rules
1070        );
1071        println!("  Rules: {:?}", api_rules);
1072
1073        let api_secrets = detect_secrets(&api_input, None, false);
1074        println!("Secrets detected: {} (expected: 1)", api_secrets.len());
1075        assert!(!api_secrets.is_empty(), "Should detect at least 1 secrets");
1076        println!("✅ Test passed");
1077
1078        // Test AWS keyword - should process aws-access-token rule
1079        // Use a realistic AWS key that matches the pattern [A-Z2-7]{16}
1080        let aws_input = format!("AWS_ACCESS_KEY_ID={}", fake_aws_access_key_alt());
1081        println!("\n--- AWS keyword - should process aws-access-token rule ---");
1082        println!("Input: {}", aws_input);
1083
1084        let aws_rules = count_rules_that_would_process(&aws_input);
1085        println!(
1086            "Rules that would be processed: {} out of {}",
1087            aws_rules.len(),
1088            total_rules
1089        );
1090        println!("  Rules: {:?}", aws_rules);
1091
1092        let aws_secrets = detect_secrets(&aws_input, None, false);
1093        println!("Secrets detected: {} (expected: 1)", aws_secrets.len());
1094
1095        // Should detect AWS key
1096        assert!(!aws_secrets.is_empty(), "Should detect at least 1 secrets");
1097        println!("✅ Test passed");
1098    }
1099
1100    #[test]
1101    fn test_debug_missing_secrets() {
1102        println!("=== DEBUGGING MISSING SECRETS ===");
1103
1104        let test_cases = vec![
1105            format!("SECRET_TOKEN={}", fake_secret_token()),
1106            format!("PASSWORD={}", fake_password_secret()),
1107        ];
1108
1109        for input in test_cases {
1110            println!("\nTesting: {}", input);
1111
1112            // Check entropy first
1113            let parts: Vec<&str> = input.split('=').collect();
1114            if parts.len() == 2 {
1115                let secret_value = parts[1];
1116                let entropy = calculate_entropy(secret_value);
1117                println!("  Secret value: '{}'", secret_value);
1118                println!("  Entropy: {:.2} (threshold: 3.5)", entropy);
1119
1120                if entropy >= 3.5 {
1121                    println!("  ✓ Entropy check PASSED");
1122                } else {
1123                    println!("  ✗ Entropy check FAILED - this is why it's not detected");
1124                }
1125            }
1126
1127            // Test the fallback regex directly
1128            if let Ok(regex) = create_simple_api_key_regex() {
1129                println!("  Testing fallback regex:");
1130                if regex.is_match(&input) {
1131                    println!("    ✓ Fallback regex MATCHES");
1132                    for mat in regex.find_iter(&input) {
1133                        println!("    Match: '{}'", mat.as_str());
1134                        if let Some(captures) = regex.captures(mat.as_str()) {
1135                            for (i, cap) in captures.iter().enumerate() {
1136                                if let Some(cap) = cap {
1137                                    println!("      Capture {}: '{}'", i, cap.as_str());
1138                                }
1139                            }
1140                        }
1141
1142                        // Test allowlist checking
1143                        let config = &*GITLEAKS_CONFIG;
1144                        let generic_rule = config
1145                            .rules
1146                            .iter()
1147                            .find(|r| r.id == "generic-api-key")
1148                            .unwrap();
1149                        let allowed = should_allow_match(
1150                            &input,
1151                            None,
1152                            mat.as_str(),
1153                            mat.start(),
1154                            mat.end(),
1155                            generic_rule,
1156                            &config.allowlist,
1157                        );
1158                        println!("      Allowed by allowlist: {}", allowed);
1159                        if allowed {
1160                            println!(
1161                                "      ✗ FILTERED OUT by allowlist - this is why it's not detected"
1162                            );
1163                        }
1164                    }
1165                } else {
1166                    println!("    ✗ Fallback regex does NOT match");
1167                }
1168            }
1169
1170            // Test full detection
1171            let result = redact_secrets(&input, None, &HashMap::new(), false);
1172            println!(
1173                "  Full detection result: {} secrets",
1174                result.redaction_map.len()
1175            );
1176        }
1177    }
1178
1179    #[test]
1180    fn test_debug_allowlist_filtering() {
1181        println!("=== DEBUGGING ALLOWLIST FILTERING ===");
1182
1183        let test_cases = vec![
1184            format!("SECRET_TOKEN={}", fake_secret_token()),
1185            format!("PASSWORD={}", fake_password_secret()),
1186        ];
1187
1188        let config = &*GITLEAKS_CONFIG;
1189        let generic_rule = config
1190            .rules
1191            .iter()
1192            .find(|r| r.id == "generic-api-key")
1193            .unwrap();
1194
1195        for input in test_cases {
1196            println!("\nAnalyzing: {}", input);
1197
1198            if let Ok(regex) = create_simple_api_key_regex() {
1199                for mat in regex.find_iter(&input) {
1200                    let match_text = mat.as_str();
1201                    println!("  Match: '{}'", match_text);
1202
1203                    // Test global allowlist
1204                    if let Some(global_allowlist) = &config.allowlist {
1205                        println!("  Checking global allowlist:");
1206
1207                        // Test global regex patterns
1208                        if let Some(regexes) = &global_allowlist.regexes {
1209                            for (i, pattern) in regexes.iter().enumerate() {
1210                                if let Ok(regex) = Regex::new(pattern)
1211                                    && regex.is_match(match_text)
1212                                {
1213                                    println!("    ✗ FILTERED by global regex {}: '{}'", i, pattern);
1214                                }
1215                            }
1216                        }
1217
1218                        // Test global stopwords
1219                        if let Some(stopwords) = &global_allowlist.stopwords {
1220                            for stopword in stopwords {
1221                                if match_text.to_lowercase().contains(&stopword.to_lowercase()) {
1222                                    println!("    ✗ FILTERED by global stopword: '{}'", stopword);
1223                                }
1224                            }
1225                        }
1226                    }
1227
1228                    // Test rule-specific allowlists
1229                    if let Some(rule_allowlists) = &generic_rule.allowlists {
1230                        for (rule_idx, allowlist) in rule_allowlists.iter().enumerate() {
1231                            println!("  Checking rule allowlist {}:", rule_idx);
1232
1233                            // Test rule regex patterns
1234                            if let Some(regexes) = &allowlist.regexes {
1235                                for (i, pattern) in regexes.iter().enumerate() {
1236                                    if let Ok(regex) = Regex::new(pattern)
1237                                        && regex.is_match(match_text)
1238                                    {
1239                                        println!(
1240                                            "    ✗ FILTERED by rule regex {}: '{}'",
1241                                            i, pattern
1242                                        );
1243                                    }
1244                                }
1245                            }
1246
1247                            // Test rule stopwords
1248                            if let Some(stopwords) = &allowlist.stopwords {
1249                                for stopword in stopwords {
1250                                    if match_text.to_lowercase().contains(&stopword.to_lowercase())
1251                                    {
1252                                        println!("    ✗ FILTERED by rule stopword: '{}'", stopword);
1253                                    }
1254                                }
1255                            }
1256                        }
1257                    }
1258                }
1259            }
1260        }
1261    }
1262
1263    #[test]
1264    fn test_debug_new_allowlist_logic() {
1265        println!("=== DEBUGGING NEW ALLOWLIST LOGIC ===");
1266
1267        let test_cases = vec![
1268            format!("SECRET_TOKEN={}", fake_secret_token()),
1269            format!("PASSWORD={}", fake_password_secret()),
1270            "PASSWORD=password123".to_string(), // Should be filtered
1271            "API_KEY=example_key".to_string(),  // Should be filtered
1272        ];
1273
1274        let config = &*GITLEAKS_CONFIG;
1275        let generic_rule = config
1276            .rules
1277            .iter()
1278            .find(|r| r.id == "generic-api-key")
1279            .unwrap();
1280
1281        for input in test_cases {
1282            println!("\nTesting: {}", input);
1283
1284            if let Ok(regex) = create_simple_api_key_regex() {
1285                for mat in regex.find_iter(&input) {
1286                    let match_text = mat.as_str();
1287                    println!("  Match: '{}'", match_text);
1288
1289                    // Parse the KEY=VALUE
1290                    if let Some((_, value)) = match_text.split_once('=') {
1291                        println!("    Value: '{}'", value);
1292
1293                        // Test specific stopwords
1294                        let test_stopwords = ["token", "password", "super", "word"];
1295                        for stopword in test_stopwords {
1296                            let value_lower = value.to_lowercase();
1297                            let stopword_lower = stopword.to_lowercase();
1298
1299                            if value_lower == stopword_lower {
1300                                println!("    '{}' - Exact match: YES", stopword);
1301                            } else if value.len() < 15 && value_lower.contains(&stopword_lower) {
1302                                let without_stopword = value_lower.replace(&stopword_lower, "");
1303                                let is_simple = without_stopword.chars().all(|c| {
1304                                    c.is_ascii_digit() || "!@#$%^&*()_+-=[]{}|;:,.<>?".contains(c)
1305                                });
1306                                println!(
1307                                    "    '{}' - Short+contains: len={}, without='{}', simple={}",
1308                                    stopword,
1309                                    value.len(),
1310                                    without_stopword,
1311                                    is_simple
1312                                );
1313                            } else {
1314                                println!("    '{}' - No filter", stopword);
1315                            }
1316                        }
1317                    }
1318
1319                    // Test the actual allowlist
1320                    if let Some(rule_allowlists) = &generic_rule.allowlists {
1321                        for (rule_idx, allowlist) in rule_allowlists.iter().enumerate() {
1322                            let allowed = is_allowed_by_rule_allowlist(
1323                                &input,
1324                                None,
1325                                match_text,
1326                                mat.start(),
1327                                mat.end(),
1328                                allowlist,
1329                            );
1330                            println!("  Rule allowlist {}: allowed = {}", rule_idx, allowed);
1331                        }
1332                    }
1333                }
1334            }
1335        }
1336    }
1337
1338    #[test]
1339    fn test_redact_password_basic() {
1340        let content = "User password is supersecret123 and should be hidden";
1341        let password = "supersecret123";
1342        let result = redact_password(content, password, &HashMap::new());
1343
1344        // Should redact the password
1345        assert!(!result.redacted_string.contains(password));
1346        assert!(
1347            result
1348                .redacted_string
1349                .contains("[REDACTED_SECRET:password:")
1350        );
1351        assert_eq!(result.redaction_map.len(), 1);
1352
1353        // The redaction map should contain our password
1354        let redacted_password = result.redaction_map.values().next().unwrap();
1355        assert_eq!(redacted_password, password);
1356    }
1357
1358    #[test]
1359    fn test_redact_password_empty() {
1360        let content = "Some content without password";
1361        let password = "";
1362        let result = redact_password(content, password, &HashMap::new());
1363
1364        // Should not change anything
1365        assert_eq!(result.redacted_string, content);
1366        assert!(result.redaction_map.is_empty());
1367    }
1368
1369    #[test]
1370    fn test_redact_password_multiple_occurrences() {
1371        let content = "Password is mypass123 and again mypass123 appears here";
1372        let password = "mypass123";
1373        let result = redact_password(content, password, &HashMap::new());
1374
1375        // Should redact both occurrences with the same key
1376        assert!(!result.redacted_string.contains(password));
1377        assert_eq!(result.redaction_map.len(), 1);
1378
1379        // Count redaction keys in the result
1380        let redaction_key = result.redaction_map.keys().next().unwrap();
1381        let count = result.redacted_string.matches(redaction_key).count();
1382        assert_eq!(count, 2);
1383    }
1384
1385    #[test]
1386    fn test_redact_password_reuse_existing_key() {
1387        // Start with an existing redaction map
1388        let mut existing_map = HashMap::new();
1389        existing_map.insert(
1390            "[REDACTED_SECRET:password:abc123]".to_string(),
1391            "mypassword".to_string(),
1392        );
1393
1394        let content = "The password mypassword should use existing key";
1395        let password = "mypassword";
1396        let result = redact_password(content, password, &existing_map);
1397
1398        // Should reuse the existing key
1399        assert_eq!(result.redaction_map.len(), 1);
1400        assert!(
1401            result
1402                .redaction_map
1403                .contains_key("[REDACTED_SECRET:password:abc123]")
1404        );
1405        assert!(
1406            result
1407                .redacted_string
1408                .contains("[REDACTED_SECRET:password:abc123]")
1409        );
1410    }
1411
1412    #[test]
1413    fn test_redact_password_with_existing_different_secrets() {
1414        // Start with an existing redaction map containing different secrets
1415        let mut existing_map = HashMap::new();
1416        existing_map.insert(
1417            "[REDACTED_SECRET:api-key:xyz789]".to_string(),
1418            "some_api_key".to_string(),
1419        );
1420
1421        let content = "API key is some_api_key and password is newpassword123";
1422        let password = "newpassword123";
1423        let result = redact_password(content, password, &existing_map);
1424
1425        // Should preserve existing mapping and add new one
1426        assert_eq!(result.redaction_map.len(), 2);
1427        assert!(
1428            result
1429                .redaction_map
1430                .contains_key("[REDACTED_SECRET:api-key:xyz789]")
1431        );
1432        assert!(
1433            result
1434                .redaction_map
1435                .get("[REDACTED_SECRET:api-key:xyz789]")
1436                .unwrap()
1437                == "some_api_key"
1438        );
1439
1440        // Should add new password mapping
1441        let new_keys: Vec<_> = result
1442            .redaction_map
1443            .keys()
1444            .filter(|k| k.contains("password"))
1445            .collect();
1446        assert_eq!(new_keys.len(), 1);
1447        let password_key = new_keys[0];
1448        assert_eq!(
1449            result.redaction_map.get(password_key).unwrap(),
1450            "newpassword123"
1451        );
1452    }
1453
1454    #[test]
1455    fn test_redact_password_no_match() {
1456        let content = "This content has no matching password";
1457        let password = "notfound";
1458        let result = redact_password(content, password, &HashMap::new());
1459
1460        // Should still create a redaction key but content unchanged
1461        assert_eq!(result.redacted_string, content);
1462        assert_eq!(result.redaction_map.len(), 1);
1463        assert_eq!(result.redaction_map.values().next().unwrap(), "notfound");
1464    }
1465
1466    #[test]
1467    fn test_redact_password_integration_with_restore() {
1468        let content = "Login with username admin and password secret456";
1469        let password = "secret456";
1470        let result = redact_password(content, password, &HashMap::new());
1471
1472        // Redact the password
1473        assert!(!result.redacted_string.contains(password));
1474        assert!(result.redacted_string.contains("username admin"));
1475
1476        // Restore should bring back the original
1477        let restored = restore_secrets(&result.redacted_string, &result.redaction_map);
1478        assert_eq!(restored, content);
1479    }
1480
1481    #[test]
1482    fn test_redact_secrets_with_existing_redaction_map() {
1483        // Test that secrets in the existing redaction map get redacted even if not detected by detect_secrets
1484        let content = "The secret value is mysecretvalue123 and another is anothersecret456";
1485
1486        // First, test with empty map to prove the secret wouldn't normally be redacted
1487        let result_empty = redact_secrets(content, None, &HashMap::new(), false);
1488
1489        // Verify that mysecretvalue123 is NOT redacted when using empty map
1490        assert!(result_empty.redacted_string.contains("mysecretvalue123"));
1491        // Now create an existing redaction map with one of the secrets
1492        let mut existing_redaction_map = HashMap::new();
1493        existing_redaction_map.insert(
1494            "[REDACTED_SECRET:manual:abc123]".to_string(),
1495            "mysecretvalue123".to_string(),
1496        );
1497
1498        let result = redact_secrets(content, None, &existing_redaction_map, false);
1499
1500        // The secret from the existing map should be redacted
1501        assert!(
1502            result
1503                .redacted_string
1504                .contains("[REDACTED_SECRET:manual:abc123]")
1505        );
1506        assert!(!result.redacted_string.contains("mysecretvalue123"));
1507
1508        // The redaction map should contain the existing mapping
1509        assert!(
1510            result
1511                .redaction_map
1512                .contains_key("[REDACTED_SECRET:manual:abc123]")
1513        );
1514        assert_eq!(
1515            result
1516                .redaction_map
1517                .get("[REDACTED_SECRET:manual:abc123]")
1518                .unwrap(),
1519            "mysecretvalue123"
1520        );
1521    }
1522
1523    #[test]
1524    fn test_redact_secrets_skip_already_redacted() {
1525        // Content that already contains redacted secrets should not be double-redacted
1526        let content = "The password is [REDACTED_SECRET:password:abc123] and API key is [REDACTED_SECRET:api-key:xyz789]";
1527        let result = redact_secrets(content, None, &HashMap::new(), false);
1528
1529        // Should return content unchanged
1530        assert_eq!(result.redacted_string, content);
1531        // Should not add any new redactions
1532        assert!(result.redaction_map.is_empty());
1533    }
1534
1535    #[test]
1536    fn test_redact_password_skip_already_redacted() {
1537        // Content that already contains redacted secrets should not be double-redacted
1538        let content = "[REDACTED_SECRET:password:existing123]";
1539        let password = "newpassword";
1540        let result = redact_password(content, password, &HashMap::new());
1541
1542        // Should return content unchanged
1543        assert_eq!(result.redacted_string, content);
1544        // Should not add any new redactions
1545        assert!(result.redaction_map.is_empty());
1546    }
1547
1548    #[test]
1549    fn test_redact_secrets_skip_nested_redaction() {
1550        // Simulate what happens when local_tools redacts and proxy tries to redact again
1551        let original_password = "MySecureP@ssw0rd!";
1552
1553        // First redaction (simulating local_tools)
1554        let first_result = redact_password(original_password, original_password, &HashMap::new());
1555        assert!(
1556            first_result
1557                .redacted_string
1558                .contains("[REDACTED_SECRET:password:")
1559        );
1560
1561        // Second redaction attempt (simulating proxy) - should be skipped
1562        let second_result =
1563            redact_secrets(&first_result.redacted_string, None, &HashMap::new(), false);
1564
1565        // Should return the already-redacted content unchanged
1566        assert_eq!(second_result.redacted_string, first_result.redacted_string);
1567        assert!(second_result.redaction_map.is_empty());
1568    }
1569
1570    #[test]
1571    fn test_huawei_cloud_credentials_detection() {
1572        // Test Huawei Cloud credentials in CSV format
1573        // Using obviously fake test values (TESTHUAWEI prefix) to avoid GitHub push protection
1574        let csv_content = r#"User Name,Access Key Id,Secret Access Key
1575terraform,TESTHUAWEIKEY1234567,TestHuaweiSecretKey1234567890abcdefghij"#;
1576
1577        let result = redact_secrets(csv_content, None, &HashMap::new(), false);
1578
1579        println!("Input: {}", csv_content);
1580        println!("Redacted: {}", result.redacted_string);
1581        println!("Mapping: {:?}", result.redaction_map);
1582
1583        // Should detect both AK and SK
1584        assert!(
1585            !result.redaction_map.is_empty(),
1586            "Should detect Huawei credentials"
1587        );
1588
1589        // Verify AK is redacted (20 char uppercase alphanumeric)
1590        assert!(
1591            !result.redacted_string.contains("TESTHUAWEIKEY1234567"),
1592            "AK should be redacted"
1593        );
1594
1595        // Verify SK is redacted (40 char alphanumeric)
1596        assert!(
1597            !result
1598                .redacted_string
1599                .contains("TestHuaweiSecretKey1234567890abcdefghij"),
1600            "SK should be redacted"
1601        );
1602
1603        // Verify redaction keys are present
1604        assert!(
1605            result.redacted_string.contains("[REDACTED_SECRET:huawei-"),
1606            "Should contain Huawei redaction markers"
1607        );
1608    }
1609
1610    #[test]
1611    fn test_huawei_access_key_id_pattern() {
1612        // Test AK detection with "Access Key Id" keyword
1613        // Using obviously fake test value to avoid GitHub push protection
1614        // Must be exactly 20 chars to match the regex pattern
1615        let input = "Access Key Id: TESTHWCLOUD123456789";
1616        let result = redact_secrets(input, None, &HashMap::new(), false);
1617
1618        println!("Input: {}", input);
1619        println!("Redacted: {}", result.redacted_string);
1620
1621        assert!(
1622            !result.redaction_map.is_empty(),
1623            "Should detect Huawei AK with 'Access Key Id' keyword"
1624        );
1625        assert!(
1626            !result.redacted_string.contains("TESTHWCLOUD123456789"),
1627            "AK should be redacted"
1628        );
1629    }
1630
1631    #[test]
1632    fn test_huawei_secret_access_key_pattern() {
1633        // Test SK detection with "Secret Access Key" keyword
1634        // Using obviously fake test value to avoid GitHub push protection
1635        let input = "Secret Access Key: TestHwCloudSecretKey12345678901234567890";
1636        let result = redact_secrets(input, None, &HashMap::new(), false);
1637
1638        println!("Input: {}", input);
1639        println!("Redacted: {}", result.redacted_string);
1640
1641        assert!(
1642            !result.redaction_map.is_empty(),
1643            "Should detect Huawei SK with 'Secret Access Key' keyword"
1644        );
1645        assert!(
1646            !result
1647                .redacted_string
1648                .contains("TestHwCloudSecretKey12345678901234567890"),
1649            "SK should be redacted"
1650        );
1651    }
1652}