Skip to main content

tldr_core/security/
secrets.rs

1//! Secret scanning
2//!
3//! Implements detection of hardcoded secrets as per spec Section 2.9.1:
4//! - AWS key patterns (AKIA...)
5//! - Private key headers (-----BEGIN...PRIVATE KEY-----)
6//! - High entropy strings (Shannon entropy > threshold)
7//! - Password assignments (password = "...")
8//!
9//! # Example
10//! ```ignore
11//! use tldr_core::security::secrets::{scan_secrets, Severity};
12//!
13//! let report = scan_secrets(Path::new("src/"), 4.5, false, None)?;
14//! for finding in &report.findings {
15//!     println!("{}: {} at {}:{}", finding.severity, finding.pattern, finding.file.display(), finding.line);
16//! }
17//! ```
18
19use std::collections::HashMap;
20use std::path::{Path, PathBuf};
21
22use regex::Regex;
23use serde::{Deserialize, Serialize};
24use walkdir::WalkDir;
25
26use crate::TldrResult;
27
28// =============================================================================
29// Types
30// =============================================================================
31
/// Severity levels for secret findings.
///
/// Variants are declared from least to most severe, so the derived `Ord`
/// yields `Low < Medium < High < Critical`; `scan_secrets` relies on this
/// ordering for its minimum-severity filter. Serde (de)serializes the
/// variants in lowercase (`"low"`, `"medium"`, ...).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
    /// Low severity - may be false positive
    Low,
    /// Medium severity - should be reviewed
    Medium,
    /// High severity - likely a real secret
    High,
    /// Critical severity - confirmed sensitive data
    Critical,
}
45
46impl std::fmt::Display for Severity {
47    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48        match self {
49            Severity::Low => write!(f, "LOW"),
50            Severity::Medium => write!(f, "MEDIUM"),
51            Severity::High => write!(f, "HIGH"),
52            Severity::Critical => write!(f, "CRITICAL"),
53        }
54    }
55}
56
/// A secret pattern to detect.
///
/// One entry of the `SECRET_PATTERNS` table: a compiled regex plus the
/// metadata copied into every `SecretFinding` it produces.
#[derive(Debug, Clone)]
struct SecretPattern {
    /// Short pattern name; reported as `SecretFinding::pattern`.
    name: &'static str,
    /// Compiled regex applied to each line of a scanned file.
    pattern: Regex,
    /// Severity assigned to findings produced by this pattern.
    severity: Severity,
    /// Human-readable description copied into the finding.
    description: &'static str,
}
65
/// A single secret finding
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecretFinding {
    /// File containing the secret
    pub file: PathBuf,
    /// Line number (1-based)
    pub line: u32,
    /// Column number (start of match), as a 0-based byte offset into the line
    pub column: u32,
    /// Pattern that matched (a pattern name, or "High Entropy" for entropy-based findings)
    pub pattern: String,
    /// Severity level
    pub severity: Severity,
    /// Masked value (partial redaction)
    pub masked_value: String,
    /// Description of the secret type
    pub description: String,
    /// Line content for context (the scanner truncates long lines);
    /// omitted from serialized output when `None`
    #[serde(skip_serializing_if = "Option::is_none")]
    pub line_content: Option<String>,
}
87
/// Summary statistics for secret scanning
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecretsSummary {
    /// Total findings (after any severity filter has been applied)
    pub total_findings: usize,
    /// Count by severity, keyed by the severity's `Display` form (e.g. "HIGH")
    pub by_severity: HashMap<String, usize>,
    /// Count by pattern type, keyed by the finding's pattern name
    pub by_pattern: HashMap<String, usize>,
}
98
/// Report from secret scanning
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecretsReport {
    /// All secret findings
    pub findings: Vec<SecretFinding>,
    /// Number of files scanned (files that could not be read are not counted)
    pub files_scanned: usize,
    /// Number of regex patterns checked (the entropy check is not included)
    pub patterns_checked: usize,
    /// Summary statistics
    pub summary: SecretsSummary,
}
111
112// =============================================================================
113// Secret Patterns
114// =============================================================================
115
116lazy_static::lazy_static! {
117    /// Compiled secret detection patterns
118    static ref SECRET_PATTERNS: Vec<SecretPattern> = vec![
119        // AWS Access Key ID
120        SecretPattern {
121            name: "AWS Access Key",
122            pattern: Regex::new(r"AKIA[0-9A-Z]{16}").unwrap(),
123            severity: Severity::Critical,
124            description: "AWS Access Key ID detected",
125        },
126        // AWS Secret Access Key
127        SecretPattern {
128            name: "AWS Secret Key",
129            pattern: Regex::new(r#"(?i)aws(.{0,20})?['"][0-9a-zA-Z/+]{40}['"]"#).unwrap(),
130            severity: Severity::Critical,
131            description: "AWS Secret Access Key detected",
132        },
133        // Private Key Header
134        SecretPattern {
135            name: "Private Key",
136            pattern: Regex::new(r"-----BEGIN\s*(RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY-----").unwrap(),
137            severity: Severity::Critical,
138            description: "Private key header detected",
139        },
140        // GitHub Token
141        SecretPattern {
142            name: "GitHub Token",
143            pattern: Regex::new(r"gh[pousr]_[A-Za-z0-9_]{36,}").unwrap(),
144            severity: Severity::Critical,
145            description: "GitHub personal access token detected",
146        },
147        // Generic API Key
148        SecretPattern {
149            name: "API Key",
150            pattern: Regex::new(r#"(?i)(api[_-]?key|apikey)\s*[:=]\s*['"]\s*[a-zA-Z0-9]{20,}['"]\s*"#).unwrap(),
151            severity: Severity::High,
152            description: "Generic API key pattern detected",
153        },
154        // Password in config
155        SecretPattern {
156            name: "Password",
157            pattern: Regex::new(r#"(?i)(password|passwd|pwd)\s*[:=]\s*['"][^'"]{4,}['"]"#).unwrap(),
158            severity: Severity::High,
159            description: "Hardcoded password detected",
160        },
161        // Secret in config
162        SecretPattern {
163            name: "Secret",
164            pattern: Regex::new(r#"(?i)(secret|token)\s*[:=]\s*['"][^'"]{8,}['"]"#).unwrap(),
165            severity: Severity::High,
166            description: "Hardcoded secret/token detected",
167        },
168        // Database URL with credentials
169        SecretPattern {
170            name: "Database URL",
171            pattern: Regex::new(r"(?i)(postgres|mysql|mongodb|redis)://[^:]+:[^@]+@").unwrap(),
172            severity: Severity::High,
173            description: "Database URL with credentials detected",
174        },
175        // Slack Token
176        SecretPattern {
177            name: "Slack Token",
178            pattern: Regex::new(r"xox[baprs]-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*").unwrap(),
179            severity: Severity::Critical,
180            description: "Slack token detected",
181        },
182        // JWT
183        SecretPattern {
184            name: "JWT",
185            pattern: Regex::new(r"eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*").unwrap(),
186            severity: Severity::Medium,
187            description: "JSON Web Token detected",
188        },
189        // Bearer Token
190        SecretPattern {
191            name: "Bearer Token",
192            pattern: Regex::new(r#"(?i)bearer\s+[a-zA-Z0-9_\-\.]+[a-zA-Z0-9_\-\.]"#).unwrap(),
193            severity: Severity::Medium,
194            description: "Bearer token in header detected",
195        },
196    ];
197
198    /// Test file patterns to skip by default
199    static ref TEST_FILE_PATTERNS: Regex = Regex::new(
200        r"(?i)(test[_/]|_test\.|\.test\.|spec[_/]|_spec\.|\.spec\.|conftest|fixture|mock)"
201    ).unwrap();
202}
203
204// =============================================================================
205// Main API
206// =============================================================================
207
208/// Scan for hardcoded secrets in files
209///
210/// # Arguments
211/// * `path` - File or directory to scan
212/// * `entropy_threshold` - Shannon entropy threshold for high-entropy strings (default: 4.5)
213/// * `include_test` - Whether to scan test files
214/// * `severity_filter` - Optional minimum severity to report
215///
216/// # Returns
217/// * `Ok(SecretsReport)` - Report with all findings
218/// * `Err(TldrError)` - On file system errors
219///
220/// # Example
221/// ```ignore
222/// use tldr_core::security::secrets::{scan_secrets, Severity};
223///
224/// // Scan with default settings
225/// let report = scan_secrets(Path::new("src/"), 4.5, false, None)?;
226///
227/// // Scan only for critical findings
228/// let report = scan_secrets(Path::new("src/"), 4.5, false, Some(Severity::Critical))?;
229/// ```
230pub fn scan_secrets(
231    path: &Path,
232    entropy_threshold: f64,
233    include_test: bool,
234    severity_filter: Option<Severity>,
235) -> TldrResult<SecretsReport> {
236    let mut findings = Vec::new();
237    let mut files_scanned = 0;
238
239    // Collect files to scan
240    let files: Vec<PathBuf> = if path.is_file() {
241        vec![path.to_path_buf()]
242    } else {
243        WalkDir::new(path)
244            .into_iter()
245            .filter_map(|e| e.ok())
246            .filter(|e| e.file_type().is_file())
247            .filter(|e| {
248                // Filter by extension (only scan text files)
249                let ext = e.path().extension().and_then(|e| e.to_str()).unwrap_or("");
250                matches!(
251                    ext,
252                    "py" | "js"
253                        | "ts"
254                        | "jsx"
255                        | "tsx"
256                        | "go"
257                        | "rs"
258                        | "java"
259                        | "rb"
260                        | "php"
261                        | "yaml"
262                        | "yml"
263                        | "json"
264                        | "toml"
265                        | "xml"
266                        | "env"
267                        | "sh"
268                        | "bash"
269                        | "zsh"
270                        | "config"
271                        | "cfg"
272                        | "conf"
273                        | "properties"
274                )
275            })
276            .filter(|e| {
277                // Skip test files unless requested
278                include_test || !TEST_FILE_PATTERNS.is_match(&e.path().to_string_lossy())
279            })
280            .map(|e| e.path().to_path_buf())
281            .collect()
282    };
283
284    // Scan each file
285    for file_path in &files {
286        if let Ok(file_findings) = scan_file(file_path, entropy_threshold) {
287            findings.extend(file_findings);
288            files_scanned += 1;
289        }
290    }
291
292    // Apply severity filter
293    if let Some(min_severity) = severity_filter {
294        findings.retain(|f| f.severity >= min_severity);
295    }
296
297    // Calculate summary
298    let mut by_severity: HashMap<String, usize> = HashMap::new();
299    let mut by_pattern: HashMap<String, usize> = HashMap::new();
300    for finding in &findings {
301        *by_severity.entry(finding.severity.to_string()).or_insert(0) += 1;
302        *by_pattern.entry(finding.pattern.clone()).or_insert(0) += 1;
303    }
304
305    let summary = SecretsSummary {
306        total_findings: findings.len(),
307        by_severity,
308        by_pattern,
309    };
310
311    Ok(SecretsReport {
312        findings,
313        files_scanned,
314        patterns_checked: SECRET_PATTERNS.len(),
315        summary,
316    })
317}
318
319// =============================================================================
320// Internal Implementation
321// =============================================================================
322
323/// Scan a single file for secrets
324fn scan_file(path: &Path, entropy_threshold: f64) -> TldrResult<Vec<SecretFinding>> {
325    let content = std::fs::read_to_string(path)?;
326    let mut findings = Vec::new();
327
328    for (line_num, line) in content.lines().enumerate() {
329        let line_num = (line_num + 1) as u32;
330
331        // Check each pattern
332        for pattern in SECRET_PATTERNS.iter() {
333            if let Some(mat) = pattern.pattern.find(line) {
334                // Skip placeholder/example values for generic patterns
335                if is_placeholder_pattern_match(line, pattern.name) {
336                    continue;
337                }
338                findings.push(SecretFinding {
339                    file: path.to_path_buf(),
340                    line: line_num,
341                    column: mat.start() as u32,
342                    pattern: pattern.name.to_string(),
343                    severity: pattern.severity,
344                    masked_value: mask_secret(mat.as_str()),
345                    description: pattern.description.to_string(),
346                    line_content: Some(truncate_line(line, 100)),
347                });
348            }
349        }
350
351        // Check for high-entropy strings
352        for word in extract_strings(line) {
353            if word.len() >= 16 && shannon_entropy(&word) > entropy_threshold {
354                // Skip if it looks like a common non-secret pattern
355                if !is_likely_false_positive(&word) {
356                    findings.push(SecretFinding {
357                        file: path.to_path_buf(),
358                        line: line_num,
359                        column: line.find(&word).unwrap_or(0) as u32,
360                        pattern: "High Entropy".to_string(),
361                        severity: Severity::Medium,
362                        masked_value: mask_secret(&word),
363                        description: format!(
364                            "High entropy string detected (entropy: {:.2})",
365                            shannon_entropy(&word)
366                        ),
367                        line_content: Some(truncate_line(line, 100)),
368                    });
369                }
370            }
371        }
372    }
373
374    Ok(findings)
375}
376
377/// Extract quoted strings from a line
378fn extract_strings(line: &str) -> Vec<String> {
379    let mut strings = Vec::new();
380    let re = Regex::new(r#"['"]([^'"]{8,})['"]"#).unwrap();
381
382    for cap in re.captures_iter(line) {
383        if let Some(m) = cap.get(1) {
384            strings.push(m.as_str().to_string());
385        }
386    }
387
388    strings
389}
390
/// Calculate Shannon entropy of a string
///
/// Entropy is measured in bits per character over the distribution of
/// Unicode scalar values (`char`s) in `s`. Returns `0.0` for an empty string.
///
/// The total is the number of `char`s, not the byte length, so that the
/// per-character probabilities sum to 1 for non-ASCII input as well.
fn shannon_entropy(s: &str) -> f64 {
    let mut freq: HashMap<char, usize> = HashMap::new();
    let mut total: usize = 0;
    for c in s.chars() {
        *freq.entry(c).or_insert(0) += 1;
        total += 1;
    }

    if total == 0 {
        return 0.0;
    }

    let total = total as f64;
    freq.values()
        .map(|&count| {
            let p = count as f64 / total;
            -p * p.log2()
        })
        .sum()
}
410
411/// Check if a high-entropy string is likely a false positive
412fn is_likely_false_positive(s: &str) -> bool {
413    // Common non-secret patterns
414    let fp_patterns = [
415        // UUIDs
416        Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$").unwrap(),
417        // Hex hashes (SHA, MD5, etc.)
418        Regex::new(r"^[0-9a-fA-F]{32,}$").unwrap(),
419        // Base64 encoded common strings
420        Regex::new(r"^[A-Za-z0-9+/]+=*$").unwrap(),
421    ];
422
423    // Check if it matches a known false positive pattern
424    for pattern in &fp_patterns {
425        if pattern.is_match(s) {
426            return true;
427        }
428    }
429
430    // Check if it's all same character repeated
431    if s.chars().collect::<std::collections::HashSet<_>>().len() <= 2 {
432        return true;
433    }
434
435    // Check if it looks like a version string or date
436    if s.contains('.') && s.chars().filter(|c| *c == '.').count() >= 2 {
437        return true;
438    }
439
440    false
441}
442
/// Generic pattern names eligible for placeholder filtering.
///
/// Only these broad-matching patterns can be suppressed when the matched value
/// looks like a placeholder. Specific patterns (AWS, GitHub, Slack, etc.) are
/// never suppressed because a structural match on those is high-confidence
/// regardless of the value content.
///
/// Entries must match the `name` field of the corresponding pattern exactly.
const GENERIC_PATTERN_NAMES: &[&str] = &["API Key", "Password", "Secret"];

/// Uppercase words that indicate a placeholder value rather than a real secret.
///
/// The candidate value is uppercased before the substring comparison, so the
/// match is effectively case-insensitive and succeeds anywhere in the value.
const PLACEHOLDER_WORDS: &[&str] = &[
    "YOUR_",
    "REPLACE",
    "EXAMPLE",
    "CHANGEME",
    "FIXME",
    "TODO",
    "INSERT",
    "PLACEHOLDER",
];

/// Filler characters: a value that consists entirely of ONE of these characters
/// repeated (3+ chars, after hyphens and underscores are removed) indicates filler.
const FILLER_CHARS: &[char] = &['x', 'X', '*', '?', '0'];
465
466/// Check whether a pattern-based match on a line is a placeholder/example value.
467///
468/// Returns `true` (skip this finding) when ALL of:
469/// 1. `pattern_name` is one of the generic patterns ("API Key", "Password", "Secret")
470/// 2. The line contains an assignment (`=` or `:`) with a quoted value
471/// 3. The value contains a placeholder indicator (uppercase keyword, angle-bracket
472///    template, template marker, or repeated filler characters)
473///
474/// For specific patterns (AWS, GitHub, Private Key, etc.) this always returns `false`.
475fn is_placeholder_pattern_match(line: &str, pattern_name: &str) -> bool {
476    // Only filter generic patterns
477    if !GENERIC_PATTERN_NAMES.contains(&pattern_name) {
478        return false;
479    }
480
481    // Extract the value portion: find assignment operator, then quoted value
482    let value = match extract_assigned_value(line) {
483        Some(v) => v,
484        None => return false,
485    };
486
487    let upper = value.to_uppercase();
488
489    // Check uppercase placeholder words
490    for word in PLACEHOLDER_WORDS {
491        if upper.contains(word) {
492            return true;
493        }
494    }
495
496    // Check angle-bracket templates: <...>
497    if value.contains('<') && value.contains('>') {
498        return true;
499    }
500
501    // Check template markers: ${...} or {{...}}
502    if value.contains("${") || value.contains("{{") {
503        return true;
504    }
505
506    // Check repeated filler characters (strip non-filler chars like hyphens first)
507    let stripped: String = value.chars().filter(|c| *c != '-' && *c != '_').collect();
508    if stripped.len() >= 3 {
509        for &filler in FILLER_CHARS {
510            if stripped.chars().all(|c| c == filler) {
511                return true;
512            }
513        }
514    }
515
516    false
517}
518
/// Pull the quoted value out of an assignment-like line.
///
/// The operator is the first `=` in the line, or — only when the line has no
/// `=` at all — the first `:`. After skipping surrounding whitespace, the text
/// following the operator must open with `"` or `'`; the content up to the
/// next occurrence of that same quote character is returned.
///
/// Returns `None` when there is no operator, when the text after it does not
/// start with a quote, or when the quote is never closed.
fn extract_assigned_value(line: &str) -> Option<String> {
    // `=` takes precedence over `:` regardless of which comes first
    let op_idx = line.find('=').or_else(|| line.find(':'))?;
    let candidate = line[op_idx + 1..].trim();

    // The value must begin with an opening quote
    let mut remaining = candidate.chars();
    let delimiter = match remaining.next() {
        Some(q) if q == '"' || q == '\'' => q,
        _ => return None,
    };

    // Everything up to the matching closing quote is the value
    let body = remaining.as_str();
    let close = body.find(delimiter)?;
    Some(body[..close].to_string())
}
546
/// Mask a secret value for safe display
///
/// Values of 8 characters or fewer are fully replaced by `*`. Longer values
/// keep `min(4, len / 4)` characters visible at each end with the middle
/// replaced by `*`, preserving the overall character count.
///
/// Lengths and slices are computed in `char`s rather than bytes so that
/// values containing multibyte UTF-8 characters cannot cause a slicing panic.
fn mask_secret(value: &str) -> String {
    let chars: Vec<char> = value.chars().collect();
    let len = chars.len();
    if len <= 8 {
        return "*".repeat(len);
    }

    let visible = 4.min(len / 4);
    let mut masked = String::with_capacity(value.len());
    masked.extend(chars[..visible].iter());
    masked.push_str(&"*".repeat(len - visible * 2));
    masked.extend(chars[len - visible..].iter());
    masked
}
562
/// Truncate a line to max length
///
/// Lines of at most `max_len` bytes are returned unchanged. Longer lines are
/// cut and suffixed with an ellipsis so that the result is at most `max_len`
/// bytes. The cut is moved back to the nearest UTF-8 character boundary, so
/// the result may be slightly shorter than `max_len`.
///
/// When `max_len` is less than 3 the ellipsis itself is shortened to fit,
/// which also avoids an arithmetic underflow on `max_len - 3`.
fn truncate_line(line: &str, max_len: usize) -> String {
    if line.len() <= max_len {
        return line.to_string();
    }

    let ellipsis_len = max_len.min(3);
    // Find nearest char boundary to avoid UTF-8 panic
    let mut end = max_len - ellipsis_len;
    while end > 0 && !line.is_char_boundary(end) {
        end -= 1;
    }
    format!("{}{}", &line[..end], ".".repeat(ellipsis_len))
}
576
// Unit tests for the secret scanner: entropy and masking helpers, the
// compiled pattern table, test-file detection, placeholder filtering, and two
// file-based integration checks that go through `scan_file`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_shannon_entropy() {
        // Low entropy (repetitive)
        assert!(shannon_entropy("aaaaaaaaaa") < 1.0);

        // High entropy (random-looking)
        assert!(shannon_entropy("aB3$kL9@mN2#") > 3.0);
    }

    #[test]
    fn test_mask_secret() {
        // Short values are fully masked; longer ones keep 4 chars at each end
        assert_eq!(mask_secret("short"), "*****");
        assert_eq!(mask_secret("AKIAIOSFODNN7EXAMPLE"), "AKIA************MPLE");
    }

    #[test]
    fn test_aws_key_pattern() {
        // Index 0 of SECRET_PATTERNS is the "AWS Access Key" entry
        let pattern = &SECRET_PATTERNS[0];
        assert!(pattern.pattern.is_match("AKIAIOSFODNN7EXAMPLE"));
        assert!(!pattern.pattern.is_match("AKIA")); // Too short
    }

    #[test]
    fn test_private_key_pattern() {
        // Index 2 of SECRET_PATTERNS is the "Private Key" entry
        let pattern = &SECRET_PATTERNS[2];
        assert!(pattern.pattern.is_match("-----BEGIN RSA PRIVATE KEY-----"));
        assert!(pattern.pattern.is_match("-----BEGIN PRIVATE KEY-----"));
    }

    #[test]
    fn test_test_file_detection() {
        assert!(TEST_FILE_PATTERNS.is_match("test_secrets.py"));
        assert!(TEST_FILE_PATTERNS.is_match("secrets.test.js"));
        assert!(TEST_FILE_PATTERNS.is_match("conftest.py"));
        assert!(!TEST_FILE_PATTERNS.is_match("secrets.py"));
    }

    #[test]
    fn test_severity_ordering() {
        // Relies on the derived Ord following variant declaration order
        assert!(Severity::Critical > Severity::High);
        assert!(Severity::High > Severity::Medium);
        assert!(Severity::Medium > Severity::Low);
    }

    #[test]
    fn test_extract_strings() {
        let strings = extract_strings(r#"api_key = "sk-abcdefghijklmnop""#);
        assert_eq!(strings.len(), 1);
        assert_eq!(strings[0], "sk-abcdefghijklmnop");
    }

    // ---- is_placeholder_pattern_match tests ----

    #[test]
    fn test_placeholder_skips_generic_patterns_only() {
        // Generic patterns ("API Key", "Password", "Secret") should be skippable
        assert!(is_placeholder_pattern_match(
            r#"API_KEY = "YOUR_API_KEY_HERE""#,
            "API Key"
        ));
        assert!(is_placeholder_pattern_match(
            r#"password = "REPLACE_ME""#,
            "Password"
        ));
        assert!(is_placeholder_pattern_match(
            r#"SECRET_TOKEN = "<your-secret-token>""#,
            "Secret"
        ));
    }

    #[test]
    fn test_placeholder_never_skips_specific_patterns() {
        // Specific patterns must never be skipped, even if value looks like placeholder
        assert!(!is_placeholder_pattern_match(
            r#"key = "YOUR_API_KEY_HERE""#,
            "AWS Access Key"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"key = "YOUR_API_KEY_HERE""#,
            "AWS Secret Key"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"key = "REPLACE_ME""#,
            "GitHub Token"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"key = "REPLACE_ME""#,
            "Private Key"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"key = "REPLACE_ME""#,
            "Database URL"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"key = "REPLACE_ME""#,
            "Slack Token"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"key = "REPLACE_ME""#,
            "JWT"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"key = "REPLACE_ME""#,
            "Bearer Token"
        ));
    }

    #[test]
    fn test_placeholder_uppercase_words() {
        // YOUR_ prefix
        assert!(is_placeholder_pattern_match(
            r#"api_key = "YOUR_KEY_VALUE""#,
            "API Key"
        ));
        // REPLACE
        assert!(is_placeholder_pattern_match(
            r#"secret = "REPLACE_THIS""#,
            "Secret"
        ));
        // EXAMPLE
        assert!(is_placeholder_pattern_match(
            r#"api_key = "EXAMPLE_KEY_12345""#,
            "API Key"
        ));
        // CHANGEME
        assert!(is_placeholder_pattern_match(
            r#"password = "CHANGEME""#,
            "Password"
        ));
        // FIXME
        assert!(is_placeholder_pattern_match(
            r#"token = "FIXME_token""#,
            "Secret"
        ));
        // TODO
        assert!(is_placeholder_pattern_match(
            r#"secret = "TODO_fill_this""#,
            "Secret"
        ));
        // INSERT
        assert!(is_placeholder_pattern_match(
            r#"password = "INSERT_PASSWORD""#,
            "Password"
        ));
        // PLACEHOLDER
        assert!(is_placeholder_pattern_match(
            r#"token = "PLACEHOLDER_value""#,
            "Secret"
        ));
    }

    #[test]
    fn test_placeholder_angle_bracket_templates() {
        assert!(is_placeholder_pattern_match(
            r#"password = "<password>""#,
            "Password"
        ));
        assert!(is_placeholder_pattern_match(
            r#"secret = "<your-api-key>""#,
            "Secret"
        ));
        assert!(is_placeholder_pattern_match(
            r#"token = "<insert-token-here>""#,
            "Secret"
        ));
    }

    #[test]
    fn test_placeholder_template_markers() {
        // ${...} style
        assert!(is_placeholder_pattern_match(
            r#"secret = "${SECRET_TOKEN}""#,
            "Secret"
        ));
        // {{...}} style
        assert!(is_placeholder_pattern_match(
            r#"password = "{{vault.password}}""#,
            "Password"
        ));
    }

    #[test]
    fn test_placeholder_repeated_filler_chars() {
        // All x's
        assert!(is_placeholder_pattern_match(
            r#"token = "xxx-xxx-xxx""#,
            "Secret"
        ));
        // All *'s
        assert!(is_placeholder_pattern_match(
            r#"password = "********""#,
            "Password"
        ));
        // All ?'s
        assert!(is_placeholder_pattern_match(
            r#"secret = "????????""#,
            "Secret"
        ));
        // All 0's
        assert!(is_placeholder_pattern_match(
            r#"token = "0000000000""#,
            "Secret"
        ));
        // Too short (2 chars) should not match filler
        assert!(!is_placeholder_pattern_match(
            r#"password = "xx""#,
            "Password"
        ));
    }

    #[test]
    fn test_placeholder_real_secrets_not_skipped() {
        // Real high-entropy secrets must NOT be treated as placeholders
        assert!(!is_placeholder_pattern_match(
            r#"api_key = "a3f8b2c1d4e5f6789012345678abcdef""#,
            "API Key"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"password = "S3cur3P@ssw0rd!2024""#,
            "Password"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"secret = "K8mPqR3sT7uVwX2yZ4aBcDeFgHjKm""#,
            "Secret"
        ));
    }

    #[test]
    fn test_placeholder_no_value_portion() {
        // Lines without = or : should not be skipped (no value to extract)
        assert!(!is_placeholder_pattern_match(
            r#"echo "YOUR_API_KEY_HERE""#,
            "API Key"
        ));
    }

    #[test]
    fn test_placeholder_scan_file_integration() {
        // Integration: scan_file should filter out placeholder findings
        use std::io::Write;
        // Uses a fixed directory name under the system temp dir; removed at the end
        let dir = std::env::temp_dir().join("tldr_test_placeholder");
        std::fs::create_dir_all(&dir).unwrap();
        let file = dir.join("config_template.py");
        {
            // Inner scope so the file handle is dropped before scanning
            let mut f = std::fs::File::create(&file).unwrap();
            writeln!(f, r#"SECRET = "REPLACE_ME""#).unwrap();
            writeln!(f, r#"TOKEN = "xxx-xxx-xxx""#).unwrap();
            writeln!(f, r#"PASSWORD = "<password>""#).unwrap();
        }
        let findings = scan_file(&file, 4.5).unwrap();
        // Only the generic pattern findings are relevant to placeholder filtering
        let secret_findings: Vec<_> = findings
            .iter()
            .filter(|f| f.pattern == "Secret" || f.pattern == "Password" || f.pattern == "API Key")
            .collect();
        assert!(
            secret_findings.is_empty(),
            "Placeholder values should produce 0 pattern findings, got {}: {:?}",
            secret_findings.len(),
            secret_findings
                .iter()
                .map(|f| format!("{}: {}", f.line, f.pattern))
                .collect::<Vec<_>>()
        );
        // Best-effort cleanup; a failure here does not fail the test
        std::fs::remove_dir_all(&dir).ok();
    }

    #[test]
    fn test_real_secrets_still_detected_after_placeholder_filter() {
        // Integration: real secrets must still be found
        use std::io::Write;
        // Uses a fixed directory name under the system temp dir; removed at the end
        let dir = std::env::temp_dir().join("tldr_test_real_secrets");
        std::fs::create_dir_all(&dir).unwrap();
        let file = dir.join("config.py");
        {
            // Inner scope so the file handle is dropped before scanning
            let mut f = std::fs::File::create(&file).unwrap();
            writeln!(
                f,
                r#"secret = "K8mPqR3sT7uVwX2yZ4aBcDeFgHjKmNpQr""#
            )
            .unwrap();
            writeln!(f, r#"password = "S3cur3P@ssw0rd!2024""#).unwrap();
        }
        let findings = scan_file(&file, 4.5).unwrap();
        let secret_findings: Vec<_> = findings
            .iter()
            .filter(|f| f.pattern == "Secret" || f.pattern == "Password")
            .collect();
        assert!(
            !secret_findings.is_empty(),
            "Real secrets must still be detected after placeholder filter"
        );
        // Best-effort cleanup; a failure here does not fail the test
        std::fs::remove_dir_all(&dir).ok();
    }
}