1use std::collections::HashMap;
20use std::path::{Path, PathBuf};
21
22use regex::Regex;
23use serde::{Deserialize, Serialize};
24use walkdir::WalkDir;
25
26use crate::TldrResult;
27
/// Severity level attached to every finding.
///
/// Variants are declared from least to most severe. The derived `Ord`
/// follows declaration order, so `Severity::Critical > Severity::Low`; the
/// severity filter in `scan_secrets` relies on that ordering.
///
/// serde (de)serializes the variants as lowercase names (`"low"`, ...),
/// whereas the `Display` impl prints them in upper case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
    /// Lowest level; not assigned by any built-in pattern in this file.
    Low,
    /// Assigned to JWT, bearer-token and high-entropy findings.
    Medium,
    /// Assigned to generic API key, password, secret/token and database URL findings.
    High,
    /// Assigned to AWS, private-key, GitHub-token and Slack-token findings.
    Critical,
}
45
46impl std::fmt::Display for Severity {
47 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48 match self {
49 Severity::Low => write!(f, "LOW"),
50 Severity::Medium => write!(f, "MEDIUM"),
51 Severity::High => write!(f, "HIGH"),
52 Severity::Critical => write!(f, "CRITICAL"),
53 }
54 }
55}
56
/// One regex-based detection rule.
#[derive(Debug, Clone)]
struct SecretPattern {
    /// Human-readable rule name; copied into `SecretFinding::pattern`.
    name: &'static str,
    /// Compiled regex that is searched for on each line.
    pattern: Regex,
    /// Severity reported for every match of this rule.
    severity: Severity,
    /// Message copied into `SecretFinding::description`.
    description: &'static str,
}
65
/// A single potential secret located in a file.
///
/// The field notes below describe the values as populated by `scan_file`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecretFinding {
    /// Path of the file the finding was reported for.
    pub file: PathBuf,
    /// 1-based line number within the file.
    pub line: u32,
    /// 0-based byte offset within the line where the match starts.
    pub column: u32,
    /// Name of the rule that matched, or `"High Entropy"` for entropy findings.
    pub pattern: String,
    /// Severity of the rule that matched.
    pub severity: Severity,
    /// The matched text after being passed through `mask_secret`.
    pub masked_value: String,
    /// Human-readable explanation of the finding.
    pub description: String,
    /// The offending line, truncated to about 100 bytes; left out of the
    /// serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub line_content: Option<String>,
}
87
/// Aggregate counts over the findings of a report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecretsSummary {
    /// Number of findings in the report.
    pub total_findings: usize,
    /// Finding count per severity; `scan_secrets` keys this by the
    /// `Display` form of `Severity` (e.g. `"HIGH"`).
    pub by_severity: HashMap<String, usize>,
    /// Finding count per rule name (`SecretFinding::pattern`).
    pub by_pattern: HashMap<String, usize>,
}
98
/// Result of a `scan_secrets` run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecretsReport {
    /// Findings that remain after the optional severity filter was applied.
    pub findings: Vec<SecretFinding>,
    /// Number of files that were read and scanned successfully.
    pub files_scanned: usize,
    /// Number of regex rules in `SECRET_PATTERNS` (the entropy check is not counted).
    pub patterns_checked: usize,
    /// Aggregate counts over `findings`.
    pub summary: SecretsSummary,
}
111
112lazy_static::lazy_static! {
117 static ref SECRET_PATTERNS: Vec<SecretPattern> = vec![
119 SecretPattern {
121 name: "AWS Access Key",
122 pattern: Regex::new(r"AKIA[0-9A-Z]{16}").unwrap(),
123 severity: Severity::Critical,
124 description: "AWS Access Key ID detected",
125 },
126 SecretPattern {
128 name: "AWS Secret Key",
129 pattern: Regex::new(r#"(?i)aws(.{0,20})?['"][0-9a-zA-Z/+]{40}['"]"#).unwrap(),
130 severity: Severity::Critical,
131 description: "AWS Secret Access Key detected",
132 },
133 SecretPattern {
135 name: "Private Key",
136 pattern: Regex::new(r"-----BEGIN\s*(RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY-----").unwrap(),
137 severity: Severity::Critical,
138 description: "Private key header detected",
139 },
140 SecretPattern {
142 name: "GitHub Token",
143 pattern: Regex::new(r"gh[pousr]_[A-Za-z0-9_]{36,}").unwrap(),
144 severity: Severity::Critical,
145 description: "GitHub personal access token detected",
146 },
147 SecretPattern {
149 name: "API Key",
150 pattern: Regex::new(r#"(?i)(api[_-]?key|apikey)\s*[:=]\s*['"]\s*[a-zA-Z0-9]{20,}['"]\s*"#).unwrap(),
151 severity: Severity::High,
152 description: "Generic API key pattern detected",
153 },
154 SecretPattern {
156 name: "Password",
157 pattern: Regex::new(r#"(?i)(password|passwd|pwd)\s*[:=]\s*['"][^'"]{4,}['"]"#).unwrap(),
158 severity: Severity::High,
159 description: "Hardcoded password detected",
160 },
161 SecretPattern {
163 name: "Secret",
164 pattern: Regex::new(r#"(?i)(secret|token)\s*[:=]\s*['"][^'"]{8,}['"]"#).unwrap(),
165 severity: Severity::High,
166 description: "Hardcoded secret/token detected",
167 },
168 SecretPattern {
170 name: "Database URL",
171 pattern: Regex::new(r"(?i)(postgres|mysql|mongodb|redis)://[^:]+:[^@]+@").unwrap(),
172 severity: Severity::High,
173 description: "Database URL with credentials detected",
174 },
175 SecretPattern {
177 name: "Slack Token",
178 pattern: Regex::new(r"xox[baprs]-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*").unwrap(),
179 severity: Severity::Critical,
180 description: "Slack token detected",
181 },
182 SecretPattern {
184 name: "JWT",
185 pattern: Regex::new(r"eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*").unwrap(),
186 severity: Severity::Medium,
187 description: "JSON Web Token detected",
188 },
189 SecretPattern {
191 name: "Bearer Token",
192 pattern: Regex::new(r#"(?i)bearer\s+[a-zA-Z0-9_\-\.]+[a-zA-Z0-9_\-\.]"#).unwrap(),
193 severity: Severity::Medium,
194 description: "Bearer token in header detected",
195 },
196 ];
197
198 static ref TEST_FILE_PATTERNS: Regex = Regex::new(
200 r"(?i)(test[_/]|_test\.|\.test\.|spec[_/]|_spec\.|\.spec\.|conftest|fixture|mock)"
201 ).unwrap();
202}
203
204pub fn scan_secrets(
231 path: &Path,
232 entropy_threshold: f64,
233 include_test: bool,
234 severity_filter: Option<Severity>,
235) -> TldrResult<SecretsReport> {
236 let mut findings = Vec::new();
237 let mut files_scanned = 0;
238
239 let files: Vec<PathBuf> = if path.is_file() {
241 vec![path.to_path_buf()]
242 } else {
243 WalkDir::new(path)
244 .into_iter()
245 .filter_map(|e| e.ok())
246 .filter(|e| e.file_type().is_file())
247 .filter(|e| {
248 let ext = e.path().extension().and_then(|e| e.to_str()).unwrap_or("");
250 matches!(
251 ext,
252 "py" | "js"
253 | "ts"
254 | "jsx"
255 | "tsx"
256 | "go"
257 | "rs"
258 | "java"
259 | "rb"
260 | "php"
261 | "yaml"
262 | "yml"
263 | "json"
264 | "toml"
265 | "xml"
266 | "env"
267 | "sh"
268 | "bash"
269 | "zsh"
270 | "config"
271 | "cfg"
272 | "conf"
273 | "properties"
274 )
275 })
276 .filter(|e| {
277 include_test || !TEST_FILE_PATTERNS.is_match(&e.path().to_string_lossy())
279 })
280 .map(|e| e.path().to_path_buf())
281 .collect()
282 };
283
284 for file_path in &files {
286 if let Ok(file_findings) = scan_file(file_path, entropy_threshold) {
287 findings.extend(file_findings);
288 files_scanned += 1;
289 }
290 }
291
292 if let Some(min_severity) = severity_filter {
294 findings.retain(|f| f.severity >= min_severity);
295 }
296
297 let mut by_severity: HashMap<String, usize> = HashMap::new();
299 let mut by_pattern: HashMap<String, usize> = HashMap::new();
300 for finding in &findings {
301 *by_severity.entry(finding.severity.to_string()).or_insert(0) += 1;
302 *by_pattern.entry(finding.pattern.clone()).or_insert(0) += 1;
303 }
304
305 let summary = SecretsSummary {
306 total_findings: findings.len(),
307 by_severity,
308 by_pattern,
309 };
310
311 Ok(SecretsReport {
312 findings,
313 files_scanned,
314 patterns_checked: SECRET_PATTERNS.len(),
315 summary,
316 })
317}
318
319fn scan_file(path: &Path, entropy_threshold: f64) -> TldrResult<Vec<SecretFinding>> {
325 let content = std::fs::read_to_string(path)?;
326 let mut findings = Vec::new();
327
328 for (line_num, line) in content.lines().enumerate() {
329 let line_num = (line_num + 1) as u32;
330
331 for pattern in SECRET_PATTERNS.iter() {
333 if let Some(mat) = pattern.pattern.find(line) {
334 if is_placeholder_pattern_match(line, pattern.name) {
336 continue;
337 }
338 findings.push(SecretFinding {
339 file: path.to_path_buf(),
340 line: line_num,
341 column: mat.start() as u32,
342 pattern: pattern.name.to_string(),
343 severity: pattern.severity,
344 masked_value: mask_secret(mat.as_str()),
345 description: pattern.description.to_string(),
346 line_content: Some(truncate_line(line, 100)),
347 });
348 }
349 }
350
351 for word in extract_strings(line) {
353 if word.len() >= 16 && shannon_entropy(&word) > entropy_threshold {
354 if !is_likely_false_positive(&word) {
356 findings.push(SecretFinding {
357 file: path.to_path_buf(),
358 line: line_num,
359 column: line.find(&word).unwrap_or(0) as u32,
360 pattern: "High Entropy".to_string(),
361 severity: Severity::Medium,
362 masked_value: mask_secret(&word),
363 description: format!(
364 "High entropy string detected (entropy: {:.2})",
365 shannon_entropy(&word)
366 ),
367 line_content: Some(truncate_line(line, 100)),
368 });
369 }
370 }
371 }
372 }
373
374 Ok(findings)
375}
376
377fn extract_strings(line: &str) -> Vec<String> {
379 let mut strings = Vec::new();
380 let re = Regex::new(r#"['"]([^'"]{8,})['"]"#).unwrap();
381
382 for cap in re.captures_iter(line) {
383 if let Some(m) = cap.get(1) {
384 strings.push(m.as_str().to_string());
385 }
386 }
387
388 strings
389}
390
/// Compute the Shannon entropy of `s` in bits per character.
///
/// Frequencies are counted per `char`, and the total is the number of
/// characters (not bytes), so the probabilities sum to 1 for non-ASCII
/// input as well. Returns `0.0` for an empty string.
fn shannon_entropy(s: &str) -> f64 {
    let mut freq: HashMap<char, usize> = HashMap::new();
    let mut total = 0usize;
    for c in s.chars() {
        *freq.entry(c).or_insert(0) += 1;
        total += 1;
    }

    if total == 0 {
        return 0.0;
    }

    let total = total as f64;
    freq.values()
        .map(|&count| {
            let p = count as f64 / total;
            -p * p.log2()
        })
        .sum()
}
410
411fn is_likely_false_positive(s: &str) -> bool {
413 let fp_patterns = [
415 Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$").unwrap(),
417 Regex::new(r"^[0-9a-fA-F]{32,}$").unwrap(),
419 Regex::new(r"^[A-Za-z0-9+/]+=*$").unwrap(),
421 ];
422
423 for pattern in &fp_patterns {
425 if pattern.is_match(s) {
426 return true;
427 }
428 }
429
430 if s.chars().collect::<std::collections::HashSet<_>>().len() <= 2 {
432 return true;
433 }
434
435 if s.contains('.') && s.chars().filter(|c| *c == '.').count() >= 2 {
437 return true;
438 }
439
440 false
441}
442
/// Names of the rules for which placeholder values are ignored; every other
/// rule is reported regardless of what the assigned value looks like.
const GENERIC_PATTERN_NAMES: &[&str] = &["API Key", "Password", "Secret"];

/// Upper-case fragments that mark an assigned value as a placeholder. They
/// are compared against the upper-cased value, so matching is
/// case-insensitive.
const PLACEHOLDER_WORDS: &[&str] = &[
    "YOUR_",
    "REPLACE",
    "EXAMPLE",
    "CHANGEME",
    "FIXME",
    "TODO",
    "INSERT",
    "PLACEHOLDER",
];

/// Characters that, when repeated to form the whole value (ignoring `-` and
/// `_`), mark the value as filler such as `xxx-xxx` or `********`.
const FILLER_CHARS: &[char] = &['x', 'X', '*', '?', '0'];
465
466fn is_placeholder_pattern_match(line: &str, pattern_name: &str) -> bool {
476 if !GENERIC_PATTERN_NAMES.contains(&pattern_name) {
478 return false;
479 }
480
481 let value = match extract_assigned_value(line) {
483 Some(v) => v,
484 None => return false,
485 };
486
487 let upper = value.to_uppercase();
488
489 for word in PLACEHOLDER_WORDS {
491 if upper.contains(word) {
492 return true;
493 }
494 }
495
496 if value.contains('<') && value.contains('>') {
498 return true;
499 }
500
501 if value.contains("${") || value.contains("{{") {
503 return true;
504 }
505
506 let stripped: String = value.chars().filter(|c| *c != '-' && *c != '_').collect();
508 if stripped.len() >= 3 {
509 for &filler in FILLER_CHARS {
510 if stripped.chars().all(|c| c == filler) {
511 return true;
512 }
513 }
514 }
515
516 false
517}
518
/// Extract the quoted value on the right-hand side of an assignment.
///
/// The text after the first `=` is used; if the line has no `=`, the text
/// after the first `:` is used instead. That text, once trimmed, must start
/// with a single or double quote, and the value runs up to the next
/// occurrence of the same quote. Returns `None` when there is no operator,
/// no opening quote, or no closing quote.
fn extract_assigned_value(line: &str) -> Option<String> {
    let op_index = line.find('=').or_else(|| line.find(':'))?;

    let mut remainder = line[op_index + 1..].trim().chars();
    let quote = remainder.next().filter(|c| *c == '"' || *c == '\'')?;

    // Everything after the opening quote, up to the matching closing quote.
    let body = remainder.as_str();
    let closing = body.find(quote)?;
    Some(body[..closing].to_string())
}
546
/// Mask a secret so that it can be shown in a report.
///
/// Values of up to 8 characters are replaced entirely by `*`. Longer values
/// keep `min(4, len / 4)` characters at each end and have everything in
/// between replaced by `*`. Lengths are counted in characters rather than
/// bytes, so multi-byte UTF-8 input cannot be split in the middle of a
/// character.
fn mask_secret(value: &str) -> String {
    let chars: Vec<char> = value.chars().collect();
    let len = chars.len();
    if len <= 8 {
        return "*".repeat(len);
    }

    let visible = 4.min(len / 4);
    let head: String = chars[..visible].iter().collect();
    let tail: String = chars[len - visible..].iter().collect();
    format!("{}{}{}", head, "*".repeat(len - visible * 2), tail)
}
562
/// Shorten `line` to roughly `max_len` bytes for display.
///
/// Lines that already fit are returned unchanged. Longer lines are cut at
/// the last character boundary at or before `max_len - 3` bytes and get
/// `...` appended. When `max_len` is smaller than 3, the result is just
/// `...` (the subtraction saturates instead of underflowing).
fn truncate_line(line: &str, max_len: usize) -> String {
    if line.len() <= max_len {
        return line.to_string();
    }

    let mut end = max_len.saturating_sub(3);
    // Back up so that a multi-byte character is never split.
    while end > 0 && !line.is_char_boundary(end) {
        end -= 1;
    }
    format!("{}...", &line[..end])
}
576
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_shannon_entropy() {
        // A single repeated character carries (almost) no information.
        assert!(shannon_entropy("aaaaaaaaaa") < 1.0);

        // Twelve distinct characters give a high entropy.
        assert!(shannon_entropy("aB3$kL9@mN2#") > 3.0);
    }

    #[test]
    fn test_mask_secret() {
        assert_eq!(mask_secret("short"), "*****");
        assert_eq!(mask_secret("AKIAIOSFODNN7EXAMPLE"), "AKIA************MPLE");
    }

    #[test]
    fn test_aws_key_pattern() {
        let pattern = &SECRET_PATTERNS[0];
        assert!(pattern.pattern.is_match("AKIAIOSFODNN7EXAMPLE"));
        // The prefix alone is too short to match.
        assert!(!pattern.pattern.is_match("AKIA"));
    }

    #[test]
    fn test_private_key_pattern() {
        let pattern = &SECRET_PATTERNS[2];
        assert!(pattern.pattern.is_match("-----BEGIN RSA PRIVATE KEY-----"));
        assert!(pattern.pattern.is_match("-----BEGIN PRIVATE KEY-----"));
    }

    #[test]
    fn test_test_file_detection() {
        assert!(TEST_FILE_PATTERNS.is_match("test_secrets.py"));
        assert!(TEST_FILE_PATTERNS.is_match("secrets.test.js"));
        assert!(TEST_FILE_PATTERNS.is_match("conftest.py"));
        assert!(!TEST_FILE_PATTERNS.is_match("secrets.py"));
    }

    #[test]
    fn test_severity_ordering() {
        assert!(Severity::Critical > Severity::High);
        assert!(Severity::High > Severity::Medium);
        assert!(Severity::Medium > Severity::Low);
    }

    #[test]
    fn test_extract_strings() {
        let strings = extract_strings(r#"api_key = "sk-abcdefghijklmnop""#);
        assert_eq!(strings.len(), 1);
        assert_eq!(strings[0], "sk-abcdefghijklmnop");
    }

    #[test]
    fn test_placeholder_skips_generic_patterns_only() {
        assert!(is_placeholder_pattern_match(
            r#"API_KEY = "YOUR_API_KEY_HERE""#,
            "API Key"
        ));
        assert!(is_placeholder_pattern_match(
            r#"password = "REPLACE_ME""#,
            "Password"
        ));
        assert!(is_placeholder_pattern_match(
            r#"SECRET_TOKEN = "<your-secret-token>""#,
            "Secret"
        ));
    }

    #[test]
    fn test_placeholder_never_skips_specific_patterns() {
        // Rules with a specific shape are reported even for placeholder-looking values.
        assert!(!is_placeholder_pattern_match(
            r#"key = "YOUR_API_KEY_HERE""#,
            "AWS Access Key"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"key = "YOUR_API_KEY_HERE""#,
            "AWS Secret Key"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"key = "REPLACE_ME""#,
            "GitHub Token"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"key = "REPLACE_ME""#,
            "Private Key"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"key = "REPLACE_ME""#,
            "Database URL"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"key = "REPLACE_ME""#,
            "Slack Token"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"key = "REPLACE_ME""#,
            "JWT"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"key = "REPLACE_ME""#,
            "Bearer Token"
        ));
    }

    #[test]
    fn test_placeholder_uppercase_words() {
        assert!(is_placeholder_pattern_match(
            r#"api_key = "YOUR_KEY_VALUE""#,
            "API Key"
        ));
        assert!(is_placeholder_pattern_match(
            r#"secret = "REPLACE_THIS""#,
            "Secret"
        ));
        assert!(is_placeholder_pattern_match(
            r#"api_key = "EXAMPLE_KEY_12345""#,
            "API Key"
        ));
        assert!(is_placeholder_pattern_match(
            r#"password = "CHANGEME""#,
            "Password"
        ));
        assert!(is_placeholder_pattern_match(
            r#"token = "FIXME_token""#,
            "Secret"
        ));
        assert!(is_placeholder_pattern_match(
            r#"secret = "TODO_fill_this""#,
            "Secret"
        ));
        assert!(is_placeholder_pattern_match(
            r#"password = "INSERT_PASSWORD""#,
            "Password"
        ));
        assert!(is_placeholder_pattern_match(
            r#"token = "PLACEHOLDER_value""#,
            "Secret"
        ));
    }

    #[test]
    fn test_placeholder_angle_bracket_templates() {
        assert!(is_placeholder_pattern_match(
            r#"password = "<password>""#,
            "Password"
        ));
        assert!(is_placeholder_pattern_match(
            r#"secret = "<your-api-key>""#,
            "Secret"
        ));
        assert!(is_placeholder_pattern_match(
            r#"token = "<insert-token-here>""#,
            "Secret"
        ));
    }

    #[test]
    fn test_placeholder_template_markers() {
        assert!(is_placeholder_pattern_match(
            r#"secret = "${SECRET_TOKEN}""#,
            "Secret"
        ));
        assert!(is_placeholder_pattern_match(
            r#"password = "{{vault.password}}""#,
            "Password"
        ));
    }

    #[test]
    fn test_placeholder_repeated_filler_chars() {
        assert!(is_placeholder_pattern_match(
            r#"token = "xxx-xxx-xxx""#,
            "Secret"
        ));
        assert!(is_placeholder_pattern_match(
            r#"password = "********""#,
            "Password"
        ));
        assert!(is_placeholder_pattern_match(
            r#"secret = "????????""#,
            "Secret"
        ));
        assert!(is_placeholder_pattern_match(
            r#"token = "0000000000""#,
            "Secret"
        ));
        // Fewer than three filler characters are not treated as a placeholder.
        assert!(!is_placeholder_pattern_match(
            r#"password = "xx""#,
            "Password"
        ));
    }

    #[test]
    fn test_placeholder_real_secrets_not_skipped() {
        assert!(!is_placeholder_pattern_match(
            r#"api_key = "a3f8b2c1d4e5f6789012345678abcdef""#,
            "API Key"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"password = "S3cur3P@ssw0rd!2024""#,
            "Password"
        ));
        assert!(!is_placeholder_pattern_match(
            r#"secret = "K8mPqR3sT7uVwX2yZ4aBcDeFgHjKm""#,
            "Secret"
        ));
    }

    #[test]
    fn test_placeholder_no_value_portion() {
        // No `=` or `:` on the line, so there is no assigned value to inspect.
        assert!(!is_placeholder_pattern_match(
            r#"echo "YOUR_API_KEY_HERE""#,
            "API Key"
        ));
    }

    #[test]
    fn test_placeholder_scan_file_integration() {
        use std::io::Write;
        // Per-process directory so that concurrent test runs cannot collide.
        let dir = std::env::temp_dir().join(format!(
            "tldr_test_placeholder_{}",
            std::process::id()
        ));
        std::fs::create_dir_all(&dir).unwrap();
        let file = dir.join("config_template.py");
        {
            let mut f = std::fs::File::create(&file).unwrap();
            writeln!(f, r#"SECRET = "REPLACE_ME""#).unwrap();
            writeln!(f, r#"TOKEN = "xxx-xxx-xxx""#).unwrap();
            writeln!(f, r#"PASSWORD = "<password>""#).unwrap();
        }
        let scan_result = scan_file(&file, 4.5);
        // Clean up before asserting so that a failing test leaves nothing behind.
        std::fs::remove_dir_all(&dir).ok();
        let findings = scan_result.unwrap();
        let secret_findings: Vec<_> = findings
            .iter()
            .filter(|f| f.pattern == "Secret" || f.pattern == "Password" || f.pattern == "API Key")
            .collect();
        assert!(
            secret_findings.is_empty(),
            "Placeholder values should produce 0 pattern findings, got {}: {:?}",
            secret_findings.len(),
            secret_findings
                .iter()
                .map(|f| format!("{}: {}", f.line, f.pattern))
                .collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_real_secrets_still_detected_after_placeholder_filter() {
        use std::io::Write;
        // Per-process directory so that concurrent test runs cannot collide.
        let dir = std::env::temp_dir().join(format!(
            "tldr_test_real_secrets_{}",
            std::process::id()
        ));
        std::fs::create_dir_all(&dir).unwrap();
        let file = dir.join("config.py");
        {
            let mut f = std::fs::File::create(&file).unwrap();
            writeln!(
                f,
                r#"secret = "K8mPqR3sT7uVwX2yZ4aBcDeFgHjKmNpQr""#
            )
            .unwrap();
            writeln!(f, r#"password = "S3cur3P@ssw0rd!2024""#).unwrap();
        }
        let scan_result = scan_file(&file, 4.5);
        // Clean up before asserting so that a failing test leaves nothing behind.
        std::fs::remove_dir_all(&dir).ok();
        let findings = scan_result.unwrap();
        let secret_findings: Vec<_> = findings
            .iter()
            .filter(|f| f.pattern == "Secret" || f.pattern == "Password")
            .collect();
        assert!(
            !secret_findings.is_empty(),
            "Real secrets must still be detected after placeholder filter"
        );
    }
}