use anyhow::Result;
use ignore::WalkBuilder;
use parking_lot::Mutex;
use rayon::prelude::*;
use regex::Regex;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum RiskLevel {
    /// Ordinary source files.
    Low = 1,
    /// General configuration files.
    Medium = 2,
    /// Infrastructure and deployment configuration.
    High = 3,
    /// Secrets files: .env, key material, credentials.
    Critical = 4,
}

impl RiskLevel {
    /// Weight applied to a file's occurrence count when computing its risk score.
    pub fn multiplier(&self) -> usize {
        match self {
            RiskLevel::Low => 1,
            RiskLevel::Medium => 2,
            RiskLevel::High => 3,
            RiskLevel::Critical => 4,
        }
    }
}

/// Known credential prefixes, used to identify the provider of a leaked value.
/// Detection is first-match, so more specific prefixes must come before their
/// generic counterparts.
pub const KNOWN_TOKEN_PREFIXES: &[(&str, &str)] = &[
    // GitHub
    ("ghp_", "GitHub Personal Access Token"),
    ("gho_", "GitHub OAuth Token"),
    ("ghu_", "GitHub User-to-Server Token"),
    ("ghs_", "GitHub Server-to-Server Token"),
    ("ghr_", "GitHub Refresh Token"),
    // AWS
    ("AKIA", "AWS Access Key ID"),
    ("ABIA", "AWS STS Token"),
    ("ACCA", "AWS Context-specific Credential"),
    ("ASIA", "AWS Temporary Access Key"),
    // Slack
    ("xoxb-", "Slack Bot Token"),
    ("xoxp-", "Slack User Token"),
    ("xoxa-", "Slack App Token"),
    ("xoxr-", "Slack Refresh Token"),
    // Stripe
    ("sk_live_", "Stripe Live Secret Key"),
    ("sk_test_", "Stripe Test Secret Key"),
    ("pk_live_", "Stripe Live Publishable Key"),
    ("rk_live_", "Stripe Live Restricted Key"),
    // AI providers ("sk-ant-" must precede "sk-", or Anthropic keys match OpenAI)
    ("sk-ant-", "Anthropic API Key"),
    ("sk-", "OpenAI API Key"),
    ("AIza", "Google API Key"),
    ("hf_", "Hugging Face Token"),
    // Package registries
    ("npm_", "npm Access Token"),
    ("pypi-", "PyPI API Token"),
    // Messaging and email
    ("NDc", "Discord Bot Token (Base64)"),
    ("MTk", "Discord Bot Token (Base64)"),
    ("bot", "Telegram Bot Token"),
    ("SK", "Twilio API Key"),
    ("SG.", "SendGrid API Key"),
    ("key-", "Mailgun API Key"),
    // Hosting platforms
    ("dop_v1_", "DigitalOcean Personal Access Token"),
    ("doo_v1_", "DigitalOcean OAuth Token"),
    ("vercel_", "Vercel Token"),
    ("sbp_", "Supabase Token"),
    ("pscale_", "PlanetScale Token"),
    ("railway_", "Railway Token"),
    ("rnd_", "Render Token"),
    ("netlify_", "Netlify Token"),
];

/// Filename fragments that mark a secrets file; such files are scanned
/// regardless of extension.
const CRITICAL_FILE_PATTERNS: &[&str] = &[
    ".env",
    ".env.local",
    ".env.development",
    ".env.production",
    ".env.staging",
    ".envrc",
    "secrets",
    "credentials",
    ".secrets",
    ".credentials",
    "id_rsa",
    "id_ed25519",
    ".pem",
    ".key",
    ".p12",
    ".pfx",
    ".htpasswd",
    ".netrc",
    ".npmrc",
    ".pypirc",
    ".dockerconfigjson",
    "service_account",
    "serviceaccount",
];

/// Name fragments of deployment and application configs that frequently
/// embed credentials.
const HIGH_RISK_FILE_PATTERNS: &[&str] = &[
    "docker-compose",
    "dockerfile",
    "terraform.tfvars",
    "terraform.tfstate",
    ".tfvars",
    "ansible",
    "vault",
    "consul",
    "kubernetes",
    "k8s",
    "helm",
    "kustomize",
    "application.yml",
    "application.yaml",
    "application.properties",
    "appsettings.json",
    "config.yml",
    "config.yaml",
    "config.json",
    "settings.yml",
    "settings.yaml",
    "settings.json",
    "parameters.yml",
    "parameters.yaml",
    "database.yml",
];

/// Generic configuration extensions that get a moderate risk weight.
const MEDIUM_RISK_EXTENSIONS: &[&str] = &[
    "yml",
    "yaml",
    "toml",
    "ini",
    "cfg",
    "conf",
    "config",
    "properties",
];

#[derive(Debug, Clone)]
pub struct AnalyzerConfig {
    /// Maximum number of files to scan (0 = unlimited).
    pub max_files: usize,
    /// Skip files larger than this many bytes.
    pub max_file_size: u64,
    /// Overall time budget in milliseconds (0 = no timeout).
    pub timeout_ms: u64,
    /// Follow symbolic links while walking.
    pub follow_symlinks: bool,
    /// Scan hidden files and directories (required to reach .env files).
    pub include_hidden: bool,
    /// Extensions to scan; empty means the built-in default set.
    pub extensions: Vec<String>,
    /// Directory names skipped wherever they appear in a path.
    pub ignore_dirs: Vec<String>,
    /// Walker thread count (0 = library default).
    pub num_threads: usize,
}

impl Default for AnalyzerConfig {
    fn default() -> Self {
        Self {
            max_files: 10_000,
            max_file_size: 10 * 1024 * 1024, // 10 MiB
            timeout_ms: 30_000,              // 30 seconds
            follow_symlinks: false,
            include_hidden: false,
            extensions: vec![],
            ignore_dirs: vec![
                "node_modules".into(),
                "target".into(),
                ".git".into(),
                "__pycache__".into(),
                "venv".into(),
                ".venv".into(),
                "dist".into(),
                "build".into(),
                ".cache".into(),
            ],
            num_threads: 0, // 0 = library default
        }
    }
}

impl AnalyzerConfig {
    /// A quick scan: fewer files, a small size cap, and a short timeout.
    pub fn fast() -> Self {
        Self {
            max_files: 1_000,
            max_file_size: 1024 * 1024, // 1 MiB
            timeout_ms: 5_000,          // 5 seconds
            ..Default::default()
        }
    }

    /// An exhaustive scan: no file cap, a large size cap, hidden files included.
    pub fn thorough() -> Self {
        Self {
            max_files: 0,                    // unlimited
            max_file_size: 50 * 1024 * 1024, // 50 MiB
            timeout_ms: 120_000,             // 2 minutes
            include_hidden: true,
            ..Default::default()
        }
    }

    fn default_extensions() -> Vec<&'static str> {
        vec![
            "py", "js", "ts", "jsx", "tsx", "rs", "go", "rb", "java", "kt", "swift", "c", "cpp",
            "h", "hpp", "cs", "php", "sh", "bash", "zsh", "fish", "yaml", "yml", "json", "toml",
            "env", "conf", "cfg", "ini", "md", "txt", "sql", "graphql", "prisma",
        ]
    }
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExposureType {
    /// Token assigned a literal value in source code.
    HardcodedValue,
    /// Token passed to a print/log call.
    LoggedOutput,
    /// Token defined in a .env file.
    EnvironmentFile,
    /// Token hardcoded in a configuration file.
    ConfigFile,
    /// Value whose entropy suggests a real secret.
    HighEntropy,
    /// Value starting with a known provider prefix.
    KnownTokenPrefix(String),
}

impl std::fmt::Display for ExposureType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ExposureType::HardcodedValue => write!(f, "Hardcoded value"),
            ExposureType::LoggedOutput => write!(f, "Logged/printed"),
            ExposureType::EnvironmentFile => write!(f, "In .env file"),
            ExposureType::ConfigFile => write!(f, "In config file"),
            ExposureType::HighEntropy => write!(f, "High entropy (likely secret)"),
            ExposureType::KnownTokenPrefix(prefix) => write!(f, "Known prefix: {}", prefix),
        }
    }
}

#[derive(Debug, Clone)]
pub struct ExposureDetail {
    /// 1-based line number of the exposure.
    pub line: usize,
    /// How the token is exposed.
    pub exposure_type: ExposureType,
    /// Redacted snippet or short description of the offending line.
    pub context: String,
}

#[derive(Debug, Clone)]
pub struct FileAnalysis {
    pub path: PathBuf,
    /// Number of token occurrences in the file.
    pub call_count: usize,
    /// Whether at least one exposure was found.
    pub has_exposure: bool,
    /// Intrinsic risk class of the file.
    pub risk_level: RiskLevel,
    /// call_count weighted by the risk level's multiplier.
    pub risk_score: usize,
    pub exposures: Vec<ExposureDetail>,
    /// Line numbers with exposures.
    pub exposure_lines: Vec<usize>,
    /// Line numbers with any token occurrence.
    pub occurrence_lines: Vec<usize>,
}

#[derive(Debug, Clone)]
pub struct AnalysisReport {
    pub token_name: String,
    pub search_dir: PathBuf,
    /// Token occurrences across all files.
    pub total_calls: usize,
    /// Number of files with at least one exposure.
    pub exposure_count: usize,
    /// Sum of per-file risk scores.
    pub total_risk_score: usize,
    /// Number of files classified Critical.
    pub critical_files: usize,
    pub files: Vec<FileAnalysis>,
    /// Wall-clock time the analysis took.
    pub duration: Duration,
    pub files_scanned: usize,
    /// True when the file cap was hit or the analysis timed out.
    pub truncated: bool,
    pub errors: Vec<String>,
}

impl AnalysisReport {
    /// Files ordered by risk score, then exposure, then occurrence count (all descending).
    pub fn files_sorted(&self) -> Vec<&FileAnalysis> {
        let mut sorted: Vec<_> = self.files.iter().collect();
        sorted.sort_by(|a, b| {
            b.risk_score
                .cmp(&a.risk_score)
                .then_with(|| b.has_exposure.cmp(&a.has_exposure))
                .then_with(|| b.call_count.cmp(&a.call_count))
        });
        sorted
    }

    /// Files with at least one exposure.
    pub fn exposed_files(&self) -> Vec<&FileAnalysis> {
        self.files.iter().filter(|f| f.has_exposure).collect()
    }

    /// Files classified High or Critical.
    pub fn high_risk_files(&self) -> Vec<&FileAnalysis> {
        self.files
            .iter()
            .filter(|f| f.risk_level >= RiskLevel::High)
            .collect()
    }

    /// True when any file has an exposure.
    pub fn has_security_issues(&self) -> bool {
        self.exposure_count > 0
    }

    /// True when an exposure was found in a Critical-risk file.
    pub fn has_critical_issues(&self) -> bool {
        self.files
            .iter()
            .any(|f| f.has_exposure && f.risk_level == RiskLevel::Critical)
    }
}

/// Scans a directory tree for occurrences and unsafe exposures of a named token.
pub struct TokenSecurityAnalyzer {
    config: AnalyzerConfig,
}

impl TokenSecurityAnalyzer {
    /// Create an analyzer with the given configuration.
    pub fn new(config: AnalyzerConfig) -> Self {
        Self { config }
    }

    /// Create an analyzer with the default configuration.
    pub fn default_analyzer() -> Self {
        Self::new(AnalyzerConfig::default())
    }

    /// Walk `search_dir` and analyze every candidate file for uses of `token_name`.
    pub fn analyze(&self, token_name: &str, search_dir: &Path) -> Result<AnalysisReport> {
        let start = Instant::now();
        let timeout = if self.config.timeout_ms > 0 {
            Some(Duration::from_millis(self.config.timeout_ms))
        } else {
            None
        };

        if token_name.is_empty() {
            anyhow::bail!("Token name cannot be empty");
        }
        if !search_dir.exists() {
            anyhow::bail!("Search directory does not exist: {}", search_dir.display());
        }

        let files = self.collect_files(search_dir, &start, timeout)?;
        let files_scanned = files.len();
        let truncated = self.config.max_files > 0 && files_scanned >= self.config.max_files;

        if let Some(t) = timeout {
            if start.elapsed() >= t {
                return Ok(self.timeout_report(token_name, search_dir, start));
            }
        }

        let patterns = self.build_patterns(token_name)?;

        let results = self.analyze_files_parallel(&files, &patterns, &start, timeout)?;

        let total_calls: usize = results.iter().map(|f| f.call_count).sum();
        let exposure_count = results.iter().filter(|f| f.has_exposure).count();
        let total_risk_score: usize = results.iter().map(|f| f.risk_score).sum();
        let critical_files = results
            .iter()
            .filter(|f| f.risk_level == RiskLevel::Critical)
            .count();

        Ok(AnalysisReport {
            token_name: token_name.to_string(),
            search_dir: search_dir.to_path_buf(),
            total_calls,
            exposure_count,
            total_risk_score,
            critical_files,
            files: results,
            duration: start.elapsed(),
            files_scanned,
            truncated,
            errors: vec![],
        })
    }

    /// Classify a file's intrinsic risk from its name, path, and extension.
    fn get_file_risk_level(path: &Path) -> RiskLevel {
        let filename = path
            .file_name()
            .map(|n| n.to_string_lossy().to_lowercase())
            .unwrap_or_default();
        let path_str = path.to_string_lossy().to_lowercase();

        for pattern in CRITICAL_FILE_PATTERNS {
            if filename.contains(pattern) {
                return RiskLevel::Critical;
            }
        }

        for pattern in HIGH_RISK_FILE_PATTERNS {
            if filename.contains(pattern) || path_str.contains(pattern) {
                return RiskLevel::High;
            }
        }

        if let Some(ext) = path.extension() {
            let ext_str = ext.to_string_lossy().to_lowercase();
            if MEDIUM_RISK_EXTENSIONS.contains(&ext_str.as_str()) {
                return RiskLevel::Medium;
            }
        }

        RiskLevel::Low
    }

    /// Shannon entropy of `s`, in bits per character.
    fn calculate_entropy(s: &str) -> f64 {
        if s.is_empty() {
            return 0.0;
        }

        let mut char_counts = std::collections::HashMap::new();
        for c in s.chars() {
            *char_counts.entry(c).or_insert(0) += 1;
        }

        // Normalize by the character count (not byte length) so multi-byte
        // input yields probabilities that sum to 1.
        let len = s.chars().count() as f64;
        let mut entropy = 0.0;

        for count in char_counts.values() {
            let p = *count as f64 / len;
            entropy -= p * p.log2();
        }

        entropy
    }

    /// Heuristic: a value is treated as a real secret when it is long enough,
    /// is not an obvious placeholder, and has entropy above 3.5 bits/char.
    fn is_high_entropy_secret(value: &str) -> bool {
        if value.len() < 8 {
            return false;
        }

        // Obvious placeholders are never flagged, whatever their entropy.
        let lower = value.to_lowercase();
        if lower.contains("example")
            || lower.contains("placeholder")
            || lower.contains("your_")
            || lower.contains("xxx")
            || lower.contains("todo")
            || lower.contains("replace")
            || lower == "test"
            || lower == "secret"
            || lower == "password"
        {
            return false;
        }

        let entropy = Self::calculate_entropy(value);
        entropy > 3.5
    }

    /// Return the provider description for a value starting with a known prefix.
    fn detect_known_prefix(value: &str) -> Option<&'static str> {
        for (prefix, description) in KNOWN_TOKEN_PREFIXES {
            if value.starts_with(prefix) {
                return Some(*description);
            }
        }
        None
    }

    /// Walk the tree and collect candidate files, honoring ignore rules,
    /// size limits, the file cap, and the time budget.
    fn collect_files(
        &self,
        search_dir: &Path,
        start: &Instant,
        timeout: Option<Duration>,
    ) -> Result<Vec<PathBuf>> {
        let mut files = Vec::new();
        let extensions: Vec<&str> = if self.config.extensions.is_empty() {
            AnalyzerConfig::default_extensions()
        } else {
            self.config.extensions.iter().map(|s| s.as_str()).collect()
        };

        // NOTE: hidden(true) skips dotfiles, so .env files are only reached
        // when include_hidden is set (as AnalyzerConfig::thorough() does).
        let mut builder = WalkBuilder::new(search_dir);
        builder
            .hidden(!self.config.include_hidden)
            .follow_links(self.config.follow_symlinks)
            .git_ignore(true)
            .git_global(true)
            .git_exclude(true);

        // threads() only affects build_parallel(); the serial build() below
        // ignores it, so this is currently just a hint.
        if self.config.num_threads > 0 {
            builder.threads(self.config.num_threads);
        }

        for result in builder.build() {
            if let Some(t) = timeout {
                if start.elapsed() >= t {
                    break;
                }
            }

            if self.config.max_files > 0 && files.len() >= self.config.max_files {
                break;
            }

            let entry = match result {
                Ok(e) => e,
                Err(_) => continue,
            };

            let path = entry.path();

            if path.is_dir() {
                continue;
            }

            if self.is_ignored_dir(path) {
                continue;
            }

            let filename = path
                .file_name()
                .map(|n| n.to_string_lossy().to_lowercase())
                .unwrap_or_default();

            // Critical files (e.g. .env, id_rsa) bypass the extension filter.
            let is_critical = CRITICAL_FILE_PATTERNS
                .iter()
                .any(|p| filename.contains(p));

            if !is_critical {
                if let Some(ext) = path.extension() {
                    let ext_str = ext.to_string_lossy().to_lowercase();
                    if !extensions.contains(&ext_str.as_str()) {
                        continue;
                    }
                } else {
                    continue;
                }
            }

            if let Ok(metadata) = path.metadata() {
                if metadata.len() > self.config.max_file_size {
                    continue;
                }
            }

            files.push(path.to_path_buf());
        }

        Ok(files)
    }

    /// True when any path component matches a configured ignore directory.
    fn is_ignored_dir(&self, path: &Path) -> bool {
        for component in path.components() {
            if let std::path::Component::Normal(name) = component {
                let name_str = name.to_string_lossy();
                if self
                    .config
                    .ignore_dirs
                    .iter()
                    .any(|d| d == name_str.as_ref())
                {
                    return true;
                }
            }
        }
        false
    }

    /// Compile the occurrence regex and the exposure-detection regex for `token_name`.
    fn build_patterns(&self, token_name: &str) -> Result<AnalysisPatterns> {
        let escaped = regex::escape(token_name);

        // Whole-word occurrences of the token name.
        let token_pattern = format!(r"\b{}\b", escaped);
        let token_regex = Regex::new(&token_pattern)
            .map_err(|e| anyhow::anyhow!("Failed to build token regex: {}", e))?;

        let exposure_patterns = [
            // Direct assignment of a quoted literal: TOKEN = "value"
            format!(r#"\b{}\b\s*=\s*["'][^"']+["']"#, escaped),
            // Dict/JSON entry: "TOKEN": "value"
            format!(r#"["']{}\s*["']\s*:\s*["'][^"']+["']"#, escaped),
            // Print-style output
            format!(
                r"(?i)(print|println!?|printf|echo|puts)\s*[\(\[].*\b{}\b",
                escaped
            ),
            // JavaScript console logging
            format!(
                r"(?i)console\.(log|info|warn|error|debug)\s*\(.*\b{}\b",
                escaped
            ),
            // Python logging
            format!(
                r"(?i)(logging\.|logger\.)(info|debug|warning|error|critical)\s*\(.*\b{}\b",
                escaped
            ),
            // Rust log macros
            format!(
                r"(?i)(log::)?(info!|debug!|warn!|error!|trace!)\s*\(.*\b{}\b",
                escaped
            ),
            // Generic log calls
            format!(r"(?i)\blog\s*[\(\[].*\b{}\b", escaped),
            // Writes to stdout/stderr
            format!(
                r"(?i)(stdout|stderr|write|writeln!?)\s*[\(\[].*\b{}\b",
                escaped
            ),
            // Python f-strings
            format!(r#"(?i)f["'].*\b{}\b"#, escaped),
            // Rust format! calls
            format!(r"(?i)format!\s*\(.*\b{}\b", escaped),
        ];

        let exposure_regex = Regex::new(&exposure_patterns.join("|"))
            .map_err(|e| anyhow::anyhow!("Failed to build exposure regex: {}", e))?;

        Ok(AnalysisPatterns {
            token_name: token_name.to_string(),
            token_regex,
            exposure_regex,
        })
    }

    /// Analyze files on the rayon thread pool, stopping early once the timeout hits.
    fn analyze_files_parallel(
        &self,
        files: &[PathBuf],
        patterns: &AnalysisPatterns,
        start: &Instant,
        timeout: Option<Duration>,
    ) -> Result<Vec<FileAnalysis>> {
        let results: Arc<Mutex<Vec<FileAnalysis>>> = Arc::new(Mutex::new(Vec::new()));
        let timed_out = Arc::new(Mutex::new(false));

        files.par_iter().for_each(|file| {
            if let Some(t) = timeout {
                if start.elapsed() >= t {
                    *timed_out.lock() = true;
                    return;
                }
            }

            // Once one task observes the timeout, the rest return immediately.
            if *timed_out.lock() {
                return;
            }

            if let Ok(analysis) = self.analyze_file(file, patterns) {
                if analysis.call_count > 0 {
                    results.lock().push(analysis);
                }
            }
        });

        let inner = Arc::try_unwrap(results)
            .map(|m| m.into_inner())
            .unwrap_or_else(|arc| arc.lock().clone());

        Ok(inner)
    }

    /// Scan one file line-by-line, recording occurrences and classifying exposures.
    fn analyze_file(&self, path: &Path, patterns: &AnalysisPatterns) -> Result<FileAnalysis> {
        let content = fs::read_to_string(path)?;
        let risk_level = Self::get_file_risk_level(path);
        let is_env_file = path
            .file_name()
            .map(|n| n.to_string_lossy().to_lowercase().contains(".env"))
            .unwrap_or(false);
        let is_config_file = risk_level >= RiskLevel::Medium;

        let mut call_count = 0;
        let mut occurrence_lines = Vec::new();
        let mut exposures: Vec<ExposureDetail> = Vec::new();

        // Captures either a quoted value or a bare token-like value after '=' or ':'.
        let value_pattern =
            Regex::new(r#"[=:]\s*["']([^"']+)["']|[=:]\s*([a-zA-Z0-9_\-./+]{8,})"#).ok();

        for (line_num, line) in content.lines().enumerate() {
            let line_number = line_num + 1;

            // Skip comment lines.
            let trimmed = line.trim();
            if trimmed.starts_with('#')
                || trimmed.starts_with("//")
                || trimmed.starts_with("/*")
                || trimmed.starts_with('*')
            {
                continue;
            }

            let matches: Vec<_> = patterns.token_regex.find_iter(line).collect();
            if matches.is_empty() {
                continue;
            }

            call_count += matches.len();
            occurrence_lines.push(line_number);

            // Any value assigned to the token in a .env file is an exposure;
            // classify by known prefix first, then entropy, then generically.
            if is_env_file {
                if let Some(ref vp) = value_pattern {
                    if let Some(caps) = vp.captures(line) {
                        let value = caps.get(1).or(caps.get(2)).map(|m| m.as_str());
                        if let Some(v) = value {
                            if let Some(prefix_desc) = Self::detect_known_prefix(v) {
                                exposures.push(ExposureDetail {
                                    line: line_number,
                                    exposure_type: ExposureType::KnownTokenPrefix(
                                        prefix_desc.to_string(),
                                    ),
                                    context: Self::redact_value(v),
                                });
                            } else if Self::is_high_entropy_secret(v) {
                                exposures.push(ExposureDetail {
                                    line: line_number,
                                    exposure_type: ExposureType::HighEntropy,
                                    context: Self::redact_value(v),
                                });
                            } else {
                                exposures.push(ExposureDetail {
                                    line: line_number,
                                    exposure_type: ExposureType::EnvironmentFile,
                                    context: format!("{}=***", patterns.token_name),
                                });
                            }
                        }
                    }
                }
                continue;
            }

            if is_config_file {
                if let Some(ref vp) = value_pattern {
                    if let Some(caps) = vp.captures(line) {
                        let value = caps.get(1).or(caps.get(2)).map(|m| m.as_str());
                        if let Some(v) = value {
                            // Environment-variable references are safe, not exposures.
                            if v.starts_with('$')
                                || v.contains("env.")
                                || v.contains("ENV[")
                                || v.contains("getenv")
                            {
                                continue;
                            }

                            if let Some(prefix_desc) = Self::detect_known_prefix(v) {
                                exposures.push(ExposureDetail {
                                    line: line_number,
                                    exposure_type: ExposureType::KnownTokenPrefix(
                                        prefix_desc.to_string(),
                                    ),
                                    context: Self::redact_value(v),
                                });
                            } else if Self::is_high_entropy_secret(v) {
                                exposures.push(ExposureDetail {
                                    line: line_number,
                                    exposure_type: ExposureType::HighEntropy,
                                    context: Self::redact_value(v),
                                });
                            } else {
                                exposures.push(ExposureDetail {
                                    line: line_number,
                                    exposure_type: ExposureType::ConfigFile,
                                    context: format!(
                                        "Hardcoded in {}",
                                        risk_level_name(risk_level)
                                    ),
                                });
                            }
                        }
                    }
                }
            }

            // Assignment and logging patterns apply to any file type.
            if patterns.exposure_regex.is_match(line) {
                let exposure_type = if line.to_lowercase().contains("log")
                    || line.to_lowercase().contains("print")
                    || line.to_lowercase().contains("console")
                    || line.to_lowercase().contains("echo")
                {
                    ExposureType::LoggedOutput
                } else {
                    ExposureType::HardcodedValue
                };

                // Record at most one exposure per line.
                if !exposures.iter().any(|e| e.line == line_number) {
                    exposures.push(ExposureDetail {
                        line: line_number,
                        exposure_type,
                        context: Self::truncate_line(line),
                    });
                }
            }
        }

        let exposure_lines: Vec<usize> = exposures.iter().map(|e| e.line).collect();
        let risk_score = call_count * risk_level.multiplier();

        Ok(FileAnalysis {
            path: path.to_path_buf(),
            call_count,
            has_exposure: !exposures.is_empty(),
            risk_level,
            risk_score,
            exposures,
            exposure_lines,
            occurrence_lines,
        })
    }

    /// Redact a value, keeping only its first and last four characters.
    fn redact_value(value: &str) -> String {
        // Count and slice by chars so multi-byte input cannot panic.
        let chars: Vec<char> = value.chars().collect();
        if chars.len() <= 8 {
            return "***".to_string();
        }
        let prefix: String = chars[..4].iter().collect();
        let suffix: String = chars[chars.len() - 4..].iter().collect();
        format!("{}...{}", prefix, suffix)
    }

    /// Trim and cap a context line at 50 characters (counted per char, not per byte).
    fn truncate_line(line: &str) -> String {
        let trimmed = line.trim();
        if trimmed.chars().count() <= 50 {
            trimmed.to_string()
        } else {
            let head: String = trimmed.chars().take(47).collect();
            format!("{}...", head)
        }
    }

    /// Report returned when the time budget is exhausted before analysis finishes.
    fn timeout_report(
        &self,
        token_name: &str,
        search_dir: &Path,
        start: Instant,
    ) -> AnalysisReport {
        AnalysisReport {
            token_name: token_name.to_string(),
            search_dir: search_dir.to_path_buf(),
            total_calls: 0,
            exposure_count: 0,
            total_risk_score: 0,
            critical_files: 0,
            files: vec![],
            duration: start.elapsed(),
            files_scanned: 0,
            truncated: true,
            errors: vec!["Analysis timed out".to_string()],
        }
    }
}

/// Human-readable description of a file class, used in exposure contexts.
fn risk_level_name(level: RiskLevel) -> &'static str {
    match level {
        RiskLevel::Low => "source file",
        RiskLevel::Medium => "config file",
        RiskLevel::High => "sensitive config",
        RiskLevel::Critical => "secrets file",
    }
}

/// Compiled patterns for one analysis run.
struct AnalysisPatterns {
    /// Original (unescaped) token name, used when rendering contexts.
    token_name: String,
    token_regex: Regex,
    exposure_regex: Regex,
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    fn setup_test_dir() -> TempDir {
        let dir = TempDir::new().unwrap();

        fs::write(
            dir.path().join("config.py"),
            r#"
import os
API_KEY = os.getenv("API_KEY")
db_url = f"postgres://{API_KEY}@localhost/db"
"#,
        )
        .unwrap();

        fs::write(
            dir.path().join("main.js"),
            r#"
const API_KEY = process.env.API_KEY;
console.log("API Key:", API_KEY);
fetch(url, { headers: { "Authorization": API_KEY } });
"#,
        )
        .unwrap();

        fs::write(
            dir.path().join("safe.rs"),
            r#"
let api_key = std::env::var("API_KEY")?;
client.set_header("Authorization", &api_key);
"#,
        )
        .unwrap();

        fs::write(
            dir.path().join("debug.py"),
            r#"
import logging
logger = logging.getLogger(__name__)
logger.debug(f"Using API_KEY: {API_KEY}")
print(f"Debug: API_KEY = {API_KEY}")
"#,
        )
        .unwrap();

        let subdir = dir.path().join("src");
        fs::create_dir(&subdir).unwrap();
        fs::write(
            subdir.join("api.ts"),
            r#"
export const API_KEY = process.env.API_KEY;
export function getHeaders() {
    return { "X-API-Key": API_KEY };
}
"#,
        )
        .unwrap();

        dir
    }

    #[test]
    fn test_analyzer_finds_token_occurrences() {
        let dir = setup_test_dir();
        let analyzer = TokenSecurityAnalyzer::default_analyzer();

        let report = analyzer.analyze("API_KEY", dir.path()).unwrap();

        assert!(report.total_calls > 0, "Should find token occurrences");
        assert!(!report.files.is_empty(), "Should have files with matches");
    }

    #[test]
    fn test_analyzer_detects_exposure() {
        let dir = setup_test_dir();
        let analyzer = TokenSecurityAnalyzer::default_analyzer();

        let report = analyzer.analyze("API_KEY", dir.path()).unwrap();

        assert!(report.exposure_count > 0, "Should detect exposure");
        assert!(report.has_security_issues(), "Should have security issues");

        let exposed = report.exposed_files();
        let exposed_paths: Vec<_> = exposed
            .iter()
            .map(|f| f.path.file_name().unwrap().to_string_lossy().to_string())
            .collect();

        assert!(
            exposed_paths.iter().any(|p| p == "main.js"),
            "main.js should be exposed (console.log)"
        );
        assert!(
            exposed_paths.iter().any(|p| p == "debug.py"),
            "debug.py should be exposed (logger.debug, print)"
        );
    }
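
    // Illustrative sanity check of the risk model: ordering comes from the
    // derived Ord on RiskLevel, and multiplier() weights the risk score.
    #[test]
    fn test_risk_level_ordering_and_multiplier() {
        assert!(RiskLevel::Critical > RiskLevel::High);
        assert!(RiskLevel::High > RiskLevel::Medium);
        assert!(RiskLevel::Medium > RiskLevel::Low);
        assert_eq!(RiskLevel::Low.multiplier(), 1);
        assert_eq!(RiskLevel::Critical.multiplier(), 4);
    }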

    #[test]
    fn test_analyzer_respects_word_boundaries() {
        let dir = TempDir::new().unwrap();

        fs::write(
            dir.path().join("test.py"),
            r#"
API_KEY_NAME = "test"
MY_API_KEY = "value"
API_KEY = "secret"
"#,
        )
        .unwrap();

        let analyzer = TokenSecurityAnalyzer::default_analyzer();
        let report = analyzer.analyze("API_KEY", dir.path()).unwrap();

        assert_eq!(report.total_calls, 1, "Should match exact token only");
    }
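
    // Prefix detection is first-match, so more specific prefixes (sk-ant-)
    // must be checked before their generic counterparts (sk-).
    #[test]
    fn test_detect_known_prefix() {
        assert_eq!(
            TokenSecurityAnalyzer::detect_known_prefix("ghp_abcdefghijklmnop"),
            Some("GitHub Personal Access Token")
        );
        assert_eq!(
            TokenSecurityAnalyzer::detect_known_prefix("sk-ant-api03-abcdef"),
            Some("Anthropic API Key")
        );
        assert_eq!(TokenSecurityAnalyzer::detect_known_prefix("hello world"), None);
    }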

    #[test]
    fn test_analyzer_config_fast() {
        let config = AnalyzerConfig::fast();
        assert_eq!(config.max_files, 1_000);
        assert_eq!(config.timeout_ms, 5_000);
    }

    #[test]
    fn test_analyzer_config_thorough() {
        let config = AnalyzerConfig::thorough();
        assert_eq!(config.max_files, 0);
        assert!(config.include_hidden);
    }
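
    // Entropy sanity checks: a single repeated character carries 0 bits per
    // character, while four equally frequent characters carry exactly 2 bits.
    #[test]
    fn test_calculate_entropy() {
        assert!(TokenSecurityAnalyzer::calculate_entropy("").abs() < 1e-9);
        assert!(TokenSecurityAnalyzer::calculate_entropy("aaaa").abs() < 1e-9);
        assert!((TokenSecurityAnalyzer::calculate_entropy("abcd") - 2.0).abs() < 1e-9);
    }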

    #[test]
    fn test_analyzer_empty_token() {
        let dir = TempDir::new().unwrap();
        let analyzer = TokenSecurityAnalyzer::default_analyzer();

        let result = analyzer.analyze("", dir.path());
        assert!(result.is_err(), "Should reject empty token");
    }

    #[test]
    fn test_analyzer_nonexistent_dir() {
        let analyzer = TokenSecurityAnalyzer::default_analyzer();

        let result = analyzer.analyze("TOKEN", Path::new("/nonexistent/path"));
        assert!(result.is_err(), "Should reject nonexistent directory");
    }
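
    // The high-entropy heuristic should accept random-looking values and
    // reject obvious placeholders regardless of their entropy.
    #[test]
    fn test_is_high_entropy_secret() {
        // 16 distinct characters => log2(16) = 4.0 bits/char, above the 3.5 threshold.
        assert!(TokenSecurityAnalyzer::is_high_entropy_secret("aB3xK9mQ2vL8pR4t"));
        assert!(!TokenSecurityAnalyzer::is_high_entropy_secret("your_api_key_here"));
        assert!(!TokenSecurityAnalyzer::is_high_entropy_secret("aaaaaaaa"));
        assert!(!TokenSecurityAnalyzer::is_high_entropy_secret("short"));
    }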

    #[test]
    fn test_analyzer_report_sorting() {
        let dir = setup_test_dir();
        let analyzer = TokenSecurityAnalyzer::default_analyzer();
        let report = analyzer.analyze("API_KEY", dir.path()).unwrap();

        let sorted = report.files_sorted();

        if !sorted.is_empty() && sorted[0].has_exposure {
            assert!(
                sorted.iter().take_while(|f| f.has_exposure).count() > 0,
                "Exposed files should come first"
            );
        }
    }
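
    // File risk classification: secrets files are Critical, deployment configs
    // High, generic config extensions Medium, everything else Low.
    #[test]
    fn test_get_file_risk_level() {
        assert_eq!(
            TokenSecurityAnalyzer::get_file_risk_level(Path::new(".env")),
            RiskLevel::Critical
        );
        assert_eq!(
            TokenSecurityAnalyzer::get_file_risk_level(Path::new("docker-compose.yml")),
            RiskLevel::High
        );
        assert_eq!(
            TokenSecurityAnalyzer::get_file_risk_level(Path::new("app.toml")),
            RiskLevel::Medium
        );
        assert_eq!(
            TokenSecurityAnalyzer::get_file_risk_level(Path::new("main.rs")),
            RiskLevel::Low
        );
    }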

    #[test]
    fn test_analyzer_ignores_node_modules() {
        let dir = TempDir::new().unwrap();

        let nm = dir.path().join("node_modules");
        fs::create_dir(&nm).unwrap();
        fs::write(nm.join("test.js"), "const API_KEY = 'test';").unwrap();

        fs::write(dir.path().join("main.js"), "const API_KEY = 'test';").unwrap();

        let analyzer = TokenSecurityAnalyzer::default_analyzer();
        let report = analyzer.analyze("API_KEY", dir.path()).unwrap();

        assert_eq!(report.files.len(), 1);
        assert_eq!(report.files[0].path.file_name().unwrap(), "main.js");
    }
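
    // Redaction keeps just enough of the value to be recognizable.
    #[test]
    fn test_redact_value() {
        assert_eq!(TokenSecurityAnalyzer::redact_value("short"), "***");
        assert_eq!(
            TokenSecurityAnalyzer::redact_value("ghp_abcdefghijkl"),
            "ghp_...ijkl"
        );
    }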

    #[test]
    fn test_analyzer_performance_metrics() {
        let dir = setup_test_dir();
        let analyzer = TokenSecurityAnalyzer::default_analyzer();

        let report = analyzer.analyze("API_KEY", dir.path()).unwrap();

        assert!(
            report.duration.as_millis() < 5000,
            "Analysis should complete quickly (< 5s)"
        );
        assert!(report.files_scanned > 0, "Should report files scanned");
    }
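
    // .env files are hidden, so the walker only reaches them when
    // include_hidden is set (as thorough() does); the default config skips them.
    #[test]
    fn test_env_file_requires_include_hidden() {
        let dir = TempDir::new().unwrap();
        fs::write(dir.path().join(".env"), "API_KEY=ghp_AbCdEfGhIjKlMnOpQrSt\n").unwrap();

        let default_report = TokenSecurityAnalyzer::default_analyzer()
            .analyze("API_KEY", dir.path())
            .unwrap();
        assert_eq!(default_report.total_calls, 0, "Hidden .env skipped by default");

        let config = AnalyzerConfig {
            include_hidden: true,
            ..Default::default()
        };
        let report = TokenSecurityAnalyzer::new(config)
            .analyze("API_KEY", dir.path())
            .unwrap();
        assert_eq!(report.exposure_count, 1, "Known-prefix value in .env is an exposure");
        assert!(report.has_critical_issues(), ".env files are Critical risk");
    }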

    #[test]
    fn test_analyzer_multiple_occurrences_per_line() {
        let dir = TempDir::new().unwrap();

        fs::write(
            dir.path().join("test.py"),
            "x = API_KEY + API_KEY + API_KEY\n",
        )
        .unwrap();

        let analyzer = TokenSecurityAnalyzer::default_analyzer();
        let report = analyzer.analyze("API_KEY", dir.path()).unwrap();

        assert_eq!(
            report.total_calls, 3,
            "Should count all occurrences on same line"
        );
        assert_eq!(
            report.files[0].occurrence_lines.len(),
            1,
            "Should only have 1 line"
        );
    }
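
    // Long context lines are capped at 50 characters; counting chars (not
    // bytes) keeps truncation safe on multi-byte input.
    #[test]
    fn test_truncate_line() {
        assert_eq!(TokenSecurityAnalyzer::truncate_line("  hello  "), "hello");
        let long = "a".repeat(60);
        let truncated = TokenSecurityAnalyzer::truncate_line(&long);
        assert_eq!(truncated.len(), 50);
        assert!(truncated.ends_with("..."));
    }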
}