/// Computes the Shannon entropy of `data`, in bits per byte.
///
/// Returns `0.0` for empty input and for input made of a single repeated
/// byte value; the maximum, `8.0`, is reached when all 256 byte values occur
/// equally often.
pub fn shannon_entropy(data: &[u8]) -> f64 {
    const LANES: usize = 4;

    if data.is_empty() {
        return 0.0;
    }

    // Bytes are counted round-robin into `LANES` independent histograms
    // (presumably so neighbouring bytes do not contend for the same counter).
    // The merged totals are identical to those of a single histogram.
    let mut lanes = [[0u64; 256]; LANES];
    let mut groups = data.chunks_exact(LANES);
    for group in groups.by_ref() {
        for (lane, &byte) in lanes.iter_mut().zip(group) {
            lane[usize::from(byte)] += 1;
        }
    }

    // The (at most LANES - 1) trailing bytes go straight into the merged table.
    let mut histogram = [0u64; 256];
    for &byte in groups.remainder() {
        histogram[usize::from(byte)] += 1;
    }
    for (value, total) in histogram.iter_mut().enumerate() {
        *total += lanes.iter().map(|lane| lane[value]).sum::<u64>();
    }

    let total_bytes = data.len() as f64;
    histogram
        .iter()
        .filter(|&&count| count > 0)
        .fold(0.0_f64, |entropy, &count| {
            let p = count as f64 / total_bytes;
            entropy - p * p.log2()
        })
}
55
56pub fn normalized_entropy(data: &[u8]) -> f64 {
69 if data.is_empty() {
70 return 0.0;
71 }
72
73 let unique_chars = {
74 let mut seen = [false; 256];
75 for &b in data {
76 seen[b as usize] = true;
77 }
78 seen.iter().filter(|&&v| v).count()
79 };
80
81 if unique_chars <= 1 {
82 return 0.0;
83 }
84
85 let max_entropy = (unique_chars as f64).log2();
86 if max_entropy == 0.0 {
87 return 0.0;
88 }
89
90 shannon_entropy(data) / max_entropy
91}
92
/// Entropy threshold (bits per byte, as returned by `shannon_entropy`) used for
/// keyword lines that are not credential contexts, and as the minimum entropy
/// in `passes_strict_secret_checks`.
pub const HIGH_ENTROPY_THRESHOLD: f64 = 4.5;
/// Entropy threshold applied by the keyword-free scan
/// (`scan_keyword_free_candidates`).
pub const VERY_HIGH_ENTROPY_THRESHOLD: f64 = 5.5;
/// Entropy threshold used when the keyword line is a credential context
/// (see `keyword_context`).
const CREDENTIAL_CONTEXT_THRESHOLD: f64 = 3.5;
/// Minimum candidate length used when the keyword line is a credential context;
/// it replaces the caller-supplied `min_length` in that case.
const CREDENTIAL_CONTEXT_MIN_LEN: usize = 16;
/// Minimum candidate length for the keyword-free scan.
const KEYWORD_FREE_MIN_LEN: usize = 30;
/// Minimum length `candidate_is_plausible` requires in a credential context.
const MIN_PASSWORD_LEN: usize = 8;
/// Added to a zero-based line index to produce `EntropyMatch::line`.
const FIRST_SOURCE_LINE_NUMBER: usize = 1;
/// Value stored in `EntropyMatch::keyword` for matches found by the
/// keyword-free scan.
const KEYWORD_FREE_LABEL: &str = "none (high-entropy)";
116
/// Keywords that mark a line as a potential secret assignment.
///
/// `is_keyword_assignment_line` matches these as case-insensitive substrings.
/// `keyword_context` reports the FIRST entry (in this order) contained in the
/// lower-cased line, so an entry that contains an earlier entry (for example
/// `secret_key` after `secret`) is never the reported label.
const SECRET_KEYWORDS: &[&str] = &[
    "api_key",
    "apikey",
    "api-key",
    "api_token",
    "api-token",
    "secret",
    "secret_key",
    "secretkey",
    "token",
    "access_token",
    "auth_token",
    "auth-token",
    "password",
    "passwd",
    "pwd",
    "credential",
    "credentials",
    "private_key",
    "privatekey",
    "client_secret",
    "jwt_secret",
    "jwtsecret",
    "session_key",
    "session-key",
    "signing_key",
    "encryption_key",
    "oauth_token",
    "bearer",
    "authorization",
    "webhook_secret",
    "database_url",
    "connection_string",
    "dsn",
];
153
/// One candidate secret reported by `find_entropy_secrets`.
#[derive(Debug, Clone)]
pub struct EntropyMatch {
    /// The candidate string that was flagged.
    pub value: String,
    /// Shannon entropy of `value`'s bytes, in bits per byte.
    pub entropy: f64,
    /// Keyword label of the context the match was found in:
    /// `"none (high-entropy)"` for the keyword-free scan, otherwise the label
    /// chosen by `keyword_context` for the triggering keyword line.
    pub keyword: String,
    /// 1-based number of the line the candidate was extracted from.
    pub line: usize,
    /// Offset of the start of that line as computed by
    /// `cumulative_line_offsets` (the line's start, not the value's position
    /// within the line).
    pub offset: usize,
}
176
/// Decides from a file path whether entropy-based scanning should be applied.
///
/// Returns `true` when
/// * `path` is `None` (there is no name to judge by),
/// * the lower-cased path ends with one of the configuration-style
///   extensions, or
/// * the final path component starts with one of the sensitive file names.
///
/// Both `/` and `\` are accepted as path separators when isolating the final
/// component, so Windows-style paths such as `C:\repo\credentials.txt` are
/// recognised as well.
pub fn is_entropy_appropriate(path: Option<&str>) -> bool {
    const CONFIG_EXTENSIONS: &[&str] = &[
        ".env",
        ".yaml",
        ".yml",
        ".json",
        ".toml",
        ".properties",
        ".cfg",
        ".conf",
        ".ini",
        ".config",
        ".secrets",
        ".pem",
        ".key",
        ".tfvars",
        ".hcl",
    ];
    const CONFIG_FILENAMES: &[&str] = &[
        ".env",
        "credentials",
        "secrets",
        "apikeys",
        "docker-compose",
        ".npmrc",
        ".pypirc",
        ".netrc",
    ];

    let Some(path) = path else {
        return true;
    };
    // All comparisons below are case-insensitive.
    let lower = path.to_lowercase();

    if CONFIG_EXTENSIONS.iter().any(|ext| lower.ends_with(*ext)) {
        return true;
    }

    // Final path component; `rsplit` always yields at least one item, the
    // fallback only keeps the expression total.
    let filename = lower
        .rsplit(|c: char| c == '/' || c == '\\')
        .next()
        .unwrap_or(&lower);

    // Prefix match, so `.npmrc.backup` or `docker-compose.prod` also qualify
    // (an exact match is just the degenerate prefix case).
    CONFIG_FILENAMES
        .iter()
        .any(|name| filename.starts_with(*name))
}
236
237pub fn find_entropy_secrets(
250 text: &str,
251 min_length: usize,
252 context_lines: usize,
253) -> Vec<EntropyMatch> {
254 let lines: Vec<&str> = text.lines().collect();
255 let line_offsets = cumulative_line_offsets(&lines);
256 let mut matches = Vec::new();
257 let mut seen = std::collections::HashSet::new();
258 let keyword_lines = find_keyword_assignment_lines(&lines);
259
260 scan_keyword_contexts(
261 &lines,
262 &line_offsets,
263 &keyword_lines,
264 min_length,
265 context_lines,
266 &mut seen,
267 &mut matches,
268 );
269 scan_keyword_free_candidates(&lines, &line_offsets, &mut seen, &mut matches);
270 matches
271}
272
273fn find_keyword_assignment_lines<'a>(lines: &'a [&str]) -> Vec<(usize, &'a str)> {
274 lines
275 .iter()
276 .enumerate()
277 .filter_map(|(index, line)| is_keyword_assignment_line(line).then_some((index, *line)))
278 .collect()
279}
280
281fn is_keyword_assignment_line(line: &str) -> bool {
282 let line_bytes = line.as_bytes();
283 let has_keyword = SECRET_KEYWORDS.iter().any(|keyword| {
284 let keyword_bytes = keyword.as_bytes();
285 line_bytes
286 .windows(keyword_bytes.len())
287 .any(|window| window.eq_ignore_ascii_case(keyword_bytes))
288 });
289 let trimmed = line.trim();
290 let is_import = trimmed.starts_with("import")
291 || trimmed.starts_with("package")
292 || trimmed.starts_with("use ")
293 || trimmed.starts_with("from ")
294 || trimmed.starts_with("require(");
295 has_keyword && (line.contains('=') || line.contains(": ")) && !is_import
296}
297
298fn scan_keyword_contexts(
299 lines: &[&str],
300 line_offsets: &[usize],
301 keyword_lines: &[(usize, &str)],
302 min_length: usize,
303 context_lines: usize,
304 seen: &mut std::collections::HashSet<String>,
305 matches: &mut Vec<EntropyMatch>,
306) {
307 for (keyword_line_index, keyword_line) in keyword_lines {
308 let context = keyword_context(keyword_line, min_length);
309 let start = keyword_line_index.saturating_sub(context_lines);
310 let end = (*keyword_line_index + context_lines + 1).min(lines.len());
311 for line_idx in start..end {
312 collect_line_candidates(
313 lines[line_idx],
314 line_idx,
315 line_offsets[line_idx],
316 &context,
317 seen,
318 matches,
319 );
320 }
321 }
322}
323
324fn scan_keyword_free_candidates(
325 lines: &[&str],
326 line_offsets: &[usize],
327 seen: &mut std::collections::HashSet<String>,
328 matches: &mut Vec<EntropyMatch>,
329) {
330 let keyword_free_context = KeywordContext {
331 keyword: KEYWORD_FREE_LABEL.to_string(),
332 threshold: VERY_HIGH_ENTROPY_THRESHOLD,
333 min_len: KEYWORD_FREE_MIN_LEN,
334 is_credential_context: false,
335 };
336 for (line_idx, line) in lines.iter().enumerate() {
337 collect_line_candidates(
338 line,
339 line_idx,
340 line_offsets[line_idx],
341 &keyword_free_context,
342 seen,
343 matches,
344 );
345 }
346}
347
/// Acceptance parameters applied to the candidates of a scanned line.
struct KeywordContext {
    /// Label copied into `EntropyMatch::keyword` for accepted candidates.
    keyword: String,
    /// Minimum Shannon entropy (bits per byte) a candidate must reach.
    threshold: f64,
    /// Minimum length passed to `extract_candidates`.
    min_len: usize,
    /// When `true`, `candidate_is_plausible` only checks a minimum length
    /// instead of running the strict secret checks.
    is_credential_context: bool,
}
354
355fn keyword_context(keyword_line: &str, min_length: usize) -> KeywordContext {
356 const CREDENTIAL_KEYWORDS: &[&str] = &[
357 "password",
358 "passwd",
359 "pwd",
360 "db_pass",
361 "db_password",
362 "api_key",
363 "apikey",
364 "api-key",
365 "_key",
366 "-key",
367 "token",
368 "_token",
369 "-token",
370 "secret",
371 "_secret",
372 "-secret",
373 ];
374
375 let lowered = keyword_line.to_lowercase();
376 let keyword = SECRET_KEYWORDS
377 .iter()
378 .find(|keyword| lowered.contains(*keyword))
379 .copied()
380 .unwrap_or("unknown");
381 let is_credential_context = CREDENTIAL_KEYWORDS
382 .iter()
383 .any(|credential_keyword| lowered.contains(credential_keyword));
384 KeywordContext {
385 keyword: keyword.to_string(),
386 threshold: if is_credential_context {
387 CREDENTIAL_CONTEXT_THRESHOLD
388 } else {
389 HIGH_ENTROPY_THRESHOLD
390 },
391 min_len: if is_credential_context {
392 CREDENTIAL_CONTEXT_MIN_LEN
393 } else {
394 min_length
395 },
396 is_credential_context,
397 }
398}
399
400fn collect_line_candidates(
401 line: &str,
402 line_idx: usize,
403 line_offset: usize,
404 context: &KeywordContext,
405 seen: &mut std::collections::HashSet<String>,
406 matches: &mut Vec<EntropyMatch>,
407) {
408 for candidate in extract_candidates(line, context.min_len) {
409 let entropy = shannon_entropy(candidate.as_bytes());
410 if !candidate_is_plausible(&candidate, entropy, context) || !seen.insert(candidate.clone())
411 {
412 continue;
413 }
414 matches.push(EntropyMatch {
415 value: candidate,
416 entropy,
417 keyword: context.keyword.clone(),
418 line: line_idx + FIRST_SOURCE_LINE_NUMBER,
419 offset: line_offset,
420 });
421 }
422}
423
424fn candidate_is_plausible(candidate: &str, entropy: f64, context: &KeywordContext) -> bool {
425 if entropy < context.threshold {
426 return false;
427 }
428 if context.is_credential_context {
429 return candidate.len() >= MIN_PASSWORD_LEN;
430 }
431 candidate.len() >= KEYWORD_FREE_MIN_LEN.min(context.min_len) && is_secret_plausible(candidate)
432}
433
/// Returns, for each line, the running total of the lengths of all preceding
/// lines plus one byte per preceding line.
///
/// NOTE(review): the `+ 1` assumes every line is followed by a one-byte
/// terminator; if the lines came from text with `\r\n` endings the offsets
/// would drift by one per line — confirm against callers.
fn cumulative_line_offsets(lines: &[&str]) -> Vec<usize> {
    let mut next_start = 0usize;
    lines
        .iter()
        .map(|line| {
            let start = next_start;
            next_start = next_start.saturating_add(line.len().saturating_add(1));
            start
        })
        .collect()
}
445
446fn extract_candidates(line: &str, min_length: usize) -> Vec<String> {
449 let mut candidates = Vec::new();
450
451 if is_likely_concatenation_fragment(line) {
454 return candidates;
455 }
456
457 if let Some(eq_pos) = line.find('=').or_else(|| line.find(": ")) {
463 let sep_len = if line.as_bytes().get(eq_pos) == Some(&b'=') {
464 1
465 } else {
466 2 };
468 let value_part = line[eq_pos + sep_len..].trim();
469 let cleaned = value_part
470 .trim_matches(|c: char| c == '"' || c == '\'' || c == '`' || c == ';' || c == ',');
471 if cleaned.len() >= min_length && is_candidate_plausible(cleaned) {
472 candidates.push(cleaned.to_string());
473 }
474 }
475
476 for quote in &['"', '\''] {
478 let mut start = None;
479 for (i, ch) in line.char_indices() {
480 if ch == *quote {
481 match start {
482 None => start = Some(i + 1),
483 Some(s) => {
484 let content = &line[s..i];
485 if content.len() >= min_length && is_secret_plausible(content) {
486 candidates.push(content.to_string());
487 }
488 start = None;
489 }
490 }
491 }
492 }
493 }
494
495 candidates
496}
497
/// Heuristically detects lines that are one piece of a multi-line string
/// rather than a standalone value.
///
/// A trimmed line is treated as a fragment when either
/// * it starts with a quote character, contains exactly two of that quote and
///   none of the other kind, and whatever follows the closing quote is empty,
///   a lone `\` or `,`, or begins with `+` or `)`; or
/// * it ends with `\"` or with `-\`.
fn is_likely_concatenation_fragment(line: &str) -> bool {
    let trimmed = line.trim();

    let opening_quote = trimmed.chars().next().filter(|&c| c == '"' || c == '\'');
    if let Some(quote) = opening_quote {
        let other_quote = if quote == '"' { '\'' } else { '"' };
        let is_single_literal =
            trimmed.matches(quote).count() == 2 && !trimmed.contains(other_quote);

        if is_single_literal {
            // Text after the closing quote.
            let tail = trimmed.rsplit(quote).next().unwrap_or("").trim();
            let continues_expression = matches!(tail, "" | "\\" | ",")
                || tail.starts_with('+')
                || tail.starts_with(')');
            if continues_expression {
                return true;
            }
        }
    }

    trimmed.ends_with("\\\"") || trimmed.ends_with("-\\")
}
552
/// Selects how many checks `passes_plausibility_checks` applies.
enum PlausibilityMode {
    /// Only the universal rejections, the placeholder check and the
    /// alphanumeric-ratio check.
    Lenient,
    /// Everything in `Lenient`, plus `passes_strict_secret_checks`.
    Strict,
}
564fn passes_plausibility_checks(s: &str, mode: PlausibilityMode) -> bool {
565 if matches_universal_rejection(s) {
566 return false;
567 }
568
569 if is_placeholder_ci(s.as_bytes()) || has_low_alnum_ratio(s) {
570 return false;
571 }
572
573 if matches!(mode, PlausibilityMode::Strict) && !passes_strict_secret_checks(s) {
574 return false;
575 }
576
577 true
578}
579
/// Reports whether `s` has a shape that is rejected in every plausibility mode.
///
/// Rejected shapes: anything containing `://`; anything starting with one of
/// the prefixes in `REJECTED_PREFIXES`; `eyJ…` strings with exactly two dots;
/// strings starting with `Ag` that are longer than 40 bytes; and strings that
/// begin with a drive letter followed by `:\` or `:/`.
fn matches_universal_rejection(s: &str) -> bool {
    // Paths, template/variable references, regex fragments, key material and
    // markup fences. Several entries (`AQI`, `CiQ`, `vault:`, `ENC[`, `age1`)
    // are presumably prefixes of already-encrypted values — TODO confirm.
    const REJECTED_PREFIXES: &[&str] = &[
        "/",
        "./",
        "../",
        "${{",
        "{{",
        "${",
        "(?",
        "^",
        "ssh-",
        "ecdsa-",
        "$ANSIBLE_VAULT",
        "ENC[",
        "-----BEGIN",
        "age1",
        "vault:",
        "AQI",
        "CiQ",
        "```",
        "---",
        "===",
    ];

    if s.contains("://")
        || REJECTED_PREFIXES
            .iter()
            .any(|prefix| s.starts_with(*prefix))
    {
        return true;
    }

    // Three dot-separated parts starting with `eyJ`.
    let is_three_part_eyj = s.starts_with("eyJ") && s.matches('.').count() == 2;
    let is_long_ag_value = s.starts_with("Ag") && s.len() > 40;
    // Drive-letter path such as `C:\...` or `C:/...`.
    let is_drive_path = matches!(
        s.as_bytes(),
        [drive, b':', b'\\' | b'/', ..] if drive.is_ascii_alphabetic()
    );

    is_three_part_eyj || is_long_ag_value || is_drive_path
}
610
/// Reports whether fewer than half of the characters of `s` are alphanumeric.
///
/// Both the alphanumeric count and the total are measured in characters, so
/// multi-byte alphanumeric text is not penalised for its encoded length.
/// The empty string counts as low (ratio `0 / 1`).
fn has_low_alnum_ratio(s: &str) -> bool {
    let (alnum, total) = s.chars().fold((0usize, 0usize), |(alnum, total), c| {
        (alnum + usize::from(c.is_alphanumeric()), total + 1)
    });
    // `max(1)` avoids dividing by zero for the empty string.
    (alnum as f64) / (total.max(1) as f64) < 0.5
}
615
616fn passes_strict_secret_checks(s: &str) -> bool {
617 if s.chars().all(|c| c.is_ascii_hexdigit()) && s.len() > 10 {
618 return false;
619 }
620 if s.len() > 4
621 && let Some(first) = s.chars().next()
622 && s.chars().all(|c| c == first)
623 {
624 return false;
625 }
626 if s.len() > 16 && unique_char_count(s) < 8 {
627 return false;
628 }
629 if s.len() > 16 && second_half_entropy(s) < 2.5 {
630 return false;
631 }
632
633 shannon_entropy(s.as_bytes()) >= HIGH_ENTROPY_THRESHOLD
634}
635
/// Counts the distinct characters (Unicode scalar values) in `s`.
fn unique_char_count(s: &str) -> usize {
    s.chars()
        .collect::<std::collections::HashSet<char>>()
        .len()
}
643
644fn second_half_entropy(s: &str) -> f64 {
645 let mid = s.len() / 2;
646 let half_start = s.floor_char_boundary(mid);
647 shannon_entropy(&s.as_bytes()[half_start..])
648}
649
/// Lenient plausibility check: applies `passes_plausibility_checks` without
/// the strict secret checks.
fn is_candidate_plausible(s: &str) -> bool {
    passes_plausibility_checks(s, PlausibilityMode::Lenient)
}
654
/// Strict plausibility check: applies `passes_plausibility_checks` including
/// the strict secret checks.
fn is_secret_plausible(s: &str) -> bool {
    passes_plausibility_checks(s, PlausibilityMode::Strict)
}
659
// Unit tests for the entropy helpers, the path heuristic and the secret scan.
#[cfg(test)]
mod tests {
    use super::*;

    // --- shannon_entropy: basic values ---

    #[test]
    fn entropy_constant_string() {
        assert!(shannon_entropy(b"aaaaaaaaaa") < 0.1);
    }

    #[test]
    fn entropy_random_string() {
        let key = b"aK7xP9mQ2wE5rT8yU1iO3pA6sD4fG0hJ";
        assert!(shannon_entropy(key) > 4.0);
    }

    #[test]
    fn entropy_hex_hash() {
        // A hex string draws on at most 16 distinct symbols, so its entropy
        // cannot exceed 4 bits.
        let hash = b"d41d8cd98f00b204e9800998ecf8427e";
        let e = shannon_entropy(hash);
        assert!(e > 3.0);
        assert!(e < 5.0);
    }

    // --- find_entropy_secrets: keyword-driven detection ---

    #[test]
    fn find_secrets_near_keywords() {
        let text = r#"
# Config
DATABASE_URL=postgres://localhost/mydb
API_KEY=aK7xP9mQ2wE5rT8yU1iO3pA6sD4fG0hJkL
DEBUG=true
"#;
        let matches = find_entropy_secrets(text, 16, 2);
        assert!(
            !matches.is_empty(),
            "should find high-entropy string near API_KEY"
        );
        assert_eq!(matches[0].value, "aK7xP9mQ2wE5rT8yU1iO3pA6sD4fG0hJkL");
        assert!(
            matches.iter().any(|m| m.entropy > 4.0),
            "should have high entropy match"
        );
    }

    #[test]
    fn skip_placeholders() {
        let text = r#"
API_KEY=YOUR_API_KEY_HERE
SECRET=change_me_placeholder
TOKEN=xxxxxxxxxxxxxxxxxxxx
"#;
        let matches = find_entropy_secrets(text, 16, 2);
        assert!(matches.is_empty());
    }

    // --- plausibility filters ---

    #[test]
    fn plausible_secret_filter() {
        assert!(!is_secret_plausible("https://example.com/api"));
        assert!(!is_secret_plausible("/usr/local/bin/python"));
        assert!(!is_secret_plausible("your_api_key_here"));
        assert!(is_secret_plausible("aK7xP9mQ2wE5rT8yU1iO3pA6sD4fG0hJ"));
    }

    #[test]
    fn candidate_mode_skips_strict_secret_checks() {
        // An all-hex string longer than 10 bytes fails only the strict checks.
        assert!(is_candidate_plausible("0123456789abcdef"));
        assert!(!is_secret_plausible("0123456789abcdef"));
    }

    #[test]
    fn detect_db_password_hex() {
        let text = "DB_PASSWORD=8ae31cacf141669ddfb5da\n";
        let matches = find_entropy_secrets(text, 8, 2);
        assert!(
            !matches.is_empty(),
            "Should detect hex password near DB_PASSWORD keyword. Got 0 matches."
        );
        assert!(
            matches[0].value.contains("8ae31cac"),
            "Should extract the password value"
        );
    }

    #[test]
    fn entropy_match_offsets_are_cumulative() {
        // The reported offset is the start of the matching line.
        let text = "first=line\nAPI_KEY=aK7xP9mQ2wE5rT8yU1iO3pA6sD4fG0hJkL\n";
        let matches = find_entropy_secrets(text, 16, 2);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].value, "aK7xP9mQ2wE5rT8yU1iO3pA6sD4fG0hJkL");
        assert_eq!(matches[0].offset, "first=line\n".len());
    }

    // --- shannon_entropy: edge cases ---

    #[test]
    fn entropy_empty_input_is_zero() {
        assert_eq!(shannon_entropy(b""), 0.0);
    }

    #[test]
    fn entropy_single_unique_byte_is_zero() {
        assert_eq!(shannon_entropy(b"zzzzzzzz"), 0.0);
    }

    #[test]
    fn entropy_all_byte_values_is_near_eight() {
        let all_bytes: Vec<u8> = (0u8..=255).collect();
        let entropy = shannon_entropy(&all_bytes);
        assert!((entropy - 8.0).abs() < 1e-9, "entropy was {}", entropy);
    }

    #[test]
    fn entropy_huge_repeated_input_stays_low() {
        let repeated = vec![b'A'; 100_000];
        assert_eq!(shannon_entropy(&repeated), 0.0);
    }

    // --- normalized_entropy ---

    #[test]
    fn normalized_entropy_empty_input_is_zero() {
        assert_eq!(normalized_entropy(b""), 0.0);
    }

    #[test]
    fn normalized_entropy_single_unique_byte_is_zero() {
        assert_eq!(normalized_entropy(b"aaaaaaaaaaaaaaaa"), 0.0);
    }

    #[test]
    fn normalized_entropy_binary_pattern_reaches_one() {
        let entropy = normalized_entropy(b"abababababababab");
        assert!((entropy - 1.0).abs() < 1e-9, "entropy was {}", entropy);
    }

    #[test]
    fn normalized_entropy_all_unique_bytes_reaches_one() {
        let all_bytes: Vec<u8> = (0u8..=255).collect();
        let entropy = normalized_entropy(&all_bytes);
        assert!((entropy - 1.0).abs() < 1e-9, "entropy was {}", entropy);
    }

    #[test]
    fn normalized_entropy_stays_bounded_for_large_mixed_input() {
        let mut data = Vec::with_capacity(16_000);
        for _ in 0..500 {
            data.extend_from_slice(b"abc123XYZ!@#$%^&*()");
        }
        let entropy = normalized_entropy(&data);
        assert!((0.0..=1.0).contains(&entropy), "entropy was {}", entropy);
    }

    // --- is_entropy_appropriate ---

    #[test]
    fn entropy_is_appropriate_for_stdin() {
        assert!(is_entropy_appropriate(None));
    }

    #[test]
    fn entropy_is_appropriate_for_config_extensions_case_insensitively() {
        assert!(is_entropy_appropriate(Some("CONFIG/SETTINGS.YAML")));
        assert!(is_entropy_appropriate(Some("keys/server.PEM")));
        assert!(is_entropy_appropriate(Some("infra/secrets.TFVARS")));
    }

    #[test]
    fn entropy_is_appropriate_for_sensitive_filenames_only() {
        assert!(is_entropy_appropriate(Some("/tmp/.npmrc.backup")));
        assert!(is_entropy_appropriate(Some("nested/docker-compose.prod")));
        assert!(is_entropy_appropriate(Some("config/apikeys.txt")));
    }

    #[test]
    fn entropy_is_not_appropriate_for_source_files_even_with_config_substrings() {
        assert!(!is_entropy_appropriate(Some(
            "src/docker_auth_config_test.go"
        )));
        assert!(!is_entropy_appropriate(Some(
            "lib/application_yaml_parser.rs"
        )));
        assert!(!is_entropy_appropriate(Some("src/main.rs")));
    }

    // --- find_entropy_secrets: scan behaviour ---

    #[test]
    fn entropy_secret_scan_empty_input_returns_no_matches() {
        assert!(find_entropy_secrets("", 16, 2).is_empty());
    }

    #[test]
    // The secret uses 64 distinct characters once each, i.e. exactly 6 bits of
    // entropy, which clears VERY_HIGH_ENTROPY_THRESHOLD.
    fn keyword_free_scan_detects_long_high_entropy_strings() {
        let secret = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!@";
        let text = format!("prefix\n value: \"{secret}\"\nsuffix\n");
        let matches = find_entropy_secrets(&text, 16, 0);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].value, secret);
        assert_eq!(matches[0].keyword, "none (high-entropy)");
        assert_eq!(matches[0].line, 2);
    }

    #[test]
    fn keyword_free_scan_rejects_short_high_entropy_strings() {
        let text = "ZxCvBn123!@#AsDfGh456$%^QwErTy789";
        assert!(find_entropy_secrets(text, 16, 0).is_empty());
    }

    #[test]
    fn duplicate_secret_value_is_reported_once() {
        let secret = "aK7xP9mQ2wE5rT8yU1iO3pA6sD4fG0hJkL";
        let text = format!("API_KEY={secret}\nTOKEN={secret}\n");
        let matches = find_entropy_secrets(&text, 16, 1);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].value, secret);
    }

    #[test]
    fn import_statements_with_keywords_are_ignored() {
        let text = "import API_KEY from \"aK7xP9mQ2wE5rT8yU1iO3pA6sD4fG0hJkL\"\n";
        assert!(find_entropy_secrets(text, 16, 1).is_empty());
    }

    #[test]
    fn url_like_values_are_rejected_even_in_keyword_context() {
        let text = "DATABASE_URL=https://example.com/super/secret/path/value\n";
        assert!(find_entropy_secrets(text, 16, 1).is_empty());
    }

    #[test]
    fn context_lines_zero_limits_scan_to_keyword_line() {
        let secret = "aK7xP9mQ2wE5rT8yU1iO3pA6sD4fG0hJkL";
        let text = format!("API_KEY=placeholder\n\"{secret}\"\n");
        assert!(find_entropy_secrets(&text, 16, 0).is_empty());
    }

    #[test]
    // With one context line, the line after the keyword line is scanned with
    // the keyword line's context.
    fn context_lines_include_neighboring_lines() {
        let secret = "aK7xP9mQ2wE5rT8yU1iO3pA6sD4fG0hJkL";
        let text = format!("API_KEY=placeholder\n value: \"{secret}\"\n");
        let matches = find_entropy_secrets(&text, 16, 1);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].value, secret);
        assert_eq!(matches[0].line, 2);
    }

    #[test]
    fn special_character_placeholders_are_rejected() {
        let text = "SECRET=<replace-with-real-secret>\nTOKEN=${{ secrets.API_TOKEN }}\n";
        assert!(find_entropy_secrets(text, 8, 1).is_empty());
    }

    #[test]
    fn large_input_preserves_line_and_offset_for_match() {
        let filler = "abcd1234\n".repeat(2000);
        let secret = "QwErTy123!@#ZxCvBn456$%^AsDfGh789&*(YuIoP0)_+LmNoPqRsTuV";
        let text = format!("{filler}API_KEY={secret}\n");
        let matches = find_entropy_secrets(&text, 16, 0);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].value, secret);
        assert_eq!(matches[0].line, 2001);
        assert_eq!(matches[0].offset, filler.len());
    }
}
920
/// Reports whether `bytes` looks like a placeholder rather than a real secret.
///
/// All comparisons ignore ASCII case. A value is a placeholder when it
/// * contains `<` or `>`,
/// * is, as a whole, one of the generic words in `PLACEHOLDER_WORDS`, or
/// * contains any of `PLACEHOLDER_FRAGMENTS` as a substring.
fn is_placeholder_ci(bytes: &[u8]) -> bool {
    // Matched anywhere inside the value.
    const PLACEHOLDER_FRAGMENTS: &[&[u8]] = &[
        b"example",
        b"placeholder",
        b"change_me",
        b"changeme",
        b"your_",
        b"your-",
        b"xxx",
        b"todo",
        b"fixme",
        b"replace",
        b"insert",
        b"enter_",
        b"enter-",
        b"dummy",
        b"sample",
        b"demo",
        b"fake",
        b"mock",
        b"goes-here",
        b"fill_in",
        b"not-a-real",
        b"not_a_real",
    ];
    // Matched only against the entire value, so e.g. `nullable` is unaffected.
    const PLACEHOLDER_WORDS: &[&[u8]] = &[
        b"null",
        b"none",
        b"undefined",
        b"empty",
        b"default",
        b"secret",
        b"password",
    ];

    bytes.iter().any(|&b| b == b'<' || b == b'>')
        || PLACEHOLDER_WORDS
            .iter()
            .any(|word| bytes.eq_ignore_ascii_case(word))
        || PLACEHOLDER_FRAGMENTS.iter().any(|fragment| {
            bytes
                .windows(fragment.len())
                .any(|window| window.eq_ignore_ascii_case(fragment))
        })
}