1use crate::error::{RuntimeError, RuntimeResult};
35
/// A single hit reported by the secret scanner in this module.
///
/// Only the detector name and a masked excerpt (as produced by `build_match`)
/// are kept; the raw matched text is not stored in this value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SecretMatch {
    /// Name of the detector that fired, e.g. `"aws-access-key-id"`.
    pub detector: &'static str,
    /// Masked excerpt of the matched text, in the form `<leading chars>...<N>chars`.
    pub masked: String,
}
49
50impl std::fmt::Display for SecretMatch {
51 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
52 write!(
53 f,
54 "content matches secret pattern {} at masked excerpt {}",
55 self.detector, self.masked
56 )
57 }
58}
59
60pub fn check(content: &str) -> RuntimeResult<()> {
65 if let Some(m) = scan(content) {
66 return Err(RuntimeError::SecretDetected(m));
67 }
68 Ok(())
69}
70
/// Scans a JSON value for secrets by delegating to `scan_json_value`, which
/// walks string values, array elements and object keys/values recursively.
///
/// # Errors
///
/// Propagates the first error returned by `scan_json_value`.
pub fn check_json(value: &serde_json::Value) -> RuntimeResult<()> {
    scan_json_value(value)
}
79
80pub fn check_tags(tags: &[String]) -> RuntimeResult<()> {
84 for tag in tags {
85 check(tag)?;
86 }
87 Ok(())
88}
89
90fn scan_json_value(value: &serde_json::Value) -> RuntimeResult<()> {
91 match value {
92 serde_json::Value::String(s) => check(s),
93 serde_json::Value::Array(arr) => {
94 for v in arr {
95 scan_json_value(v)?;
96 }
97 Ok(())
98 }
99 serde_json::Value::Object(map) => {
100 for (k, v) in map {
101 check(k)?;
104 scan_json_value(v)?;
105 }
106 Ok(())
107 }
108 _ => Ok(()),
109 }
110}
111
112fn scan(text: &str) -> Option<SecretMatch> {
116 if let Some(m) = check_known_patterns(text) {
118 return Some(m);
119 }
120 if let Some(m) = check_entropy_heuristic(text) {
122 return Some(m);
123 }
124 None
125}
126
/// Prefix-based detectors, as `(detector name, literal prefix, minimum token length)`.
///
/// `check_known_patterns` tries the entries in order through `find_prefix_token`.
/// An entry fires when the whitespace-delimited token that starts at the prefix
/// is at least the given number of bytes long (the prefix itself is counted).
const PREFIX_DETECTORS: &[(&str, &str, usize)] = &[
    ("aws-access-key-id", "AKIA", 20),
    ("aws-access-key-id", "ASIA", 20),
    ("github-token", "ghp_", 36),
    ("github-token", "gho_", 36),
    ("github-token", "github_pat_", 20),
    ("openai-api-key", "sk-proj-", 40),
    ("anthropic-api-key", "sk-ant-", 20),
    ("stripe-secret-key", "sk_live_", 30),
    ("stripe-restricted-key", "rk_live_", 30),
    ("fly-token", "fm2_", 20),
    ("vercel-token", "vercel_", 20),
    ("slack-token", "xoxb-", 40),
    ("slack-token", "xoxa-", 40),
    ("slack-token", "xoxp-", 40),
    ("slack-token", "xoxr-", 40),
    ("slack-token", "xoxs-", 40),
    ("age-secret-key", "AGE-SECRET-KEY-", 60),
];
164
/// Token prefixes that exempt a generic `sk-` token from being reported as
/// `openai-api-key` by `check_known_patterns`.
const SK_SAFE_PREFIXES: &[&str] = &["sk-learn", "sk-image", "sk-lego", "sk-base", "sk-misc"];
168
169fn check_known_patterns(text: &str) -> Option<SecretMatch> {
171 for &(name, needle, min_len) in PREFIX_DETECTORS {
173 if let Some(m) = find_prefix_token(text, needle, min_len) {
174 return Some(build_match(name, m));
175 }
176 }
177
178 if let Some(token) = find_prefix_token(text, "sk-", 30) {
181 if !SK_SAFE_PREFIXES.iter().any(|safe| token.starts_with(safe)) {
182 return Some(build_match("openai-api-key", token));
183 }
184 }
185
186 if let Some(pos) = text.find("FlyV1 ") {
191 let at_boundary = pos == 0 || {
192 text[..pos]
193 .chars()
194 .next_back()
195 .is_none_or(|c| !c.is_alphanumeric())
196 };
197 if at_boundary {
198 let payload_start = pos + 6; let payload = extract_token(&text[payload_start..]);
200 if payload.len() >= 4 {
201 let candidate = &text[pos..payload_start + payload.len()];
202 return Some(build_match("fly-token", candidate));
203 }
204 }
205 }
206
207 if text.contains("-----BEGIN") && text.contains("PRIVATE KEY-----") {
210 if let Some(pos) = text.find("-----BEGIN") {
211 let block_end = text[pos..]
215 .find("-----END")
216 .map(|rel| {
217 text[pos + rel..]
218 .find('\n')
219 .map(|l| pos + rel + l + 1)
220 .unwrap_or(text.len())
221 })
222 .unwrap_or(text.len());
223 let excerpt = &text[pos..block_end];
224 return Some(build_match("pem-private-key", excerpt));
225 }
226 }
227
228 if let Some(m) = find_jwt(text) {
231 return Some(build_match("jwt", m));
232 }
233
234 if let Some(m) = find_url_userinfo(text) {
236 return Some(build_match("url-userinfo", m));
237 }
238
239 None
240}
241
242fn find_prefix_token<'a>(text: &'a str, needle: &str, min_len: usize) -> Option<&'a str> {
245 let mut start = 0;
246 while let Some(rel) = text[start..].find(needle) {
247 let abs = start + rel;
248 let at_boundary = abs == 0 || {
251 let prev = text[..abs].chars().next_back().unwrap_or(' ');
252 !prev.is_alphanumeric()
253 };
254 if at_boundary {
255 let token = extract_token(&text[abs..]);
256 if token.len() >= min_len {
257 return Some(token);
258 }
259 }
260 start = abs + needle.len().max(1);
261 }
262 None
263}
264
/// Finds the first JWT-shaped token in `text`.
///
/// A candidate starts at `eyJ` and runs to the next space, tab, CR or LF (or
/// the end of input). It is accepted when it has at least two dots and both
/// of its first two dot-separated segments start with `eyJ` and are at least
/// 10 bytes long. A rejected candidate is skipped in its entirety.
fn find_jwt(text: &str) -> Option<&str> {
    const MARKER: &str = "eyJ";
    let total = text.len();
    let mut cursor = 0;
    while cursor + 4 < total {
        let start = cursor + text[cursor..].find(MARKER)?;
        if start + 4 >= total {
            // Too close to the end of input to be considered.
            return None;
        }
        let end = text[start..]
            .find(|c: char| matches!(c, ' ' | '\n' | '\r' | '\t'))
            .map_or(total, |offset| start + offset);
        let candidate = &text[start..end];

        if let Some((header, remainder)) = candidate.split_once('.') {
            if let Some((payload, _signature)) = remainder.split_once('.') {
                let well_formed = header.starts_with(MARKER)
                    && payload.starts_with(MARKER)
                    && header.len() >= 10
                    && payload.len() >= 10;
                if well_formed {
                    return Some(candidate);
                }
            }
        }
        cursor = end + 1;
    }
    None
}
299
/// Finds the first URL in `text` that embeds `user:password@` userinfo.
///
/// For every `://` occurrence, the text up to the next `@` is treated as the
/// userinfo. It is accepted when it contains a `:` with a non-empty user
/// before it and at least 4 bytes after it, and holds no space or newline.
/// The returned slice runs from the start of the scheme (the run of ASCII
/// alphanumerics, `+`, `-` and `.` before `://`) to the next space, CR or LF
/// after the `@`, or to the end of `text`.
fn find_url_userinfo(text: &str) -> Option<&str> {
    let mut cursor = 0usize;
    while let Some(rel) = text[cursor..].find("://") {
        let sep = cursor + rel;
        let rest_start = sep + 3;
        let rest = &text[rest_start..];
        cursor = rest_start;

        // Every later candidate is a suffix of `rest`, so a missing '@' here
        // means no later candidate can have one either.
        let at_pos = rest.find('@')?;
        let userinfo = &rest[..at_pos];
        let Some((user, pass)) = userinfo.split_once(':') else {
            continue;
        };
        if user.is_empty()
            || pass.len() < 4
            || userinfo.contains(' ')
            || userinfo.contains('\n')
        {
            continue;
        }

        // Walk back to the first character that cannot be part of a scheme and
        // start right after it. The character may be multi-byte, so step by
        // its UTF-8 width; a fixed `+ 1` would land inside it and make the
        // slice below panic.
        let scheme_start = text[..sep]
            .char_indices()
            .rev()
            .find(|&(_, c)| !(c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.')))
            .map_or(0, |(idx, c)| idx + c.len_utf8());

        let after_at = rest_start + at_pos + 1;
        let end = text[after_at..]
            .find(|c: char| matches!(c, ' ' | '\n' | '\r'))
            .map_or(text.len(), |offset| after_at + offset);
        return Some(&text[scheme_start..end]);
    }
    None
}
343
/// Lower-case substrings that mark the text around a candidate token as a
/// credential context.
///
/// `check_entropy_heuristic` looks for them with `contains` on the lower-cased
/// window, so they also match inside longer words. The word `token` is not
/// listed here; it is handled by `has_standalone_token` and
/// `has_token_assignment`.
const TRIGGER_WORDS: &[&str] = &[
    "key",
    "secret",
    "password",
    "passwd",
    "credential",
    "bearer",
    "auth",
    "apikey",
    "api_key",
    "access_key",
    "private_key",
];
365
/// Minimum byte length of a delimiter-stripped token for the entropy
/// heuristic to consider it at all.
const MIN_ENTROPY_LEN: usize = 24;

/// Minimum byte-level Shannon entropy (in bits, as computed by
/// `shannon_entropy`) for a token to be reported as `high-entropy-token`.
const ENTROPY_THRESHOLD: f64 = 4.5;

/// Number of bytes on each side of a token that are searched for trigger words.
const TRIGGER_WINDOW: usize = 120;
376
377fn check_entropy_heuristic(text: &str) -> Option<SecretMatch> {
378 let tokens: Vec<(usize, &str)> = text
380 .split_ascii_whitespace()
381 .map(|t| {
382 let offset = t.as_ptr() as usize - text.as_ptr() as usize;
383 (offset, t)
384 })
385 .collect();
386
387 for &(tok_offset, raw_token) in &tokens {
388 let token = strip_delimiters(raw_token);
390 if token.len() < MIN_ENTROPY_LEN {
391 continue;
392 }
393
394 if is_uuid_canonical(token) || is_base64_content_hash(token) {
398 continue;
399 }
400
401 let window_start = tok_offset.saturating_sub(TRIGGER_WINDOW);
405 let window_end = (tok_offset + raw_token.len() + TRIGGER_WINDOW).min(text.len());
406 let window = &text[window_start..window_end];
407 let low_window = window.to_ascii_lowercase();
408
409 let near_trigger = TRIGGER_WORDS.iter().any(|tw| low_window.contains(tw))
410 || has_standalone_token(&low_window)
411 || has_token_assignment(&low_window);
412
413 if !near_trigger && is_pure_hex(token) {
416 continue;
417 }
418
419 const HEX_CREDENTIAL_LENGTHS: &[usize] = &[32, 40, 64, 128];
430 if near_trigger && is_pure_hex(token) && HEX_CREDENTIAL_LENGTHS.contains(&token.len()) {
431 return Some(build_match("hex-credential-token", token));
432 }
433
434 let entropy = shannon_entropy(token.as_bytes());
435 if entropy < ENTROPY_THRESHOLD {
436 continue;
437 }
438
439 if near_trigger {
441 return Some(build_match("high-entropy-token", token));
442 }
443 }
444 None
445}
446
/// Returns `true` when `low_window` contains the word `token` with no
/// alphanumeric character or underscore directly before or after it.
///
/// The caller is expected to pass already lower-cased text.
fn has_standalone_token(low_window: &str) -> bool {
    const WORD: &str = "token";

    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    low_window.match_indices(WORD).any(|(at, _)| {
        let glued_before = low_window[..at]
            .chars()
            .next_back()
            .is_some_and(is_ident_char);
        let glued_after = low_window[at + WORD.len()..]
            .chars()
            .next()
            .is_some_and(is_ident_char);
        !glued_before && !glued_after
    })
}
473
/// Returns `true` when `low_window` contains `token` immediately followed by
/// `=` or `:` and not directly preceded by an alphanumeric character or
/// underscore.
///
/// The caller is expected to pass already lower-cased text.
fn has_token_assignment(low_window: &str) -> bool {
    const WORD: &str = "token";

    low_window.match_indices(WORD).any(|(at, _)| {
        let follows_ident = low_window[..at]
            .chars()
            .next_back()
            .is_some_and(|c| c.is_alphanumeric() || c == '_');
        let next = low_window[at + WORD.len()..].chars().next();
        !follows_ident && matches!(next, Some('=' | ':'))
    })
}
508
/// Returns `true` when `token`, after removing one optional `0x`/`0X` prefix,
/// consists solely of 8 to 128 ASCII hex digits.
fn is_pure_hex(token: &str) -> bool {
    let digits = match token.strip_prefix("0x") {
        Some(rest) => rest,
        None => token.strip_prefix("0X").unwrap_or(token),
    };
    (8..=128).contains(&digits.len()) && digits.bytes().all(|b| b.is_ascii_hexdigit())
}
524
/// Returns `true` when `token` looks like a `sha<digits>-<base64>` content
/// hash (the shape used by integrity attributes).
///
/// The part after the first `-` may end in up to two `=` padding characters;
/// without padding it must be 43, 64 or 86 to 88 bytes long and consist only
/// of ASCII alphanumerics, `+`, `/`, `-` and `_`. Tokens that start with a
/// known vendor credential prefix are never treated as hashes.
fn is_base64_content_hash(token: &str) -> bool {
    const VENDOR_PREFIXES: &[&str] = &[
        "sk-",
        "rk_live_",
        "fm2_",
        "vercel_",
        "xoxb-",
        "xoxa-",
        "xoxp-",
        "xoxr-",
        "xoxs-",
        "ghp_",
        "gho_",
        "github_pat_",
        "AKIA",
        "ASIA",
        "AGE-SECRET-KEY-",
        "FlyV1",
    ];
    if VENDOR_PREFIXES.iter().any(|&prefix| token.starts_with(prefix)) {
        return false;
    }

    // Require the `sha<digits>-` label in front of the digest body.
    let Some(after_sha) = token.strip_prefix("sha") else {
        return false;
    };
    let Some((bits, body)) = after_sha.split_once('-') else {
        return false;
    };
    if bits.is_empty() || !bits.bytes().all(|b| b.is_ascii_digit()) {
        return false;
    }

    let unpadded = body.trim_end_matches('=');
    if body.len() - unpadded.len() > 2 {
        return false;
    }
    if !matches!(unpadded.len(), 43 | 64 | 86..=88) {
        return false;
    }
    unpadded
        .bytes()
        .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'/' | b'-' | b'_'))
}
594
/// Returns `true` when `s` has the canonical 8-4-4-4-12 UUID layout:
/// exactly 36 bytes, hyphens at offsets 8, 13, 18 and 23, and ASCII hex
/// digits everywhere else.
fn is_uuid_canonical(s: &str) -> bool {
    const HYPHEN_OFFSETS: [usize; 4] = [8, 13, 18, 23];

    s.len() == 36
        && s.bytes().enumerate().all(|(offset, byte)| {
            if HYPHEN_OFFSETS.contains(&offset) {
                byte == b'-'
            } else {
                byte.is_ascii_hexdigit()
            }
        })
}
611
/// Trims quote characters (`"`, `'`, `` ` ``) and the punctuation `:`, `=`,
/// `,`, `;` from both ends of `s`. Characters in the middle are untouched.
fn strip_delimiters(s: &str) -> &str {
    const DELIMITERS: &[char] = &['"', '\'', '`', ':', '=', ',', ';'];
    s.trim_matches(DELIMITERS)
}
616
/// Returns the leading part of `s` up to, but not including, the first
/// whitespace character, or all of `s` when it contains none.
fn extract_token(s: &str) -> &str {
    match s.find(char::is_whitespace) {
        Some(end) => &s[..end],
        None => s,
    }
}
626
/// Computes the Shannon entropy of `bytes` in bits, treating every byte value
/// as one symbol. Returns `0.0` for empty input.
fn shannon_entropy(bytes: &[u8]) -> f64 {
    if bytes.is_empty() {
        return 0.0;
    }

    // Histogram of byte values.
    let mut histogram = [0u32; 256];
    bytes
        .iter()
        .for_each(|&byte| histogram[usize::from(byte)] += 1);

    let total = bytes.len() as f64;
    histogram
        .iter()
        .copied()
        .filter(|&count| count != 0)
        .map(|count| {
            let probability = f64::from(count) / total;
            -probability * probability.log2()
        })
        .sum()
}
648
649fn build_match(detector: &'static str, candidate: &str) -> SecretMatch {
654 let chars: Vec<char> = candidate.chars().collect();
655 let preview: String = chars.iter().take(6).collect();
656 let masked = format!("{}...{}chars", preview, chars.len());
657 SecretMatch { detector, masked }
658}
659
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `scan` and returns only the name of the detector that fired.
    fn detector_for(text: &str) -> Option<&'static str> {
        scan(text).map(|hit| hit.detector)
    }

    // ---------------------------------------------------------------------
    // Known vendor patterns must be blocked.
    // ---------------------------------------------------------------------

    #[test]
    fn blocks_aws_akia() {
        let sample = "AKIAFAKEKEY1234567890";
        let outcome = scan(sample);
        assert!(outcome.is_some(), "AKIA must be caught");
        let hit = outcome.unwrap();
        assert_eq!(hit.detector, "aws-access-key-id");
        assert!(
            !hit.masked.contains("FAKEKEY1234567890"),
            "must not echo the secret: {}",
            hit.masked
        );
    }

    #[test]
    fn blocks_aws_asia() {
        let fired = detector_for("ASIAFAKEKEY00000000000");
        assert!(fired.is_some(), "ASIA must be caught");
        assert_eq!(fired, Some("aws-access-key-id"));
    }

    #[test]
    fn blocks_github_ghp() {
        let outcome = scan("ghp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");
        assert!(outcome.is_some(), "ghp_ must be caught");
    }

    #[test]
    fn blocks_github_gho() {
        let outcome = scan("gho_BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB");
        assert!(outcome.is_some(), "gho_ must be caught");
    }

    #[test]
    fn blocks_github_pat() {
        let outcome = scan("github_pat_AAAAAABBBBBBCCCCCC");
        assert!(outcome.is_some(), "github_pat_ must be caught");
    }

    #[test]
    fn blocks_openai_sk() {
        let outcome = scan("sk-aaaaaabbbbbbccccccddddddeeeeeeffffgg");
        assert!(outcome.is_some(), "sk- must be caught");
    }

    #[test]
    fn blocks_anthropic_sk_ant() {
        let fired = detector_for("sk-ant-api03-AAAAAAAAAAAAAAA");
        assert!(fired.is_some(), "sk-ant- must be caught");
        assert_eq!(fired, Some("anthropic-api-key"));
    }

    #[test]
    fn blocks_stripe_live() {
        let fired = detector_for("sk_live_FAKESTRIPE0000000000000");
        assert!(fired.is_some(), "sk_live_ must be caught");
        assert_eq!(fired, Some("stripe-secret-key"));
    }

    #[test]
    fn blocks_stripe_restricted() {
        let fired = detector_for("rk_live_FAKESTRIPE0000000000000");
        assert!(fired.is_some(), "rk_live_ must be caught");
        assert_eq!(fired, Some("stripe-restricted-key"));
    }

    #[test]
    fn blocks_fly_flyv1() {
        let fired = detector_for("FlyV1 FAKEFLYTOKEN000000000000000000");
        assert!(fired.is_some(), "FlyV1 must be caught");
        assert_eq!(fired, Some("fly-token"));
    }

    #[test]
    fn blocks_fly_fm2() {
        let fired = detector_for("fm2_FAKEFLYTOKEN00000000000000000");
        assert!(fired.is_some(), "fm2_ must be caught");
        assert_eq!(fired, Some("fly-token"));
    }

    #[test]
    fn blocks_vercel_token() {
        let fired = detector_for("vercel_FAKETOKEN00000000000000000");
        assert!(fired.is_some(), "vercel_ must be caught");
        assert_eq!(fired, Some("vercel-token"));
    }

    #[test]
    fn blocks_slack_xoxb() {
        let fired = detector_for("xoxb-FAKE-SLACKTOKEN-000000000000000000000000");
        assert!(fired.is_some(), "xoxb- must be caught");
        assert_eq!(fired, Some("slack-token"));
    }

    #[test]
    fn blocks_pem_private_key() {
        // The header is assembled from two pieces, as in the original fixture.
        let header = ["-----BEGIN RSA", " PRIVATE KEY-----"].concat();
        let sample = format!("{}\nMIIEo\u{2026}\n-----END RSA PRIVATE KEY-----", header);
        let fired = detector_for(&sample);
        assert!(fired.is_some(), "PEM private key must be caught");
        assert_eq!(fired, Some("pem-private-key"));
    }

    #[test]
    fn blocks_pem_ec_private_key() {
        let header = ["-----BEGIN EC", " PRIVATE KEY-----"].concat();
        let sample = format!("{}\nMHQCAQEE\u{2026}\n-----END EC PRIVATE KEY-----", header);
        assert!(scan(&sample).is_some(), "EC PEM must be caught");
    }

    #[test]
    fn blocks_age_secret_key() {
        let fired =
            detector_for("AGE-SECRET-KEY-1QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ");
        assert!(fired.is_some(), "AGE-SECRET-KEY- must be caught");
        assert_eq!(fired, Some("age-secret-key"));
    }

    #[test]
    fn blocks_jwt_triple() {
        let fired = detector_for(
            "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.FAKE_SIG_XXXXXXXXXXXX",
        );
        assert!(fired.is_some(), "JWT triple must be caught");
        assert_eq!(fired, Some("jwt"));
    }

    #[test]
    fn blocks_url_userinfo() {
        let fired = detector_for("postgresql://dbuser:S3cr3tP4ss@db.example.com:5432/mydb");
        assert!(fired.is_some(), "URL userinfo must be caught");
        assert_eq!(fired, Some("url-userinfo"));
    }

    #[test]
    fn blocks_high_entropy_near_bearer_word() {
        let fired = detector_for("Bearer token: Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvM");
        assert!(
            fired.is_some(),
            "high-entropy value near 'bearer' must be caught"
        );
        assert_eq!(fired, Some("high-entropy-token"));
    }

    #[test]
    fn blocks_high_entropy_near_secret_word() {
        let outcome = scan("secret=Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvM");
        assert!(
            outcome.is_some(),
            "high-entropy value near 'secret' must be caught"
        );
    }

    #[test]
    fn error_message_masks_secret() {
        let hit = scan("ghp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA").unwrap();
        let masked = &hit.masked;
        assert!(
            !masked.contains("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"),
            "mask must not echo the full secret value; got: {masked}"
        );
        assert!(
            masked.starts_with("ghp_AA"),
            "mask must show first 6 chars; got: {masked}"
        );
    }

    // ---------------------------------------------------------------------
    // Benign content must pass.
    // ---------------------------------------------------------------------

    #[test]
    fn allows_sha256_hex() {
        let outcome = scan("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
        assert!(
            outcome.is_none(),
            "sha256 hex must pass (allowlisted); fired: {:?}",
            outcome
        );
    }

    #[test]
    fn allows_uuid() {
        let outcome = scan("550e8400-e29b-41d4-a716-446655440000");
        assert!(outcome.is_none(), "UUID must pass; fired: {:?}", outcome);
    }

    #[test]
    fn allows_git_sha() {
        let outcome = scan("d362950a3c9b1a4cb47d97f1623e38f1a1e6bcdf");
        assert!(outcome.is_none(), "git SHA must pass; fired: {:?}", outcome);
    }

    #[test]
    fn allows_normal_prose() {
        let outcome = scan(
            "The FlashAttention paper introduces IO-aware tiling for transformer self-attention.",
        );
        assert!(outcome.is_none(), "normal prose must pass");
    }

    #[test]
    fn allows_code_snippet() {
        let snippet = r#"fn create_entity(name: &str, kind: &str) -> RuntimeResult<Entity> {
    self.validate_entity_kind(kind)?;
    Ok(Entity::new("local", kind, name))
}"#;
        let outcome = scan(snippet);
        assert!(
            outcome.is_none(),
            "code snippet must pass; fired: {:?}",
            outcome
        );
    }

    #[test]
    fn allows_long_url_without_credentials() {
        let outcome = scan("https://docs.example.com/api/v2/entities?kind=concept&limit=100");
        assert!(outcome.is_none(), "URL without userinfo must pass");
    }

    #[test]
    fn allows_base64_image_stub() {
        let outcome = scan(
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAAC0lEQVQI12NgAAIABQ",
        );
        assert!(
            outcome.is_none(),
            "base64 image stub without trigger word must pass; fired: {:?}",
            outcome
        );
    }

    #[test]
    fn allows_long_plain_url() {
        let outcome =
            scan("https://api.github.com/repos/ohdearquant/khive/pulls/76/comments?per_page=100");
        assert!(outcome.is_none(), "plain URL must pass; fired: {:?}", outcome);
    }

    #[test]
    fn allows_manifest_content_hash() {
        let outcome = scan(
            "checksum = \"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\"",
        );
        assert!(
            outcome.is_none(),
            "manifest content hash line must pass; fired: {:?}",
            outcome
        );
    }

    #[test]
    fn masked_excerpt_format() {
        let hit = scan("AKIAFAKEKEY1234567890").unwrap();
        assert!(hit.masked.contains("..."), "masked must contain '...'");
        assert!(hit.masked.ends_with("chars"), "masked must end with 'chars'");
    }

    // ---------------------------------------------------------------------
    // Public `check` entry point.
    // ---------------------------------------------------------------------

    #[test]
    fn check_returns_ok_for_safe_content() {
        assert!(check("A normal memory note about LoRA.").is_ok());
    }

    #[test]
    fn check_returns_err_for_secret() {
        let verdict = check("AKIAFAKEKEY1234567890");
        assert!(verdict.is_err(), "check must fail for AKIA key");
        let failure = verdict.unwrap_err();
        assert!(
            matches!(failure, RuntimeError::SecretDetected(_)),
            "error variant must be SecretDetected"
        );
    }

    // ---------------------------------------------------------------------
    // Helper functions.
    // ---------------------------------------------------------------------

    #[test]
    fn entropy_of_uniform_string_is_zero() {
        let uniform = "aaaaaaaaaaaaaaaa";
        assert!(shannon_entropy(uniform.as_bytes()) < 0.01);
    }

    #[test]
    fn entropy_of_random_bytes_is_high() {
        let sample = b"X9kZ2vQpLrT8nJwYuAeHfBsDcGiONvM1";
        let measured = shannon_entropy(sample);
        assert!(measured > 4.5, "entropy={}", measured);
    }

    #[test]
    fn allowlist_passes_sha256() {
        assert!(is_pure_hex(
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        ));
    }

    #[test]
    fn allowlist_passes_uuid_canonical() {
        assert!(is_uuid_canonical("550e8400-e29b-41d4-a716-446655440000"));
    }

    #[test]
    fn allowlist_does_not_pass_mixed_token() {
        assert!(!is_pure_hex("sk-aaaaaabbbbbbccccccddddddeeeeeeffffgg"));
    }

    // ---------------------------------------------------------------------
    // JSON and tag entry points.
    // ---------------------------------------------------------------------

    #[test]
    fn check_json_blocks_secret_in_object_value() {
        let payload = serde_json::json!({ "api_key": "AKIAFAKEKEY1234567890" });
        assert!(
            check_json(&payload).is_err(),
            "secret in properties object value must be blocked"
        );
    }

    #[test]
    fn check_json_blocks_secret_in_nested_object() {
        let payload = serde_json::json!({
            "credentials": { "token": "sk-proj-FAKEKEY00000000000000000000000000000000" }
        });
        assert!(
            check_json(&payload).is_err(),
            "secret in nested properties object must be blocked"
        );
    }

    #[test]
    fn check_json_blocks_secret_in_array() {
        let payload = serde_json::json!(["normal", "AKIAFAKEKEY1234567890"]);
        assert!(
            check_json(&payload).is_err(),
            "secret in JSON array must be blocked"
        );
    }

    #[test]
    fn check_json_passes_safe_properties() {
        let payload = serde_json::json!({
            "domain": "attention",
            "status": "researched",
            "year": 2024
        });
        let verdict = check_json(&payload);
        assert!(
            verdict.is_ok(),
            "normal properties must pass; fired: {:?}",
            verdict.as_ref().err()
        );
    }

    #[test]
    fn check_tags_blocks_credential_tag() {
        let labels = vec![
            "type:concept".to_string(),
            "AKIAFAKEKEY1234567890".to_string(),
        ];
        assert!(
            check_tags(&labels).is_err(),
            "credential-shaped tag must be blocked"
        );
    }

    #[test]
    fn check_tags_passes_normal_tags() {
        let labels = vec!["type:concept".to_string(), "domain:attention".to_string()];
        let verdict = check_tags(&labels);
        assert!(
            verdict.is_ok(),
            "normal tags must pass; fired: {:?}",
            verdict.as_ref().err()
        );
    }

    // ---------------------------------------------------------------------
    // `sk-` allow-list.
    // ---------------------------------------------------------------------

    #[test]
    fn allows_sk_learn_prose() {
        let samples = [
            "sk-learn is a Python machine learning library",
            "sk-learn-compatible transformer pipeline reference",
            "sk-learn scikit-learn estimator interface",
        ];
        for sample in samples.iter() {
            let outcome = scan(sample);
            assert!(
                outcome.is_none(),
                "sk-learn prose must pass; fired: {:?} on {:?}",
                outcome,
                sample
            );
        }
    }

    #[test]
    fn blocks_openai_sk_proj_not_confused_with_sk_learn() {
        let outcome = scan("sk-proj-FAKEKEY00000000000000000000000000000000");
        assert!(
            outcome.is_some(),
            "sk-proj- key must still be caught after sk-learn exemption"
        );
    }

    // ---------------------------------------------------------------------
    // Content hashes and hash-like metadata.
    // ---------------------------------------------------------------------

    #[test]
    fn allows_sri_hash() {
        let outcome = scan(
            "integrity key: sha384-oqVuAfXRKap7fdgcCY5uykM6+R9GqQ8K/uxy9rx7HNQlGYl1kPzQho1wx4JwY8wC",
        );
        assert!(outcome.is_none(), "SRI hash must pass; fired: {:?}", outcome);
    }

    #[test]
    fn allows_base64_tokenizer_hash_metadata() {
        let outcome = scan("tokenizer_vocab_hash: Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvM");
        assert!(
            outcome.is_none(),
            "tokenizer hash metadata must pass; fired: {:?}",
            outcome
        );
    }

    #[test]
    fn allows_npm_lockfile_integrity() {
        let body_86 =
            "Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvM1234567890abcdefghijklmnopqrstuvwxABCDEFGHIJKLMNOPQRST";
        assert_eq!(body_86.len(), 86, "test body must be exactly 86 chars");
        let lockfile = format!(
            "resolved: https://registry.npmjs.org/foo/-/foo-1.0.0.tgz\nintegrity: sha512-{body_86}=="
        );
        let outcome = scan(&lockfile);
        assert!(
            outcome.is_none(),
            "npm lockfile integrity must pass; fired: {:?}",
            outcome
        );
    }

    #[test]
    fn allows_tokenizer_vocab_hash_no_block() {
        let outcome = scan("tokenizer_vocab_hash = Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvM");
        assert!(
            outcome.is_none(),
            "tokenizer_vocab_hash must pass; 'token' is only standalone-word matched; fired: {:?}",
            outcome
        );
    }

    // ---------------------------------------------------------------------
    // Bare base64url tokens near trigger words.
    // ---------------------------------------------------------------------

    #[test]
    fn blocks_bare_base64url_43chars_near_key() {
        let token_43 = "wJalrXUtnFEMI-K7MDENGbPxRfiCYEXAMPLEKEYX123";
        assert_eq!(token_43.len(), 43, "test token must be exactly 43 chars");
        let outcome = scan(&format!("api key {token_43}"));
        assert!(
            outcome.is_some(),
            "43-char base64url token near 'key' must be caught (no sha-prefix = not a hash); fired: {:?}",
            outcome
        );
    }

    #[test]
    fn blocks_bare_base64url_64chars_near_secret() {
        let token_64 = "wJalrXUtnFEMI-K7MDENGbPxRfiCYEXAMPLEKEYX123wJalrXUtnFEMI-K7MDENa";
        assert_eq!(token_64.len(), 64, "test token must be exactly 64 chars");
        let outcome = scan(&format!("secret: {token_64}"));
        assert!(
            outcome.is_some(),
            "64-char base64url token near 'secret' must be caught; got: {:?}",
            outcome
        );
    }

    #[test]
    fn blocks_bare_base64url_86chars_near_auth() {
        let token_86 =
            "wJalrXUtnFEMI-K7MDENGbPxRfiCYEXAMPLEKEYX123wJalrXUtnFEMI-K7MDENwJalrXUtnFEMI-K7MDENabc";
        assert_eq!(token_86.len(), 86, "test token must be exactly 86 chars");
        let outcome = scan(&format!("auth header {token_86}"));
        assert!(
            outcome.is_some(),
            "86-char base64url token near 'auth' must be caught; got: {:?}",
            outcome
        );
    }

    // ---------------------------------------------------------------------
    // The word "token" as a trigger.
    // ---------------------------------------------------------------------

    #[test]
    fn blocks_service_token_opaque_value() {
        let opaque = "Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvMabcdef";
        assert!(
            opaque.len() >= 24,
            "opaque must be long enough for entropy check"
        );
        let outcome = scan(&format!("service token {opaque}"));
        assert!(
            outcome.is_some(),
            "service token <opaque> must be caught by standalone 'token' check; got: {:?}",
            outcome
        );
    }

    #[test]
    fn blocks_token_equals_credential() {
        let opaque = "Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvMabcdef";
        let outcome = scan(&format!("token={opaque}"));
        assert!(
            outcome.is_some(),
            "token=<value> must be caught via token= trigger; got: {:?}",
            outcome
        );
    }

    #[test]
    fn blocks_token_colon_credential() {
        let opaque = "Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvMabcdef";
        let outcome = scan(&format!("token: {opaque}"));
        assert!(
            outcome.is_some(),
            "token: <value> must be caught via token: trigger; got: {:?}",
            outcome
        );
    }

    #[test]
    fn allows_next_token_technical_context() {
        let outcome = scan("next_token: cursor-page-2-abcdef12345678");
        assert!(
            outcome.is_none(),
            "next_token technical context must not be blocked; fired: {:?}",
            outcome
        );
    }

    #[test]
    fn allows_next_token_high_entropy_cursor() {
        let cursor = "Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvMabcdef";
        let outcome = scan(&format!("next_token: {cursor}"));
        assert!(
            outcome.is_none(),
            "next_token with high-entropy cursor must pass (compound identifier); fired: {:?}",
            outcome
        );
    }

    #[test]
    fn allows_token_count_high_entropy() {
        let opaque = "Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvMabcdef";
        let outcome = scan(&format!("token_count: {opaque}"));
        assert!(
            outcome.is_none(),
            "token_count with high-entropy value must pass; fired: {:?}",
            outcome
        );
    }

    // ---------------------------------------------------------------------
    // Pure-hex tokens: credential context versus hash context.
    // ---------------------------------------------------------------------

    #[test]
    fn hex_near_key_blocked_in_credential_context() {
        let hex32 = "4f9c2e8a1d3b5c7e9f0a2b4d6e8c0a2b";
        assert_eq!(hex32.len(), 32);
        let outcome = scan(&format!("api key {hex32}"));
        assert!(
            outcome.is_some(),
            "32-char pure hex near 'api key' must be blocked; got None"
        );
    }

    #[test]
    fn hex_credential_lengths_blocked_near_trigger() {
        let hex40 = "a3f5c2e9d1b8047e63a1f4c2d5b6e8f1a9c3d2e4";
        let hex64 = "1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b";
        let hex128 = format!("{hex64}{hex64}");
        assert_eq!(hex40.len(), 40);
        assert_eq!(hex64.len(), 64);
        assert_eq!(hex128.len(), 128);

        let cases = [
            ("hex40", hex40),
            ("hex64", hex64),
            ("hex128", hex128.as_str()),
        ];
        for (label, hex) in cases.iter() {
            let outcome = scan(&format!("secret key: {hex}"));
            assert!(
                outcome.is_some(),
                "{label} near 'secret key' must be blocked; got None"
            );
        }
    }

    #[test]
    fn hex_blocked_when_trigger_and_hash_word_coexist() {
        let hex32 = "4f9c2e8a1d3b5c7e9f0a2b4d6e8c0a2b";
        let with_hash_word = scan(&format!("api key hash {hex32}"));
        let with_sha_word = scan(&format!("secret sha {hex32}"));
        assert!(
            with_hash_word.is_some(),
            "'api key hash <hex32>' must be blocked; got None"
        );
        assert!(
            with_sha_word.is_some(),
            "'secret sha <hex32>' must be blocked; got None"
        );
    }

    #[test]
    fn hex_near_sha_context_word_allowed() {
        let hex40 = "da39a3ee5e6b4b0d3255bfef95601890afd80709";
        let labelled = scan(&format!("sha1: {hex40}"));
        let commit = scan(&format!("commit sha {hex40}"));
        assert!(
            labelled.is_none(),
            "hex40 near 'sha1' context must be allowed; fired: {:?}",
            labelled
        );
        assert!(
            commit.is_none(),
            "hex40 near 'commit sha' context must be allowed; fired: {:?}",
            commit
        );
    }

    #[test]
    fn hex64_near_hash_context_allowed() {
        let hex64 = "1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b";
        let labelled = scan(&format!("sha256: {hex64}"));
        let described = scan(&format!("hash value {hex64}"));
        assert!(
            labelled.is_none(),
            "hex64 near 'sha256' must be allowed; fired: {:?}",
            labelled
        );
        assert!(
            described.is_none(),
            "hex64 near 'hash' must be allowed; fired: {:?}",
            described
        );
    }

    #[test]
    fn blocks_high_entropy_hex_like_token_near_key() {
        let mixed = "Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvM";
        assert!(!is_pure_hex(mixed), "test token must not be pure hex");
        let outcome = scan(&format!("api key {mixed}"));
        assert!(
            outcome.is_some(),
            "mixed-charset high-entropy token near 'api key' must be caught; got: {:?}",
            outcome
        );
    }

    #[test]
    fn allows_hex40_without_trigger() {
        let hex40 = "da39a3ee5e6b4b0d3255bfef95601890afd80709";
        let outcome = scan(&format!("commit: {hex40}"));
        assert!(
            outcome.is_none(),
            "40-char hex without trigger word must pass; fired: {:?}",
            outcome
        );
    }

    // ---------------------------------------------------------------------
    // JSON object keys are scanned as well.
    // ---------------------------------------------------------------------

    #[test]
    fn check_json_blocks_secret_in_object_key() {
        let payload =
            serde_json::json!({ "ghp_FakeGitHubToken0000000000000000000": "redacted" });
        assert!(
            check_json(&payload).is_err(),
            "credential as JSON object key must be blocked"
        );
    }

    #[test]
    fn check_json_blocks_nested_secret_key() {
        let payload = serde_json::json!({
            "metadata": {
                "AKIAFAKEKEY000000000": "value"
            }
        });
        assert!(
            check_json(&payload).is_err(),
            "nested credential as JSON object key must be blocked"
        );
    }

    // ---------------------------------------------------------------------
    // PEM excerpt length.
    // ---------------------------------------------------------------------

    #[test]
    fn pem_masked_excerpt_reflects_block_length_not_rest_of_string() {
        let header = ["-----BEGIN RSA", " PRIVATE KEY-----"].concat();
        let sample = format!(
            "{}\nMIIEo\u{2026}\n-----END RSA PRIVATE KEY-----\nsome trailing text that is very long",
            header
        );
        let hit = scan(&sample).unwrap();
        assert_eq!(hit.detector, "pem-private-key");

        let full_len = sample.chars().count();
        // The masked form ends in "...<N>chars"; recover N. A parse failure
        // yields a value that makes the assertion below fail.
        let without_suffix = hit.masked.trim_end_matches("chars");
        let digits = without_suffix.rsplit("...").next();
        let reported_len: usize = digits
            .and_then(|text| text.parse().ok())
            .unwrap_or(full_len + 1);
        assert!(
            reported_len < full_len,
            "masked length ({reported_len}) should be less than full string length ({full_len})"
        );
    }
}