1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
//! Fast probabilistic gating to reject obvious non-secrets before heavy ML scoring.
//!
//! Uses character diversity and a UUID shape check to reject candidates that are
//! almost certainly noise. Bigram analysis and detection of other high-entropy
//! non-secrets (hashes, base64-encoded binary) are planned but not implemented yet.
/// A tiny statistical gate for fast candidate rejection.
///
/// The gate is stateless; its functionality is exposed through the associated
/// function [`ProbabilisticGate::looks_promising`].
pub struct ProbabilisticGate;

impl ProbabilisticGate {
    /// Candidates shorter than this many bytes are never rejected: there is too
    /// little signal for the heuristics to be reliable.
    const MIN_GATED_LEN: usize = 16;

    /// Minimum number of distinct characters a candidate must contain to pass.
    const MIN_DISTINCT_CHARS: usize = 5;

    /// Returns `true` if the candidate string looks like a potential secret and
    /// `false` if it is almost certainly noise.
    ///
    /// The checks, in order:
    ///
    /// 1. Candidates shorter than 16 bytes always pass (too short to gate).
    /// 2. Canonical textual UUIDs (`8-4-4-4-12` hex digits, 36 bytes) are rejected.
    /// 3. Candidates with fewer than 5 distinct characters are rejected
    ///    (e.g. `"aaaaaaaaaaaaaaaa"`).
    ///
    /// Everything else passes. Diversity is measured over Unicode scalar values,
    /// so non-ASCII candidates are judged by the same rule as ASCII ones.
    pub fn looks_promising(s: &str) -> bool {
        if s.len() < Self::MIN_GATED_LEN {
            return true; // Too short for reliable gating
        }

        if Self::is_canonical_uuid(s) {
            return false;
        }

        // Extremely low diversity (e.g. "aaaaaaaaaaaaaaaa") is rejected.
        //
        // A bigram transition check is planned as a further stage (placeholder
        // for a more advanced bigram frequency table); until then every
        // sufficiently diverse candidate passes.
        Self::has_min_distinct_chars(s)
    }

    /// Returns `true` if `s` is exactly a hyphenated UUID: 36 bytes with `-` at
    /// byte offsets 8, 13, 18 and 23 and ASCII hex digits everywhere else.
    fn is_canonical_uuid(s: &str) -> bool {
        const UUID_LEN: usize = 36;

        let bytes = s.as_bytes();
        bytes.len() == UUID_LEN
            && bytes.iter().enumerate().all(|(i, b)| match i {
                8 | 13 | 18 | 23 => *b == b'-',
                _ => b.is_ascii_hexdigit(),
            })
    }

    /// Returns `true` as soon as `s` is known to contain at least
    /// `MIN_DISTINCT_CHARS` distinct characters.
    ///
    /// Only the first few distinct characters need to be remembered, so a small
    /// fixed-size buffer is enough and the scan can stop early.
    fn has_min_distinct_chars(s: &str) -> bool {
        let mut seen = ['\0'; ProbabilisticGate::MIN_DISTINCT_CHARS];
        let mut distinct = 0;

        for c in s.chars() {
            if !seen[..distinct].contains(&c) {
                // `distinct` is always below the buffer length here because the
                // function returns the moment the buffer fills up.
                seen[distinct] = c;
                distinct += 1;
                if distinct == Self::MIN_DISTINCT_CHARS {
                    return true;
                }
            }
        }

        false
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the gate against known secret-shaped strings (must pass) and
    /// known noise (must be rejected).
    #[test]
    fn test_looks_promising() {
        // Real secrets
        let real_secrets = [
            "ghp_abcdefghijklmnopqrstuvwxyz1234567890",
            "sk-proj-abcdefghijklmnopqrstuvwxyz123456",
        ];
        for &candidate in real_secrets.iter() {
            assert!(
                ProbabilisticGate::looks_promising(candidate),
                "expected {:?} to pass the gate",
                candidate
            );
        }

        // Obvious noise
        let noise = [
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // low diversity
            "550e8400-e29b-41d4-a716-446655440000",   // UUID
        ];
        for &candidate in noise.iter() {
            assert!(
                !ProbabilisticGate::looks_promising(candidate),
                "expected {:?} to be rejected by the gate",
                candidate
            );
        }
    }
}