use crate::types::{Kind, Match};
use alloc::boxed::Box;
use alloc::vec::Vec;
/// A scanner for one category of sensitive data.
///
/// Implementors must be `Send + Sync`, as required by the supertrait bounds.
pub trait Detector: Send + Sync {
/// Returns the [`Kind`] associated with this detector.
fn kind(&self) -> Kind;
/// Scans `input`, using `out` as the output collection for findings.
///
/// The implementations in this file append one [`Match`] per finding and
/// never remove entries already present in `out`.
fn detect(&self, input: &str, out: &mut Vec<Match>);
}
/// Lets a boxed trait object be used wherever a concrete [`Detector`] is expected
/// by forwarding both methods to the boxed value.
impl Detector for Box<dyn Detector> {
    fn kind(&self) -> Kind {
        // Explicitly reborrow the boxed value so the call dispatches to the
        // inner detector rather than recursing into this impl.
        let inner: &dyn Detector = &**self;
        inner.kind()
    }

    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        let inner: &dyn Detector = &**self;
        inner.detect(input, out)
    }
}
/// A detector made of a stored [`Kind`] and a stored value `f`.
pub struct FnDetector<F> {
// Returned (cloned) by the `Detector::kind` implementation for this type.
kind: Kind,
// Invoked with `(input, out)` by the `Detector::detect` implementation.
f: F,
}
impl<F> FnDetector<F>
where
F: Fn(&str, &mut Vec<Match>) + Send + Sync,
{
pub fn new(kind: Kind, f: F) -> Self {
Self { kind, f }
}
}
impl<F> Detector for FnDetector<F>
where
    F: Fn(&str, &mut Vec<Match>) + Send + Sync,
{
    /// Returns a clone of the kind supplied at construction time.
    fn kind(&self) -> Kind {
        Clone::clone(&self.kind)
    }

    /// Hands `input` and `out` straight to the wrapped closure.
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        let callback = &self.f;
        callback(input, out)
    }
}
/// Returns `true` when `b` is one of the ASCII digits `0`–`9`.
#[inline]
fn is_ascii_digit(b: u8) -> bool {
    matches!(b, b'0'..=b'9')
}
/// Returns `true` for bytes of the base64url alphabet: ASCII letters, digits,
/// `-` and `_`.
#[inline]
fn is_b64url(b: u8) -> bool {
    matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_')
}
/// Returns `true` for bytes accepted in the local part of an e-mail address:
/// ASCII letters, digits and the punctuation `.`, `_`, `%`, `+`, `-`.
#[inline]
fn is_email_atom(b: u8) -> bool {
    match b {
        b'.' | b'_' | b'%' | b'+' | b'-' => true,
        other => other.is_ascii_alphanumeric(),
    }
}
/// Returns the index one past the longest run of bytes, starting at `i`,
/// for which `pred` holds.
///
/// If `i` is at or beyond the end of `bytes`, `i` is returned unchanged.
#[inline]
fn run<F: Fn(u8) -> bool>(bytes: &[u8], i: usize, pred: F) -> usize {
    match bytes.get(i..) {
        Some(tail) => i + tail.iter().take_while(|&&byte| pred(byte)).count(),
        None => i,
    }
}
pub struct Email;
impl Detector for Email {
fn kind(&self) -> Kind {
Kind::Email
}
fn detect(&self, input: &str, out: &mut Vec<Match>) {
let b = input.as_bytes();
let mut i = 0;
while i < b.len() {
if b[i] != b'@' {
i += 1;
continue;
}
let mut start = i;
while start > 0 && is_email_atom(b[start - 1]) {
start -= 1;
}
if start == i || b[start] == b'.' || b[i - 1] == b'.' {
i += 1;
continue;
}
let domain_start = i + 1;
let mut j = domain_start;
let mut last_dot = None;
while j < b.len() {
let c = b[j];
if c.is_ascii_alphanumeric() || c == b'-' {
j += 1;
} else if c == b'.' {
last_dot = Some(j);
j += 1;
} else {
break;
}
}
if let Some(dot) = last_dot {
let tld = &b[dot + 1..j];
if tld.len() >= 2
&& tld.iter().all(|c| c.is_ascii_alphabetic())
&& dot > domain_start
{
out.push(Match::new(Kind::Email, start, j));
i = j;
continue;
}
}
i += 1;
}
}
}
/// Detector for payment-card numbers: its `Detector` impl reports
/// `Kind::CreditCard` for digit sequences that pass the `luhn_ok` checksum.
pub struct CreditCard;
/// Luhn checksum over `digits`, which must contain ASCII digit bytes.
///
/// Counting from the rightmost digit, every second digit is doubled (with 9
/// subtracted when the result exceeds 9); the sequence is valid when the
/// total is a multiple of 10. An empty slice sums to 0 and is therefore
/// reported as valid.
fn luhn_ok(digits: &[u8]) -> bool {
    let total: u32 = digits
        .iter()
        .rev()
        .enumerate()
        .map(|(pos, &d)| {
            let value = (d - b'0') as u32;
            if pos % 2 == 0 {
                value
            } else {
                let doubled = value * 2;
                if doubled > 9 {
                    doubled - 9
                } else {
                    doubled
                }
            }
        })
        .sum();
    total % 10 == 0
}
impl Detector for CreditCard {
    fn kind(&self) -> Kind {
        Kind::CreditCard
    }

    /// Appends one [`Match`] per card-number candidate.
    ///
    /// A candidate starts at a digit not preceded by another digit and
    /// consists of digits optionally separated by single spaces or hyphens.
    /// It is reported when it has 13–19 digits, is not directly followed by
    /// another digit, and passes [`luhn_ok`]. The match spans from the first
    /// to the last digit (separators included).
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        const MIN_DIGITS: usize = 13;
        const MAX_DIGITS: usize = 19;

        let b = input.as_bytes();
        let mut i = 0;
        while i < b.len() {
            if !is_ascii_digit(b[i]) || (i > 0 && is_ascii_digit(b[i - 1])) {
                i += 1;
                continue;
            }

            let mut digits = [0u8; MAX_DIGITS];
            let mut dlen = 0usize;
            let mut j = i;
            let mut end = i;
            // Keep counting past the buffer capacity so that over-long digit
            // sequences are recognised (dlen == MAX_DIGITS + 1) and rejected,
            // instead of being validated on their first 19 digits only.
            while j < b.len() && dlen <= MAX_DIGITS {
                if is_ascii_digit(b[j]) {
                    if dlen < MAX_DIGITS {
                        digits[dlen] = b[j];
                    }
                    dlen += 1;
                    j += 1;
                    end = j;
                } else if matches!(b[j], b' ' | b'-')
                    && j + 1 < b.len()
                    && is_ascii_digit(b[j + 1])
                {
                    // A separator only counts when a digit follows it.
                    j += 1;
                } else {
                    break;
                }
            }

            let trailing_digit = end < b.len() && is_ascii_digit(b[end]);
            // The range check short-circuits before `digits[..dlen]` could be
            // sliced with an out-of-range length.
            if (MIN_DIGITS..=MAX_DIGITS).contains(&dlen)
                && !trailing_digit
                && luhn_ok(&digits[..dlen])
            {
                out.push(Match::new(Kind::CreditCard, i, end));
                i = end;
            } else {
                i += 1;
            }
        }
    }
}
/// Detector for dotted-quad IPv4 addresses.
pub struct IpV4;

impl Detector for IpV4 {
    fn kind(&self) -> Kind {
        Kind::IpV4
    }

    /// Appends one [`Match`] per `a.b.c.d` sequence where each octet has one
    /// to three digits and a value of at most 255. The address must not be
    /// preceded or followed by a digit or a dot.
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        let bytes = input.as_bytes();
        let total = bytes.len();
        let mut pos = 0;
        while pos < total {
            let glued_left =
                pos > 0 && (is_ascii_digit(bytes[pos - 1]) || bytes[pos - 1] == b'.');
            if !is_ascii_digit(bytes[pos]) || glued_left {
                pos += 1;
                continue;
            }

            let mut cursor = pos;
            let mut complete = true;
            for octet_idx in 0..4 {
                let first = cursor;
                cursor = run(bytes, cursor, is_ascii_digit);
                let width = cursor - first;
                if width == 0 || width > 3 {
                    complete = false;
                    break;
                }
                // At most three digits, so parsing cannot realistically fail;
                // the fallback merely guarantees rejection.
                let value: u32 = input[first..cursor].parse().unwrap_or(999);
                if value > 255 {
                    complete = false;
                    break;
                }
                // The first three octets must each be followed by a dot.
                if octet_idx < 3 {
                    if cursor < total && bytes[cursor] == b'.' {
                        cursor += 1;
                    } else {
                        complete = false;
                        break;
                    }
                }
            }

            let glued_right =
                cursor < total && (is_ascii_digit(bytes[cursor]) || bytes[cursor] == b'.');
            if complete && !glued_right {
                out.push(Match::new(Kind::IpV4, pos, cursor));
                pos = cursor;
            } else {
                pos += 1;
            }
        }
    }
}
/// Detector for IPv6 addresses: its `Detector` impl reports `Kind::IpV6` for
/// runs of hex digits and colons accepted by `looks_like_ipv6`.
pub struct IpV6;
/// Returns `true` when `s` has the textual shape of an IPv6 address.
///
/// Accepted forms:
/// * eight colon-separated groups of one to four hex digits, or
/// * fewer groups (at most seven) with exactly one `::` compression.
///
/// Rejected: strings with fewer than two colons, more than one `::`, three or
/// more consecutive colons, a single (uncompressed) leading or trailing colon,
/// and groups that are too long or not hexadecimal.
fn looks_like_ipv6(s: &str) -> bool {
    // ":::" would otherwise be counted as a single "::" plus a stray colon.
    if s.contains(":::") {
        return false;
    }
    let compressions = s.matches("::").count();
    if compressions > 1 {
        return false;
    }
    // A lone colon at either edge is only legal as part of "::".
    let stray_lead = s.starts_with(':') && !s.starts_with("::");
    let stray_tail = s.ends_with(':') && !s.ends_with("::");
    if stray_lead || stray_tail {
        return false;
    }

    let mut groups = 0usize;
    let mut hex_groups = 0usize;
    for group in s.split(':') {
        groups += 1;
        if group.is_empty() {
            // Empty pieces are produced by "::" only (checked above).
            continue;
        }
        if group.len() > 4 || !group.bytes().all(|c| c.is_ascii_hexdigit()) {
            return false;
        }
        hex_groups += 1;
    }
    // Fewer than three pieces means fewer than two colons.
    if groups < 3 {
        return false;
    }

    if compressions == 1 {
        hex_groups <= 7
    } else {
        groups == 8 && hex_groups == 8
    }
}
impl Detector for IpV6 {
    fn kind(&self) -> Kind {
        Kind::IpV6
    }

    /// Appends one [`Match`] per maximal run of hex digits and colons that
    /// [`looks_like_ipv6`] accepts.
    ///
    /// The run must not touch an ASCII letter or digit on either side.
    /// Without that check, identifiers such as `std::fs` would yield the
    /// bogus address `d::f`.
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        let b = input.as_bytes();
        let n = b.len();
        let is_tok = |c: u8| c.is_ascii_hexdigit() || c == b':';
        let mut i = 0;
        while i < n {
            // Only start at the first byte of a run.
            if !is_tok(b[i]) || (i > 0 && is_tok(b[i - 1])) {
                i += 1;
                continue;
            }
            let j = run(b, i, is_tok);
            let left_ok = i == 0 || !b[i - 1].is_ascii_alphanumeric();
            let right_ok = j == n || !b[j].is_ascii_alphanumeric();
            // `i..j` covers ASCII bytes only, so slicing the `str` is safe.
            if left_ok && right_ok && looks_like_ipv6(&input[i..j]) {
                out.push(Match::new(Kind::IpV6, i, j));
            }
            i = j.max(i + 1);
        }
    }
}
/// Detector for JSON Web Tokens (`header.payload.signature`).
pub struct Jwt;

impl Detector for Jwt {
    fn kind(&self) -> Kind {
        Kind::Jwt
    }

    /// Appends one [`Match`] per token made of three dot-separated base64url
    /// segments, where the first starts with `eyJ` and is not preceded by a
    /// base64url byte, and the second and third are non-empty.
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        let bytes = input.as_bytes();

        // Returns the end offset of a token starting at `at`, if there is one.
        let token_end = |at: usize| -> Option<usize> {
            if !bytes[at..].starts_with(b"eyJ") {
                return None;
            }
            if at > 0 && is_b64url(bytes[at - 1]) {
                return None;
            }
            let header_end = run(bytes, at, is_b64url);
            if bytes.get(header_end) != Some(&b'.') {
                return None;
            }
            let payload_start = header_end + 1;
            let payload_end = run(bytes, payload_start, is_b64url);
            if payload_end == payload_start || bytes.get(payload_end) != Some(&b'.') {
                return None;
            }
            let signature_start = payload_end + 1;
            let signature_end = run(bytes, signature_start, is_b64url);
            if signature_end == signature_start {
                return None;
            }
            Some(signature_end)
        };

        let mut pos = 0;
        while pos + 3 <= bytes.len() {
            match token_end(pos) {
                Some(end) => {
                    out.push(Match::new(Kind::Jwt, pos, end));
                    pos = end;
                }
                None => pos += 1,
            }
        }
    }
}
/// Detector for US Social Security numbers written as `AAA-GG-SSSS`.
pub struct UsSsn;

impl Detector for UsSsn {
    fn kind(&self) -> Kind {
        Kind::UsSsn
    }

    /// Appends one [`Match`] per 11-byte `ddd-dd-dddd` window that is not
    /// adjacent to another digit and is not a known-invalid number: area
    /// `000`, `666` or `9xx`, group `00`, or serial `0000`.
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        const SSN_LEN: usize = 11;

        let b = input.as_bytes();
        let n = b.len();
        let mut i = 0;
        while i + SSN_LEN <= n {
            // All inspection is done on bytes. Slicing `input` itself at
            // arbitrary offsets panics when an offset falls inside a
            // multi-byte UTF-8 character.
            let win = &b[i..i + SSN_LEN];
            let shape = win[0..3].iter().all(|c| is_ascii_digit(*c))
                && win[3] == b'-'
                && win[4..6].iter().all(|c| is_ascii_digit(*c))
                && win[6] == b'-'
                && win[7..11].iter().all(|c| is_ascii_digit(*c));
            let bounded_left = i == 0 || !is_ascii_digit(b[i - 1]);
            let bounded_right = i + SSN_LEN == n || !is_ascii_digit(b[i + SSN_LEN]);

            let area = &win[0..3];
            let invalid_area = area == b"000" || area == b"666" || win[0] == b'9';
            let zero_group = &win[4..6] == b"00";
            let zero_serial = &win[7..11] == b"0000";

            if shape
                && bounded_left
                && bounded_right
                && !invalid_area
                && !zero_group
                && !zero_serial
            {
                out.push(Match::new(Kind::UsSsn, i, i + SSN_LEN));
                i += SSN_LEN;
            } else {
                i += 1;
            }
        }
    }
}
/// Detector for MAC addresses written as six hex pairs joined by `:` or `-`.
pub struct MacAddress;

impl Detector for MacAddress {
    fn kind(&self) -> Kind {
        Kind::MacAddress
    }

    /// Appends one [`Match`] per 17-byte `hh?hh?hh?hh?hh?hh` window whose
    /// separator (`:` or `-`) is the same throughout. The window must not be
    /// preceded by a hex digit, nor followed by a hex digit or the separator.
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        const MAC_LEN: usize = 17;

        let bytes = input.as_bytes();
        let total = bytes.len();
        let mut pos = 0;
        while pos + MAC_LEN <= total {
            let sep = bytes[pos + 2];
            if !matches!(sep, b':' | b'-') {
                pos += 1;
                continue;
            }

            // Five chunks of "hh<sep>" followed by a final two-byte "hh".
            let well_formed = bytes[pos..pos + MAC_LEN].chunks(3).all(|group| {
                group[0].is_ascii_hexdigit()
                    && group[1].is_ascii_hexdigit()
                    && group.get(2).map_or(true, |&c| c == sep)
            });
            let left_ok = pos == 0 || !bytes[pos - 1].is_ascii_hexdigit();
            let right_ok = match bytes.get(pos + MAC_LEN) {
                None => true,
                Some(&next) => !(next.is_ascii_hexdigit() || next == sep),
            };

            if well_formed && left_ok && right_ok {
                out.push(Match::new(Kind::MacAddress, pos, pos + MAC_LEN));
                pos += MAC_LEN;
            } else {
                pos += 1;
            }
        }
    }
}
/// Detector for AWS access key identifiers.
pub struct AwsAccessKey;

/// Four-byte prefixes that introduce a key identifier.
const AWS_PREFIXES: [&[u8; 4]; 7] = [
    b"AKIA", b"ASIA", b"AGPA", b"AIDA", b"AROA", b"AIPA", b"ANPA",
];

impl Detector for AwsAccessKey {
    fn kind(&self) -> Kind {
        Kind::AwsAccessKey
    }

    /// Appends one [`Match`] per 20-byte window that starts with one of
    /// [`AWS_PREFIXES`], continues with 16 uppercase letters or digits, and
    /// has no ASCII alphanumeric byte directly before or after it.
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        const KEY_LEN: usize = 20;

        let bytes = input.as_bytes();
        let total = bytes.len();
        let mut pos = 0;
        while pos + KEY_LEN <= total {
            let (prefix, tail) = bytes[pos..pos + KEY_LEN].split_at(4);
            let known_prefix = AWS_PREFIXES.iter().any(|p| &p[..] == prefix);
            let left_ok = pos == 0 || !bytes[pos - 1].is_ascii_alphanumeric();
            let tail_ok = tail
                .iter()
                .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit());
            let right_ok = match bytes.get(pos + KEY_LEN) {
                None => true,
                Some(next) => !next.is_ascii_alphanumeric(),
            };

            if known_prefix && left_ok && tail_ok && right_ok {
                out.push(Match::new(Kind::AwsAccessKey, pos, pos + KEY_LEN));
                pos += KEY_LEN;
            } else {
                pos += 1;
            }
        }
    }
}
/// Detector for credentials embedded in URLs (`scheme://user:pass@host`).
pub struct UrlCredentials;

impl Detector for UrlCredentials {
    fn kind(&self) -> Kind {
        Kind::UrlCredentials
    }

    /// Appends one [`Match`] covering the `user:pass` part (the bytes between
    /// `://` and the `@`) of every URL that carries one.
    ///
    /// A URL is recognised by a non-empty scheme (letters, digits, `+`, `.`,
    /// `-`) directly before `://`. The authority ends at the first `/`, `?`,
    /// `#`, whitespace, `"`, `<` or `>`. The userinfo must contain a `:`.
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        const NEEDLE: &str = "://";

        let b = input.as_bytes();
        let mut from = 0;
        while let Some(rel) = input[from..].find(NEEDLE) {
            let sep = from + rel;
            let scheme_len = b[..sep]
                .iter()
                .rev()
                .take_while(|&&c| c.is_ascii_alphanumeric() || matches!(c, b'+' | b'.' | b'-'))
                .count();

            let auth_start = sep + NEEDLE.len();
            let rest = &input[auth_start..];
            // Whitespace, quotes and angle brackets cannot occur inside a URI,
            // so they end the authority too. Otherwise an unrelated `@` later
            // in the text (for example an e-mail address in the same
            // sentence) would be mistaken for the userinfo delimiter.
            let authority_len = rest
                .find(|c: char| {
                    matches!(c, '/' | '?' | '#' | '"' | '<' | '>') || c.is_whitespace()
                })
                .unwrap_or(rest.len());
            let authority = &rest[..authority_len];

            if scheme_len > 0 {
                if let Some(at) = authority.find('@') {
                    if authority[..at].contains(':') {
                        out.push(Match::new(
                            Kind::UrlCredentials,
                            auth_start,
                            auth_start + at,
                        ));
                    }
                }
            }
            from = auth_start;
        }
    }
}
/// Returns `true` for bytes that may appear inside a secret token: ASCII
/// letters, digits, `-` and `_`.
#[inline]
fn is_token_char(b: u8) -> bool {
    matches!(b, b'-' | b'_') || b.is_ascii_alphanumeric()
}
/// Returns `true` when position `i` is at the start of `b` or the byte just
/// before it is not a token character.
#[inline]
fn bounded_left(b: &[u8], i: usize) -> bool {
    match i.checked_sub(1) {
        None => true,
        Some(prev) => !is_token_char(b[prev]),
    }
}
/// Returns `true` when `end` is at or past the end of `b`, or the byte at
/// `end` is not a token character.
#[inline]
fn bounded_right(b: &[u8], end: usize) -> bool {
    b.get(end).map_or(true, |&next| !is_token_char(next))
}
/// Finds tokens of the form `prefix` + body and appends them to `out`.
///
/// A token is reported with `kind` when `prefix` occurs at a left token
/// boundary and is followed by a run of token characters whose length lies in
/// `min_body..=max_body`. The match covers prefix and body. An empty `prefix`
/// yields no matches.
fn scan_prefixed(
    input: &str,
    prefix: &str,
    min_body: usize,
    max_body: usize,
    kind: Kind,
    out: &mut Vec<Match>,
) {
    let haystack = input.as_bytes();
    let needle = prefix.as_bytes();
    if needle.is_empty() || haystack.len() < needle.len() {
        return;
    }

    let last_start = haystack.len() - needle.len();
    let mut pos = 0;
    while pos <= last_start {
        if haystack[pos..].starts_with(needle) && bounded_left(haystack, pos) {
            let body_from = pos + needle.len();
            let body_to = run(haystack, body_from, is_token_char);
            if (min_body..=max_body).contains(&(body_to - body_from)) {
                out.push(Match::new(kind.clone(), pos, body_to));
                pos = body_to;
                continue;
            }
        }
        pos += 1;
    }
}
/// Detector for GitHub tokens.
pub struct GitHubToken;

impl Detector for GitHubToken {
    fn kind(&self) -> Kind {
        Kind::GitHubToken
    }

    /// Reports `github_pat_` tokens (body of 20–200 token characters) first,
    /// then `ghp_`/`gho_`/`ghu_`/`ghs_`/`ghr_` tokens (body of 30–255).
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        const SHORT_PREFIXES: [&str; 5] = ["ghp_", "gho_", "ghu_", "ghs_", "ghr_"];

        scan_prefixed(input, "github_pat_", 20, 200, Kind::GitHubToken, out);
        for prefix in SHORT_PREFIXES.iter().copied() {
            scan_prefixed(input, prefix, 30, 255, Kind::GitHubToken, out);
        }
    }
}
/// Detector for Slack tokens.
pub struct SlackToken;

impl Detector for SlackToken {
    fn kind(&self) -> Kind {
        Kind::SlackToken
    }

    /// Reports tokens that start with one of the `xox?-` prefixes below and
    /// continue with 10–255 token characters, one prefix at a time.
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        const PREFIXES: [&str; 6] = ["xoxb-", "xoxp-", "xoxa-", "xoxr-", "xoxs-", "xoxo-"];

        for prefix in PREFIXES.iter().copied() {
            scan_prefixed(input, prefix, 10, 255, Kind::SlackToken, out);
        }
    }
}
/// Detector for Stripe API keys.
pub struct StripeKey;

impl Detector for StripeKey {
    fn kind(&self) -> Kind {
        Kind::StripeKey
    }

    /// Reports keys that start with one of the `sk_`/`rk_`/`pk_` live or test
    /// prefixes below and continue with 10–255 token characters, one prefix
    /// at a time.
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        const PREFIXES: [&str; 6] = [
            "sk_live_", "sk_test_", "rk_live_", "rk_test_", "pk_live_", "pk_test_",
        ];

        for prefix in PREFIXES.iter().copied() {
            scan_prefixed(input, prefix, 10, 255, Kind::StripeKey, out);
        }
    }
}
pub struct GoogleApiKey;
impl Detector for GoogleApiKey {
fn kind(&self) -> Kind {
Kind::GoogleApiKey
}
fn detect(&self, input: &str, out: &mut Vec<Match>) {
let b = input.as_bytes();
let mut i = 0;
while i + 39 <= b.len() {
if &b[i..i + 4] == b"AIza"
&& bounded_left(b, i)
&& b[i + 4..i + 39].iter().all(|&c| is_token_char(c))
&& bounded_right(b, i + 39)
{
out.push(Match::new(Kind::GoogleApiKey, i, i + 39));
i += 39;
} else {
i += 1;
}
}
}
}
pub struct OpenAiKey;
impl Detector for OpenAiKey {
fn kind(&self) -> Kind {
Kind::OpenAiKey
}
fn detect(&self, input: &str, out: &mut Vec<Match>) {
scan_prefixed(input, "sk-proj-", 20, 255, Kind::OpenAiKey, out);
scan_prefixed(input, "sk-", 20, 255, Kind::OpenAiKey, out);
}
}
/// Detector for PEM-style private key blocks.
pub struct PrivateKey;

impl Detector for PrivateKey {
    fn kind(&self) -> Kind {
        Kind::PrivateKey
    }

    /// Appends one [`Match`] per block that starts with a `-----BEGIN …-----`
    /// header whose label contains `PRIVATE KEY`, and ends with the closing
    /// dashes of the next `-----END …-----` footer. Blocks without a footer
    /// are not reported.
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        const DASHES: &str = "-----";
        const BEGIN: &str = "-----BEGIN ";
        const END: &str = "-----END ";

        let mut cursor = 0;
        while let Some(offset) = input[cursor..].find(BEGIN) {
            let block_start = cursor + offset;
            let label_start = block_start + BEGIN.len();
            let label_end = match input[label_start..].find(DASHES) {
                Some(rel) => label_start + rel,
                // Header never closes: nothing further can match.
                None => break,
            };
            if !input[label_start..label_end].contains("PRIVATE KEY") {
                cursor = label_start;
                continue;
            }

            // Offset just past the footer's closing dashes, if a footer exists.
            let block_end = input[label_end..].find(END).and_then(|end_rel| {
                let footer_label = label_end + end_rel + END.len();
                input[footer_label..]
                    .find(DASHES)
                    .map(|dash_rel| footer_label + dash_rel + DASHES.len())
            });

            match block_end {
                Some(end) => {
                    out.push(Match::new(Kind::PrivateKey, block_start, end));
                    cursor = end;
                }
                None => cursor = label_start,
            }
        }
    }
}
/// Detector for IBANs: its `Detector` impl reports `Kind::Iban` for
/// candidates that pass `iban_mod97_ok`.
pub struct Iban;
/// Validates the mod-97 checksum of an IBAN candidate.
///
/// The first four characters (country code and check digits) are moved to the
/// end, letters `A`–`Z` are replaced by the numbers 10–35, and the resulting
/// decimal number must leave remainder 1 when divided by 97.
///
/// Returns `false` for input shorter than four bytes and for input that
/// contains anything other than ASCII digits and uppercase letters.
fn iban_mod97_ok(s: &str) -> bool {
    let bytes = s.as_bytes();
    // Too short to even hold country code + check digits; also guards the
    // split below against panicking.
    if bytes.len() < 4 {
        return false;
    }
    let (head, tail) = bytes.split_at(4);

    let mut remainder: u32 = 0;
    for &c in tail.iter().chain(head) {
        // The remainder stays below 97, so the products cannot overflow.
        remainder = match c {
            b'0'..=b'9' => (remainder * 10 + u32::from(c - b'0')) % 97,
            // A letter stands for a two-digit number, hence the factor 100.
            b'A'..=b'Z' => (remainder * 100 + u32::from(c - b'A') + 10) % 97,
            _ => return false,
        };
    }
    remainder == 1
}
impl Detector for Iban {
fn kind(&self) -> Kind {
Kind::Iban
}
fn detect(&self, input: &str, out: &mut Vec<Match>) {
let b = input.as_bytes();
let n = b.len();
let mut i = 0;
while i < n {
let head_ok = i + 4 <= n
&& b[i].is_ascii_uppercase()
&& b[i + 1].is_ascii_uppercase()
&& b[i + 2].is_ascii_digit()
&& b[i + 3].is_ascii_digit()
&& bounded_left(b, i);
if !head_ok {
i += 1;
continue;
}
let mut j = i;
while j < n && (b[j].is_ascii_uppercase() || b[j].is_ascii_digit()) {
j += 1;
}
let len = j - i;
if (15..=34).contains(&len) && bounded_right(b, j) {
let candidate = &input[i..j];
if iban_mod97_ok(candidate) {
out.push(Match::new(Kind::Iban, i, j));
i = j;
continue;
}
}
i += 1;
}
}
}
/// Detector for phone numbers.
pub struct PhoneNumber;

impl Detector for PhoneNumber {
    fn kind(&self) -> Kind {
        Kind::PhoneNumber
    }

    /// Appends one [`Match`] per phone-number candidate.
    ///
    /// A candidate starts with `+`, `(` or a digit (not preceded by a digit
    /// or `+`) and extends over digits and the separators `-`, `.`, space,
    /// `(` and `)`. The match ends after its last digit. It is reported when
    /// it has 7–15 digits, either starts with `+` or has a separator between
    /// two digits, and is not directly followed by a token character or `@`.
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        let bytes = input.as_bytes();
        let total = bytes.len();
        let mut pos = 0;
        while pos < total {
            let lead = bytes[pos];
            let has_plus = lead == b'+';
            let opener = has_plus || lead.is_ascii_digit() || lead == b'(';
            let glued_left =
                pos > 0 && (bytes[pos - 1].is_ascii_digit() || bytes[pos - 1] == b'+');
            if !opener || glued_left {
                pos += 1;
                continue;
            }

            // Skip the leading '+', then consume digits and separators.
            let mut cursor = pos + usize::from(has_plus);
            let mut digit_count = 0usize;
            let mut span_end = pos;
            let mut inner_seps = 0usize;
            while cursor < total {
                match bytes[cursor] {
                    b'0'..=b'9' => {
                        digit_count += 1;
                        span_end = cursor + 1;
                    }
                    b'-' | b'.' | b' ' | b'(' | b')' => {
                        // Separators before the first digit do not count.
                        if digit_count > 0 {
                            inner_seps += 1;
                        }
                    }
                    _ => break,
                }
                cursor += 1;
            }

            let at_follows = span_end < total && bytes[span_end] == b'@';
            let right_ok = bounded_right(bytes, span_end) && !at_follows;
            let grouped = has_plus
                || (inner_seps >= 1 && grouped_between_digits(&bytes[pos..span_end]));

            if (7..=15).contains(&digit_count) && grouped && right_ok {
                out.push(Match::new(Kind::PhoneNumber, pos, span_end));
                pos = span_end.max(pos + 1);
            } else {
                pos = cursor.max(pos + 1);
            }
        }
    }
}
/// Returns `true` when `s` contains two digits with only separator bytes
/// (`-`, `.`, space, `(`, `)`) — and at least one of them — between the two.
fn grouped_between_digits(s: &[u8]) -> bool {
    let mut after_digit = false;
    let mut sep_pending = false;
    for &byte in s {
        match byte {
            b'0'..=b'9' => {
                if sep_pending && after_digit {
                    return true;
                }
                after_digit = true;
                sep_pending = false;
            }
            b'-' | b'.' | b' ' | b'(' | b')' => {
                // A separator only counts once a digit has been seen.
                sep_pending = sep_pending || after_digit;
            }
            _ => {
                // Any other byte breaks the digit/separator sequence.
                after_digit = false;
                sep_pending = false;
            }
        }
    }
    false
}
/// Detector for generic secrets, configured by two thresholds.
pub struct HighEntropy {
    /// Minimum token length (in bytes) for a token to be considered.
    pub min_len: usize,
    /// Minimum entropy score a token must reach to be reported.
    pub min_entropy: f64,
}

impl Default for HighEntropy {
    /// Uses a minimum length of 20 and a minimum entropy of 3.5.
    fn default() -> Self {
        HighEntropy::new(20, 3.5)
    }
}

impl HighEntropy {
    /// Creates a detector with the given thresholds.
    pub fn new(min_len: usize, min_entropy: f64) -> Self {
        HighEntropy {
            min_len,
            min_entropy,
        }
    }
}
/// Shannon entropy of `s` in bits per byte (0.0 for empty input).
///
/// Only compiled when the `std` feature is enabled, because it relies on
/// `f64::log2`.
#[cfg(feature = "std")]
fn shannon_entropy(s: &[u8]) -> f64 {
    let mut counts = [0usize; 256];
    for &c in s {
        counts[c as usize] += 1;
    }
    let len = s.len() as f64;
    let mut entropy = 0.0;
    for &count in counts.iter() {
        if count > 0 {
            let p = count as f64 / len;
            entropy -= p * (p.log2());
        }
    }
    entropy
}

/// Float-free stand-in for the Shannon entropy of `s`, used when the `std`
/// feature is disabled.
///
/// Returns `ceil(log2(d))`, where `d` is the number of distinct byte values in
/// `s`. This is an upper bound on the true entropy, and it is exact for a
/// uniform distribution over a power-of-two number of symbols. Empty input
/// yields 0.0.
#[cfg(not(feature = "std"))]
fn shannon_entropy(s: &[u8]) -> f64 {
    let mut seen = [false; 256];
    let mut distinct = 0usize;
    for &c in s {
        if !seen[c as usize] {
            seen[c as usize] = true;
            distinct += 1;
        }
    }
    if distinct == 0 {
        return 0.0;
    }
    // For a power of two 2^k, `trailing_zeros` is k = log2. The bit length
    // (`BITS - leading_zeros`) would be k + 1 and overstate the entropy by a
    // full bit.
    f64::from(distinct.next_power_of_two().trailing_zeros())
}
impl Detector for HighEntropy {
    fn kind(&self) -> Kind {
        Kind::GenericSecret
    }

    /// Appends one [`Match`] per maximal run of token characters that is at
    /// least `min_len` bytes long, contains both a digit and a letter, and
    /// whose [`shannon_entropy`] is at least `min_entropy`.
    fn detect(&self, input: &str, out: &mut Vec<Match>) {
        let bytes = input.as_bytes();
        let mut pos = 0;
        while pos < bytes.len() {
            let starts_token = is_token_char(bytes[pos]) && bounded_left(bytes, pos);
            if !starts_token {
                pos += 1;
                continue;
            }

            let stop = run(bytes, pos, is_token_char);
            let token = &bytes[pos..stop];
            if token.len() >= self.min_len {
                let mixed = token.iter().any(u8::is_ascii_digit)
                    && token.iter().any(u8::is_ascii_alphabetic);
                if mixed && shannon_entropy(token) >= self.min_entropy {
                    out.push(Match::new(Kind::GenericSecret, pos, stop));
                }
            }
            pos = stop.max(pos + 1);
        }
    }
}