use std::sync::LazyLock;
/// Widest pattern, in bytes, that the single-word bit-parallel algorithm can
/// handle: one bit of a `u64` per pattern byte.
const MAX_PATTERN_LEN: usize = 64;

/// A pattern pre-processed for repeated Levenshtein distance queries.
///
/// Comparison is byte-wise and ignores ASCII case. Because it works on bytes
/// rather than Unicode scalar values, a multi-byte UTF-8 character counts as
/// several positions.
#[derive(Clone)]
struct PatternMasks {
    /// The pattern exactly as passed to [`PatternMasks::new`].
    pattern: String,
    /// Indexed by byte value: bit `i` is set when that byte matches pattern
    /// byte `i` (ignoring ASCII case). Only the first `MAX_PATTERN_LEN`
    /// pattern bytes are represented; longer patterns do not use the masks.
    char_masks: [u64; 256],
    /// Full length of the pattern in bytes.
    len: usize,
}

impl PatternMasks {
    /// Builds the per-byte match masks for `pattern`.
    ///
    /// Patterns longer than `MAX_PATTERN_LEN` bytes are accepted; they are
    /// compared with a slower row-based algorithm instead of being truncated.
    fn new(pattern: &str) -> Self {
        let bytes = pattern.as_bytes();
        let mut char_masks = [0u64; 256];
        for (i, &byte) in bytes.iter().take(MAX_PATTERN_LEN).enumerate() {
            let bit = 1u64 << i;
            // Setting both cases makes lookups case-insensitive for ASCII
            // letters; for every other byte both indices are the byte itself.
            char_masks[usize::from(byte.to_ascii_lowercase())] |= bit;
            char_masks[usize::from(byte.to_ascii_uppercase())] |= bit;
        }
        Self {
            pattern: pattern.to_string(),
            char_masks,
            len: bytes.len(),
        }
    }

    /// Returns the Levenshtein distance, in bytes, between the pattern and
    /// `text`, ignoring ASCII case.
    #[inline]
    fn levenshtein(&self, text: &str) -> usize {
        let text = text.as_bytes();
        if self.len == 0 {
            return text.len();
        }
        if text.is_empty() {
            return self.len;
        }
        if self.len > MAX_PATTERN_LEN {
            // The pattern does not fit in one machine word, so the
            // precomputed masks cannot describe it.
            return Self::levenshtein_rows(self.pattern.as_bytes(), text);
        }
        self.levenshtein_bit_parallel(text)
    }

    /// Myers' bit-parallel distance for patterns of 1..=64 bytes.
    ///
    /// `vp`/`vn` hold the +1/-1 vertical deltas of the current column of the
    /// edit matrix and `score` tracks the value in its last row.
    fn levenshtein_bit_parallel(&self, text: &[u8]) -> usize {
        let m = self.len;
        let last_bit = 1u64 << (m - 1);
        let mut vp: u64 = u64::MAX;
        let mut vn: u64 = 0;
        let mut score = m;
        for &byte in text {
            let pm = self.char_masks[usize::from(byte)];
            // Bits above `m` only receive carries and never feed back into
            // the low `m` bits, so no explicit masking is needed.
            let d0 = (((pm & vp).wrapping_add(vp)) ^ vp) | pm | vn;
            let hp = vn | !(d0 | vp);
            let hn = d0 & vp;
            if hp & last_bit != 0 {
                score += 1;
            } else if hn & last_bit != 0 {
                score = score.saturating_sub(1);
            }
            // Shifting in a 1 models the first row growing by one per text
            // byte, which yields a whole-string (global) distance.
            let hp = (hp << 1) | 1;
            let hn = hn << 1;
            vp = hn | !(d0 | hp);
            vn = d0 & hp;
        }
        score
    }

    /// Two-row dynamic-programming distance used when the pattern is wider
    /// than `MAX_PATTERN_LEN`; uses the same ASCII-case-insensitive byte
    /// comparison as the masks.
    fn levenshtein_rows(pattern: &[u8], text: &[u8]) -> usize {
        let mut prev: Vec<usize> = (0..=pattern.len()).collect();
        let mut curr = vec![0usize; pattern.len() + 1];
        for (j, &t) in text.iter().enumerate() {
            curr[0] = j + 1;
            for (i, &p) in pattern.iter().enumerate() {
                let substitution = prev[i] + usize::from(!p.eq_ignore_ascii_case(&t));
                let deletion = prev[i + 1] + 1;
                let insertion = curr[i] + 1;
                curr[i + 1] = substitution.min(deletion).min(insertion);
            }
            std::mem::swap(&mut prev, &mut curr);
        }
        prev[pattern.len()]
    }
}
/// A set of pre-processed patterns that inputs are compared against within a
/// fixed edit-distance budget.
pub struct FuzzyMatcher {
    /// Patterns in the order they were given to [`FuzzyMatcher::new`]; ties
    /// between equally distant patterns are resolved in this order.
    patterns: Vec<PatternMasks>,
    /// Largest distance (inclusive) that is still reported as a match.
    max_distance: usize,
}

impl FuzzyMatcher {
    /// Pre-processes `patterns` and remembers `max_distance` as the inclusive
    /// match threshold.
    pub fn new(patterns: &[&str], max_distance: usize) -> Self {
        Self {
            patterns: patterns.iter().map(|&p| PatternMasks::new(p)).collect(),
            max_distance,
        }
    }

    /// Returns the distance between `pattern` and `text` when it does not
    /// exceed `max_distance`, and `None` otherwise.
    fn distance_within(&self, pattern: &PatternMasks, text: &str) -> Option<usize> {
        // The distance is never smaller than the length difference, so
        // patterns that differ too much in length are rejected without
        // running the comparison.
        if pattern.len.abs_diff(text.len()) > self.max_distance {
            return None;
        }
        // `PatternMasks` sets both ASCII cases for every pattern letter, so
        // `text` is compared as-is; lower-casing it first would only cost an
        // allocation without changing any distance.
        let distance = pattern.levenshtein(text);
        if distance <= self.max_distance {
            Some(distance)
        } else {
            None
        }
    }

    /// Returns the closest pattern to `text` together with its distance, or
    /// `None` when no pattern is within `max_distance`.
    ///
    /// When several patterns are equally close, the one listed first wins.
    #[inline]
    pub fn find_match(&self, text: &str) -> Option<(&str, usize)> {
        let mut best: Option<(&str, usize)> = None;
        for pattern in &self.patterns {
            let distance = match self.distance_within(pattern, text) {
                Some(distance) => distance,
                None => continue,
            };
            if distance == 0 {
                // Nothing can beat an exact match; stop scanning.
                return Some((pattern.pattern.as_str(), 0));
            }
            match best {
                // Keep the earlier pattern on ties.
                Some((_, best_distance)) if best_distance <= distance => {}
                _ => best = Some((pattern.pattern.as_str(), distance)),
            }
        }
        best
    }

    /// Returns `true` when at least one pattern is within `max_distance` of
    /// `text`.
    #[inline]
    pub fn is_fuzzy_match(&self, text: &str) -> bool {
        self.find_match(text).is_some()
    }

    /// Returns every pattern within `max_distance` of `text`, closest first.
    ///
    /// Patterns at the same distance keep their construction order (the sort
    /// is stable).
    pub fn find_all_matches(&self, text: &str) -> Vec<(&str, usize)> {
        let mut matches: Vec<(&str, usize)> = self
            .patterns
            .iter()
            .filter_map(|pattern| {
                self.distance_within(pattern, text)
                    .map(|distance| (pattern.pattern.as_str(), distance))
            })
            .collect();
        matches.sort_by_key(|&(_, distance)| distance);
        matches
    }
}
/// Matcher for authentication-related terms, built on first access.
///
/// An input matches when it is within an edit distance of 2 of any term.
///
/// NOTE(review): a budget of 2 is loose for the 3-byte term `jwt` (for
/// example `get` is at distance 2 and therefore matches) — confirm this is
/// intended.
pub static AUTH_TERMS: LazyLock<FuzzyMatcher> = LazyLock::new(|| {
    const TERMS: &[&str] = &[
        "password",
        "secret",
        "credential",
        "token",
        "apikey",
        "api_key",
        "private_key",
        "secret_key",
        "access_token",
        "auth_token",
        "bearer",
        "jwt",
        "session",
    ];
    const MAX_DISTANCE: usize = 2;

    FuzzyMatcher::new(TERMS, MAX_DISTANCE)
});
/// Matcher for sensitive field names, built on first access.
///
/// An input matches when it is within an edit distance of 2 of any term.
///
/// NOTE(review): a budget of 2 is loose for short terms such as `key` (for
/// example `day` is at distance 2 and therefore matches) — confirm this is
/// intended.
pub static SENSITIVE_FIELDS: LazyLock<FuzzyMatcher> = LazyLock::new(|| {
    const TERMS: &[&str] = &[
        "password",
        "passwd",
        "secret",
        "private",
        "credential",
        "auth",
        "authorization",
        "authenticate",
        "token",
        "key",
        "apikey",
        "certificate",
        "signature",
    ];
    const MAX_DISTANCE: usize = 2;

    FuzzyMatcher::new(TERMS, MAX_DISTANCE)
});
#[inline]
pub fn is_fuzzy_auth_like(name: &str) -> Option<(&'static str, usize)> {
AUTH_TERMS.find_match(name)
}
#[inline]
pub fn is_fuzzy_sensitive(name: &str) -> Option<(&'static str, usize)> {
SENSITIVE_FIELDS.find_match(name)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A pattern matches itself at distance 0.
    #[test]
    fn test_exact_match() {
        let matcher = FuzzyMatcher::new(&["password", "secret"], 2);
        for term in ["password", "secret"] {
            assert_eq!(matcher.find_match(term), Some((term, 0)));
        }
    }

    /// Common misspellings stay within a budget of 2.
    #[test]
    fn test_typo_matches() {
        let matcher = FuzzyMatcher::new(&["password"], 2);
        for typo in ["passwd", "passwrd", "passowrd", "pasword", "passw0rd"] {
            assert!(matcher.find_match(typo).is_some());
        }
    }

    /// ASCII case differences do not count as edits.
    #[test]
    fn test_case_insensitive() {
        let matcher = FuzzyMatcher::new(&["password"], 0);
        for spelling in ["PASSWORD", "Password"] {
            assert_eq!(matcher.find_match(spelling), Some(("password", 0)));
        }
    }

    /// Unrelated words are rejected.
    #[test]
    fn test_no_match() {
        let matcher = FuzzyMatcher::new(&["password"], 2);
        for unrelated in ["username", "config"] {
            assert!(matcher.find_match(unrelated).is_none());
        }
    }

    /// The budget is an inclusive upper bound.
    #[test]
    fn test_distance_threshold() {
        let matcher = FuzzyMatcher::new(&["password"], 1);
        // Two deletions: over budget.
        assert!(matcher.find_match("passwd").is_none());
        // One substitution: within budget.
        assert!(matcher.find_match("passwort").is_some());
    }

    /// The prebuilt authentication matcher accepts known terms and typos.
    #[test]
    fn test_prebuilt_auth_terms() {
        for hit in ["password", "passwd", "passowrd", "api_key", "apikey"] {
            assert!(is_fuzzy_auth_like(hit).is_some());
        }
        assert!(is_fuzzy_auth_like("username").is_none());
    }

    /// `find_all_matches` reports every pattern within the budget.
    #[test]
    fn test_find_all_matches() {
        let matcher = FuzzyMatcher::new(&["password", "passwd", "passkey"], 2);
        let found = matcher.find_all_matches("passwd");
        assert!(found.iter().any(|(p, d)| *p == "passwd" && *d == 0));
        assert!(found.iter().any(|(p, _)| *p == "password"));
    }

    /// Distances for the classic kitten/sitting example.
    #[test]
    fn test_myers_algorithm() {
        let pattern = PatternMasks::new("kitten");
        let expected = [("kitten", 0), ("sitten", 1), ("sittin", 2), ("sitting", 3)];
        for (text, distance) in expected {
            assert_eq!(pattern.levenshtein(text), distance);
        }
    }

    /// An empty side yields the length of the other side.
    #[test]
    fn test_empty_strings() {
        let pattern = PatternMasks::new("test");
        assert_eq!(pattern.levenshtein(""), 4);

        let empty = PatternMasks::new("");
        assert_eq!(empty.levenshtein("test"), 4);
        assert_eq!(empty.levenshtein(""), 0);
    }
}