/// Lowercase ASCII words that mark a credential as a placeholder.
///
/// `contains_placeholder_word` searches for each entry as a substring,
/// ignoring ASCII case, so the entries are stored as byte strings.
const PLACEHOLDER_WORDS: &[&[u8]] = &[
b"example",
b"dummy",
b"fake",
b"sample",
b"placeholder",
b"changeme",
];
use super::{CONFIDENCE_MAX, CONFIDENCE_MIN};
/// Normalizes a raw confidence score into the supported confidence range.
///
/// A NaN score maps to `CONFIDENCE_MIN`; every other value, including the
/// infinities, is clamped into `[CONFIDENCE_MIN, CONFIDENCE_MAX]`.
#[inline]
pub fn finalize_confidence(score: f64) -> f64 {
    match score {
        // `f64::clamp` would hand a NaN straight back, so it is mapped to the
        // lower bound explicitly.
        value if value.is_nan() => CONFIDENCE_MIN,
        value => value.clamp(CONFIDENCE_MIN, CONFIDENCE_MAX),
    }
}
/// Reports whether `credential` contains any entry of `PLACEHOLDER_WORDS`
/// as a substring, ignoring ASCII case.
pub fn contains_placeholder_word(credential: &str) -> bool {
    for word in PLACEHOLDER_WORDS {
        if contains_ascii_case_insensitive(credential, word) {
            return true;
        }
    }
    false
}
/// Byte-wise substring search that ignores ASCII case.
///
/// Returns `true` when `needle` occurs anywhere in `haystack`. An empty
/// `needle` always matches; a `needle` longer than `haystack` never does.
fn contains_ascii_case_insensitive(haystack: &str, needle: &[u8]) -> bool {
    let width = needle.len();
    // `windows(0)` panics, so the empty needle is answered up front.
    if width == 0 {
        return true;
    }

    for candidate in haystack.as_bytes().windows(width) {
        if candidate.eq_ignore_ascii_case(needle) {
            return true;
        }
    }
    false
}
/// Ratio of distinct byte values to total byte length of `credential`.
///
/// The measurement is byte-based: a multi-byte UTF-8 character contributes
/// each of its bytes separately. An empty credential yields `1.0`.
pub fn char_diversity(credential: &str) -> f64 {
    let bytes = credential.as_bytes();
    if bytes.is_empty() {
        return 1.0;
    }

    // One flag per possible byte value; a byte counts only the first time its
    // flag flips from `false` to `true`.
    let mut present = [false; 256];
    let distinct = bytes
        .iter()
        .filter(|&&byte| !std::mem::replace(&mut present[usize::from(byte)], true))
        .count();

    distinct as f64 / bytes.len() as f64
}
/// Length of the longest run of identical consecutive bytes in `credential`,
/// expressed as a fraction of its total byte length.
///
/// An empty credential yields `0.0`; any non-empty credential has a run of at
/// least one byte.
pub fn max_repeat_run(credential: &str) -> f64 {
    let bytes = credential.as_bytes();
    if bytes.is_empty() {
        return 0.0;
    }

    let mut longest = 1usize;
    let mut streak = 1usize;
    // Each window is a pair of neighbours; equal neighbours extend the streak.
    for pair in bytes.windows(2) {
        streak = if pair[0] == pair[1] { streak + 1 } else { 1 };
        longest = longest.max(streak);
    }

    longest as f64 / bytes.len() as f64
}
/// Returns `true` unless `detector_id` starts with `generic-` or `entropy-`,
/// or is exactly `private-key`.
///
/// NOTE(review): presumably the excluded ids are the detectors that are not
/// tied to one specific service; confirm against the detector catalogue.
pub(crate) fn is_service_anchored_detector(detector_id: &str) -> bool {
    const UNANCHORED_PREFIXES: [&str; 2] = ["generic-", "entropy-"];

    // Exact match only: ids that merely begin with "private-key" still pass.
    if detector_id == "private-key" {
        return false;
    }
    !UNANCHORED_PREFIXES
        .iter()
        .any(|prefix| detector_id.starts_with(*prefix))
}
/// Scales `score` down for credentials that look like placeholders or junk,
/// then clamps the result with `finalize_confidence`.
///
/// Penalties are multiplicative and stack:
/// - `0.05` when the credential contains a placeholder word, either directly
///   or according to `decode_structure::decoded_contains_placeholder`;
/// - `0.1` when `char_diversity` is below the diversity threshold;
/// - `0.1` when `max_repeat_run` is above the repeat threshold;
/// - `0.02` each for `decode_structure::is_encoded_binary` and
///   `decode_structure::looks_like_uniform_base64_blob`, applied only when
///   `is_named` is `false`.
///
/// With `is_named` set the thresholds are more lenient (diversity `< 0.1`,
/// repeat `> 0.8`) than without it (diversity `< 0.3`, repeat `> 0.5`).
/// NOTE(review): `is_named` presumably marks matches from a named,
/// service-specific detector; confirm with callers.
///
/// An empty `credential` returns `score` unchanged, without clamping.
pub fn apply_post_ml_penalties(score: f64, credential: &str, is_named: bool) -> f64 {
    if credential.is_empty() {
        return score;
    }

    let looks_placeholder = contains_placeholder_word(credential)
        || crate::decode_structure::decoded_contains_placeholder(credential);
    let mut adjusted = if looks_placeholder { score * 0.05 } else { score };

    // Both branches run the same two shape checks; only the thresholds differ.
    let (diversity_floor, repeat_ceiling) = if is_named { (0.1, 0.8) } else { (0.3, 0.5) };
    if char_diversity(credential) < diversity_floor {
        adjusted *= 0.1;
    }
    if max_repeat_run(credential) > repeat_ceiling {
        adjusted *= 0.1;
    }

    // The structural decode checks apply only when `is_named` is false.
    if !is_named {
        if crate::decode_structure::is_encoded_binary(credential) {
            adjusted *= 0.02;
        }
        if crate::decode_structure::looks_like_uniform_base64_blob(credential) {
            adjusted *= 0.02;
        }
    }

    finalize_confidence(adjusted)
}
/// Multiplies `score` by the calibration posterior mean recorded for
/// `detector_id`, then clamps the result with `finalize_confidence`.
///
/// The calibration is loaded at most once per process from
/// `keyhog_core::calibration::default_cache_path()`. When there is no default
/// path or the file does not exist, that outcome is cached as well, so a file
/// created later is not picked up by the running process. In that case, or
/// when the detector has zero observations, `score` is only clamped.
pub fn apply_calibration_multiplier(score: f64, detector_id: &str) -> f64 {
    use keyhog_core::calibration::Calibration;
    use std::sync::OnceLock;

    static CALIBRATION: OnceLock<Option<Calibration>> = OnceLock::new();

    let cached = CALIBRATION.get_or_init(|| {
        let cache_file = keyhog_core::calibration::default_cache_path()?;
        cache_file.exists().then(|| Calibration::load(&cache_file))
    });

    let scaled = match cached {
        Some(calibration) => {
            let stats = calibration.counters(detector_id);
            if stats.observations() == 0 {
                // No observations for this detector: leave the score unscaled.
                score
            } else {
                score * stats.posterior_mean()
            }
        }
        None => score,
    };

    finalize_confidence(scaled)
}
pub fn apply_path_confidence_penalties(score: f64, path: Option<&str>) -> f64 {
let Some(path) = path else {
return finalize_confidence(score);
};
let is_test_like = path.split(['/', '\\']).any(|component| {
component.eq_ignore_ascii_case("test")
|| component.eq_ignore_ascii_case("tests")
|| component.eq_ignore_ascii_case("example")
|| component.eq_ignore_ascii_case("examples")
|| component.eq_ignore_ascii_case("sample")
|| component.eq_ignore_ascii_case("samples")
|| component.eq_ignore_ascii_case("dummy")
});
let adjusted = if is_test_like { score * 0.5 } else { score };
finalize_confidence(adjusted)
}