/// Severity attached to a finding that `calibrate` decides to emit.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CalibratedSeverity {
/// Full severity; `calibrate` falls back to this when no downgrade rule matches.
Critical,
/// Downgraded severity, produced by the downgrade rules in `calibrate`.
Low,
}
impl CalibratedSeverity {
pub fn as_str(self) -> &'static str {
match self {
CalibratedSeverity::Critical => "Critical",
CalibratedSeverity::Low => "Low",
}
}
}
/// Outcome of `calibrate` for a single (detector, raw match) pair.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Calibration {
/// Report the finding, carrying the calibrated severity.
Emit(CalibratedSeverity),
/// Do not report the finding; `calibrate` returns this when an in-scope
/// detector's raw match looks like a class name.
Skip,
}
/// Detector names whose findings `calibrate` emits at `Low` severity whenever
/// no earlier rule has already decided the outcome. Names are compared ASCII
/// case-insensitively by `detector_in_low_severity_list`.
const LOW_SEVERITY_DETECTORS: &[&str] = &[
"Box",
"GoogleGeminiAPIKey",
"Honeycomb",
"Myfreshworks",
"Shortcut",
"Splunk Observability Token",
];
/// Case-sensitive substrings that `looks_like_class_name` searches for (via
/// `str::contains`) inside a raw match.
// NOTE(review): any string containing "BitmapFactory" also contains "Factory",
// so the first entry is redundant for a `contains`-based lookup — presumably
// kept for readability; confirm before removing.
const CLASS_NAME_SUBSTRINGS: &[&str] = &[
"BitmapFactory",
"Compat",
"Factory",
"ApprovalScreen",
"PeerCleaner",
];
/// Decides whether a finding is reported and at which severity.
///
/// Rules are applied in this order:
/// 1. A detector in class-name scope whose `raw` looks like a class name is
///    skipped entirely.
/// 2. An `OpenAI` finding that is not `sk-` anchored is downgraded to `Low`.
/// 3. A `Discord` finding that does not have the anchored token shape is
///    downgraded to `Low`.
/// 4. Any detector on the low-severity list is downgraded to `Low`.
/// 5. Everything else is emitted as `Critical`.
///
/// Detector names are compared ASCII case-insensitively.
pub fn calibrate(detector: &str, raw: &str) -> Calibration {
    if detector_in_class_name_scope(detector) && looks_like_class_name(raw) {
        return Calibration::Skip;
    }

    let is_detector = |name: &str| detector.eq_ignore_ascii_case(name);

    // `&&` keeps each shape check lazy: it only runs for its own detector.
    let downgrade = (is_detector("OpenAI") && !openai_sk_anchored(raw))
        || (is_detector("Discord") && !discord_bot_token_anchored(raw))
        || detector_in_low_severity_list(detector);

    let severity = if downgrade {
        CalibratedSeverity::Low
    } else {
        CalibratedSeverity::Critical
    };
    Calibration::Emit(severity)
}
/// Returns `true` when `detector` matches an entry of
/// `LOW_SEVERITY_DETECTORS`, ignoring ASCII case.
fn detector_in_low_severity_list(detector: &str) -> bool {
    for known in LOW_SEVERITY_DETECTORS {
        if known.eq_ignore_ascii_case(detector) {
            return true;
        }
    }
    false
}
/// Returns `true` for the detectors whose matches are checked against the
/// class-name heuristic: `Box` and `Splunk Observability Token` (ASCII
/// case-insensitive).
fn detector_in_class_name_scope(detector: &str) -> bool {
    ["Box", "Splunk Observability Token"]
        .iter()
        .any(|scoped| detector.eq_ignore_ascii_case(scoped))
}
/// Heuristic: does `raw` look like a source-code class name?
///
/// All three conditions must hold:
/// - the first byte is an ASCII uppercase letter (so empty input is rejected),
/// - every byte is ASCII alphanumeric, `_` or `$`,
/// - `raw` contains at least one entry of `CLASS_NAME_SUBSTRINGS`.
fn looks_like_class_name(raw: &str) -> bool {
    let starts_uppercase = raw
        .bytes()
        .next()
        .is_some_and(|first| first.is_ascii_uppercase());

    let identifier_bytes_only = raw
        .bytes()
        .all(|b| matches!(b, b'_' | b'$') || b.is_ascii_alphanumeric());

    starts_uppercase
        && identifier_bytes_only
        && CLASS_NAME_SUBSTRINGS
            .iter()
            .any(|needle| raw.contains(*needle))
}
/// Returns `true` when `raw` has one of the two accepted `sk-` shapes:
/// it starts with `sk-proj-` (any length), or it starts with `sk-` and is
/// exactly 51 bytes long.
fn openai_sk_anchored(raw: &str) -> bool {
    const FIXED_KEY_LEN: usize = 51;

    raw.starts_with("sk-proj-") || (raw.starts_with("sk-") && raw.len() == FIXED_KEY_LEN)
}
/// Returns `true` when `raw` has the three-segment token shape this module
/// accepts for the `Discord` detector.
///
/// `raw` is split at its first two `.` characters. The match is accepted only
/// if all of the following hold:
/// - there are three segments and each is non-empty base64url text (a third
///   `.` ends up inside the last segment and therefore fails this check),
/// - `raw` as a whole mixes at least three character classes,
/// - no segment has a normalized entropy below `0.7`.
fn discord_bot_token_anchored(raw: &str) -> bool {
    const MIN_CHAR_CLASSES: u32 = 3;
    const MIN_SEGMENT_ENTROPY: f64 = 0.7;

    let mut pieces = raw.splitn(3, '.');
    let (Some(head), Some(middle), Some(tail)) = (pieces.next(), pieces.next(), pieces.next())
    else {
        return false;
    };
    let segments = [head, middle, tail];

    segments.iter().all(|seg| looks_like_base64url(seg))
        && char_class_count(raw) >= MIN_CHAR_CLASSES
        && !segments
            .iter()
            .any(|seg| normalized_entropy(seg) < MIN_SEGMENT_ENTROPY)
}
/// Returns `true` when `s` is non-empty and consists solely of ASCII letters,
/// ASCII digits, `-` and `_` (padding `=` is not accepted).
fn looks_like_base64url(s: &str) -> bool {
    !s.is_empty()
        && s.bytes().all(|b| {
            matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_')
        })
}
/// Counts how many of four character classes occur in `s`: ASCII uppercase,
/// ASCII lowercase, ASCII digit, and the pair `-` / `_`. Any other byte is
/// ignored. The result is in `0..=4`.
fn char_class_count(s: &str) -> u32 {
    // One bit per class; OR-ing makes repeated occurrences count once.
    let seen = s.bytes().fold(0u8, |mask, b| {
        mask | match b {
            b'A'..=b'Z' => 0b0001,
            b'a'..=b'z' => 0b0010,
            b'0'..=b'9' => 0b0100,
            b'-' | b'_' => 0b1000,
            _ => 0b0000,
        }
    });
    seen.count_ones()
}
/// Shannon entropy of the bytes of `s`, normalized into `[0.0, 1.0]`.
///
/// The raw entropy (bits per byte) is divided by `log2(min(len, 64))`, the
/// largest entropy a string of that byte length can reach over a 64-symbol
/// alphabet. Strings shorter than two bytes return `0.0`.
///
/// An input longer than 64 bytes that uses more than 64 distinct byte values
/// can exceed that maximum, so the ratio is clamped to keep the documented
/// output range.
fn normalized_entropy(s: &str) -> f64 {
    let len = s.len();
    if len < 2 {
        return 0.0;
    }

    // Histogram of byte values. `usize::from(u8)` is a lossless widening, so
    // no `as` cast is needed; `get_mut` keeps the access panic-free.
    let mut counts = [0u32; 256];
    for b in s.bytes() {
        if let Some(slot) = counts.get_mut(usize::from(b)) {
            *slot = slot.saturating_add(1);
        }
    }

    #[allow(
        clippy::cast_precision_loss,
        clippy::as_conversions,
        reason = "PROOF: entropy thresholds tolerate f64 mantissa loss; len is bounded by string length (well within 2^52 for any realistic input)."
    )]
    let len_f = len as f64;

    // H = -sum(p * log2(p)) over the byte values that actually occur.
    let mut h = 0.0_f64;
    for c in counts.iter() {
        if *c > 0 {
            let p = f64::from(*c) / len_f;
            h -= p * p.log2();
        }
    }

    #[allow(
        clippy::cast_precision_loss,
        clippy::as_conversions,
        reason = "PROOF: cap is .min(64), trivially exact in f64."
    )]
    let cap = len.min(64) as f64;

    // `len >= 2` guarantees `cap.log2() >= 1`, so the division is well-defined.
    (h / cap.log2()).clamp(0.0, 1.0)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for "emitted, downgraded".
    const LOW: Calibration = Calibration::Emit(CalibratedSeverity::Low);
    /// Shorthand for "emitted at full severity".
    const CRITICAL: Calibration = Calibration::Emit(CalibratedSeverity::Critical);
    /// Three-segment fixture used by the Discord tests that expect `Critical`.
    const DISCORD_TOKEN_SHAPE: &str =
        "MTAxOTQ4MzgxMjU3Njc0NDgyOA.Gxq3-T.ZX1J2k7N0pQ-vY3Cm8sR5tU9wW";

    // ---- class-name skip rule -------------------------------------------

    #[test]
    fn box_class_name_skip() {
        let outcome = calibrate("Box", "BatteryMonitorFactoryPeerCleaner");
        assert_eq!(outcome, Calibration::Skip);
    }

    #[test]
    fn splunk_class_name_skip() {
        let outcome = calibrate("Splunk Observability Token", "SpoofingApprovalScreen");
        assert_eq!(outcome, Calibration::Skip);
    }

    #[test]
    fn honeycomb_bitmap_factory_downgraded_not_skipped() {
        // Honeycomb is not in class-name scope, so it is only downgraded.
        assert_eq!(calibrate("Honeycomb", "HoneycombBitmapFactory"), LOW);
    }

    #[test]
    fn box_real_token_downgraded_not_skipped() {
        assert_eq!(calibrate("Box", "abc123XYZ/=tokenshape="), LOW);
    }

    // ---- OpenAI anchoring -----------------------------------------------

    #[test]
    fn openai_sk_proj_preserves_critical() {
        assert_eq!(calibrate("OpenAI", "sk-proj-abcd1234deadbeefcafe5678"), CRITICAL);
    }

    #[test]
    fn openai_classic_51char_preserves_critical() {
        let key = format!("sk-{}", "a".repeat(48));
        assert_eq!(key.len(), 51);
        assert_eq!(calibrate("OpenAI", &key), CRITICAL);
    }

    #[test]
    fn openai_locale_prefix_downgraded() {
        assert_eq!(calibrate("OpenAI", "sk-Latn-SK"), LOW);
    }

    #[test]
    fn openai_unanchored_downgraded() {
        assert_eq!(calibrate("OpenAI", "sk-something"), LOW);
    }

    // ---- default (no rule applies) --------------------------------------

    #[test]
    fn known_real_critical_preserved() {
        assert_eq!(
            calibrate("Pendo", "eyJhbGciOiJIUzI1NiJ9.payload.signature"),
            CRITICAL
        );
    }

    #[test]
    fn aws_real_critical_preserved() {
        assert_eq!(calibrate("AWS", "AKIAIOSFODNN7EXAMPLE"), CRITICAL);
    }

    #[test]
    fn empty_inputs_default_critical() {
        assert_eq!(calibrate("", ""), CRITICAL);
    }

    // ---- low-severity list and class-name edge cases --------------------

    #[test]
    fn detector_case_insensitive() {
        for detector in ["BOX", "box"] {
            assert_eq!(calibrate(detector, "abc123"), LOW);
        }
    }

    #[test]
    fn class_name_lowercase_first_char_not_skipped() {
        assert_eq!(calibrate("Box", "boxapprovalscreen"), LOW);
    }

    #[test]
    fn class_name_with_punctuation_not_skipped() {
        assert_eq!(calibrate("Box", "Factory=foo"), LOW);
    }

    #[test]
    fn calibrated_severity_as_str_round_trips() {
        assert_eq!(CalibratedSeverity::Critical.as_str(), "Critical");
        assert_eq!(CalibratedSeverity::Low.as_str(), "Low");
    }

    #[test]
    fn unicode_raw_does_not_panic() {
        let raw = "Bo\u{1F600}xCompat";
        assert_eq!(calibrate("Box", raw), LOW);
    }

    // ---- Discord anchoring ----------------------------------------------

    #[test]
    fn discord_real_token_shape_preserves_critical() {
        assert_eq!(calibrate("Discord", DISCORD_TOKEN_SHAPE), CRITICAL);
    }

    #[test]
    fn discord_minified_bundle_fp_two_class_downgraded() {
        let raw = "myhookcomponentstate1234.dolist.somerandomidentifier1234567";
        assert_eq!(calibrate("Discord", raw), LOW);
    }

    #[test]
    fn discord_three_class_low_entropy_downgraded() {
        let raw = "AAAaaa1AAAaaa1AAAaaa1AAA.BBBbb1.CCCccc222CCCccc222CCCccc22";
        assert_eq!(calibrate("Discord", raw), LOW);
    }

    #[test]
    fn discord_two_segment_only_downgraded() {
        let raw = "MTAxOTQ4MzgxMjU3Njc0NDgyOA.Gxq3vT";
        assert_eq!(calibrate("Discord", raw), LOW);
    }

    #[test]
    fn discord_empty_segments_downgraded() {
        let raw = "MTAxOTQ4MzgxMjU3Njc0NDgyOA..ZX1J2k7N0pQ-vY3Cm8sR5tU9wW";
        assert_eq!(calibrate("Discord", raw), LOW);
    }

    #[test]
    fn discord_invalid_base64url_chars_downgraded() {
        let raw = "MTAxOTQ4MzgxMjU3Njc0NDgyOA=.Gxq3vT.ZX1J2k7N0pQ-vY3Cm8sR5tU9wW";
        assert_eq!(calibrate("Discord", raw), LOW);
    }

    #[test]
    fn discord_detector_case_insensitive() {
        for detector in ["DISCORD", "discord"] {
            assert_eq!(calibrate(detector, DISCORD_TOKEN_SHAPE), CRITICAL);
        }
    }

    #[test]
    fn discord_unicode_raw_does_not_panic() {
        let raw = "MTAxOTQ4M\u{1F600}gxMjU3Njc0NDgyOA.Gxq3vT.ZX1J2k7N0pQ-vY3Cm8sR5tU9wW";
        assert_eq!(calibrate("Discord", raw), LOW);
    }

    // ---- char_class_count -----------------------------------------------

    #[test]
    fn char_class_count_counts_all_four_classes() {
        assert_eq!(char_class_count("Aa1-"), 4);
        assert_eq!(char_class_count("Aa1_"), 4);
    }

    #[test]
    fn char_class_count_counts_three_classes() {
        assert_eq!(char_class_count("Aa1"), 3);
        assert_eq!(char_class_count("Aa-"), 3);
    }

    #[test]
    fn char_class_count_counts_two_classes() {
        assert_eq!(char_class_count("aa1"), 2);
        assert_eq!(char_class_count("AA1"), 2);
    }

    #[test]
    fn char_class_count_empty_string_zero() {
        assert_eq!(char_class_count(""), 0);
    }

    // ---- normalized_entropy ---------------------------------------------

    #[test]
    fn normalized_entropy_uniform_string_near_one() {
        let h = normalized_entropy("Aa1-Bb2_");
        assert!((h - 1.0).abs() < 1e-9, "expected ~1.0, got {h}");
    }

    #[test]
    fn normalized_entropy_all_same_char_zero() {
        let h = normalized_entropy("aaaaaaaa");
        assert!(h.abs() < 1e-9, "expected 0.0, got {h}");
    }

    #[test]
    fn normalized_entropy_short_string_zero() {
        assert!(normalized_entropy("").abs() < 1e-9);
        assert!(normalized_entropy("a").abs() < 1e-9);
    }

    // ---- looks_like_base64url -------------------------------------------

    #[test]
    fn looks_like_base64url_accepts_alphabet() {
        assert!(looks_like_base64url("AaZz09-_"));
    }

    #[test]
    fn looks_like_base64url_rejects_padding() {
        assert!(!looks_like_base64url("Aa1="));
    }

    #[test]
    fn looks_like_base64url_rejects_empty() {
        assert!(!looks_like_base64url(""));
    }

    // ---- Gemini ---------------------------------------------------------

    #[test]
    fn gemini_api_key_downgraded() {
        assert_eq!(
            calibrate("GoogleGeminiAPIKey", "AIzaSyABCDEFGHIJKLMNOPQRSTUVWXYZ123456"),
            LOW
        );
    }

    #[test]
    fn gemini_case_insensitive() {
        assert_eq!(calibrate("googlegeminiapikey", "abc123"), LOW);
    }

    // ---- routing table --------------------------------------------------

    #[test]
    fn calibrate_exhaustiveness_known_detectors() {
        let openai_classic_51 = format!("sk-{}", "a".repeat(48));

        #[allow(clippy::type_complexity)]
        let cases: &[(&str, &str, Calibration)] = &[
            // Low-severity list.
            ("Box", "abc123", LOW),
            ("GoogleGeminiAPIKey", "AIzaSyABC", LOW),
            ("Honeycomb", "abc123", LOW),
            ("Myfreshworks", "abc123", LOW),
            ("Shortcut", "abc123", LOW),
            ("Splunk Observability Token", "abc123", LOW),
            // Class-name skip.
            ("Box", "BatteryMonitorFactoryPeerCleaner", Calibration::Skip),
            ("Splunk Observability Token", "SpoofingApprovalScreen", Calibration::Skip),
            // OpenAI anchoring.
            ("OpenAI", "sk-proj-abcd1234deadbeefcafe5678", CRITICAL),
            ("OpenAI", openai_classic_51.as_str(), CRITICAL),
            ("OpenAI", "sk-Latn-SK", LOW),
            // Discord anchoring.
            ("Discord", DISCORD_TOKEN_SHAPE, CRITICAL),
            (
                "Discord",
                "myhookcomponentstate1234.dolist.somerandomidentifier1234567",
                LOW,
            ),
            // Detectors without any calibration rule.
            ("Pendo", "eyJhbGciOiJIUzI1NiJ9.payload.sig", CRITICAL),
            ("AWS", "AKIAIOSFODNN7EXAMPLE", CRITICAL),
        ];

        for &(detector, raw, expected) in cases {
            assert_eq!(
                calibrate(detector, raw),
                expected,
                "calibrate({detector:?}, {raw:?}) routing mismatch — \
                 update CASES in calibrate_exhaustiveness_known_detectors \
                 or add a new calibration family",
            );
        }
    }
}