use keyhog_scanner::unicode_hardening::*;
/// A combining acute accent (U+0301) placed after a base letter must be
/// stripped, leaving the plain token text visible in the normalized output.
#[test]
fn normalize_removes_combining_diacritical_marks() {
    let normalized = normalize_homoglyphs("ghp_e\u{0301}xample");

    // The undecorated token must be present once the mark is gone.
    let exposes_plain_token = normalized.contains("ghp_example");
    assert!(
        exposes_plain_token,
        "Combining acute accent (U+0301) must be removed; got: {normalized:?}"
    );

    // The mark itself must not survive anywhere in the output.
    assert!(!normalized.contains('\u{0301}'));
}
/// Two combining marks stacked on one base letter (diaeresis U+0308 followed
/// by macron U+0304) must both be stripped.
#[test]
fn normalize_removes_multiple_combining_marks_on_base() {
    let normalized = normalize_homoglyphs("token=o\u{0308}\u{0304}ther");

    // With both marks gone, the plain key/value text must show through.
    let exposes_plain_text = normalized.contains("token=other");
    assert!(
        exposes_plain_text,
        "Multiple combining marks (U+0308, U+0304) must be removed; got: {normalized:?}"
    );

    // Neither of the stacked marks may remain.
    assert!(!normalized.contains('\u{0308}'));
    assert!(!normalized.contains('\u{0304}'));
}
/// Combining marks scattered across several base letters (grave U+0300,
/// acute U+0301, circumflex U+0302) must all be stripped, yielding exactly
/// the undecorated token.
#[test]
fn normalize_removes_combining_marks_throughout_string() {
    let expected = "ghp_password";
    let normalized = normalize_homoglyphs("ghp_p\u{0300}a\u{0301}ssw\u{0302}ord");

    // Exact equality: nothing besides the marks may be added or dropped.
    assert_eq!(
        normalized.as_ref(),
        expected,
        "All distributed combining marks must be removed; got: {normalized:?}"
    );

    // Each individual mark must be absent from the output.
    assert!(!normalized.contains('\u{0300}'));
    assert!(!normalized.contains('\u{0301}'));
    assert!(!normalized.contains('\u{0302}'));
}
/// A combining grave accent (U+0300) following a base letter must be
/// stripped from the normalized output.
#[test]
fn normalize_removes_combining_grave_accent() {
    let normalized = normalize_homoglyphs("token=a\u{0300}ccountId");

    // The plain key/value text must be present after normalization.
    let exposes_plain_text = normalized.contains("token=accountId");
    assert!(
        exposes_plain_text,
        "Combining grave accent (U+0300) must be removed; got: {normalized:?}"
    );

    // The grave accent must not remain anywhere in the output.
    assert!(!normalized.contains('\u{0300}'));
}
/// A combining ring above (U+030A) following a base letter must be stripped
/// from the normalized output.
#[test]
fn normalize_removes_combining_ring_above() {
    let normalized = normalize_homoglyphs("sk_live_a\u{030A}pi");

    // The undecorated key prefix must be visible after normalization.
    let exposes_plain_token = normalized.contains("sk_live_api");
    assert!(
        exposes_plain_token,
        "Combining ring above (U+030A) must be removed; got: {normalized:?}"
    );

    // The ring mark must not remain anywhere in the output.
    assert!(!normalized.contains('\u{030A}'));
}
/// A combining diaeresis (U+0308) attached to the second-to-last character
/// must be stripped while the final character `y` stays at the end of the
/// normalized output.
#[test]
fn normalize_removes_combining_mark_at_boundary() {
    let text = "ghp_abcdefghijklmnopqrstuv\u{0308}y";
    let normalized = normalize_homoglyphs(text);

    // One assertion covers both requirements: the trailing character is kept
    // and the mark is gone. A separate follow-up check for the mark would be
    // unreachable, because this assertion already fails if the mark remains.
    assert!(
        normalized.ends_with('y') && !normalized.contains('\u{0308}'),
        "Combining mark at boundary must be removed; got: {normalized:?}"
    );
}
/// Text carrying a combining acute accent (U+0301) must yield at least one
/// detected attack, and at least one of them must be classified as either
/// `EvasionKind::Decomposed` or `EvasionKind::Suspicious`.
#[test]
fn detect_combining_marks_as_evasion() {
    let attacks = detect_unicode_attacks("ghp_e\u{0301}xample");

    // Something must be reported at all.
    let found_any = !attacks.is_empty();
    assert!(
        found_any,
        "Combining marks must be detected as evasion; got empty attacks"
    );

    // At least one report must carry one of the two accepted kinds.
    let flagged = attacks
        .iter()
        .any(|attack| matches!(attack.kind, EvasionKind::Decomposed | EvasionKind::Suspicious));
    assert!(
        flagged,
        "Combining mark (U+0301) must be flagged; attacks={:?}",
        attacks
    );
}