use keyhog_scanner::compiler::{
extract_inner_literals, extract_literal_prefixes, rewrite_alternation_prefix,
split_leading_inline_flag,
};
/// A leading non-capturing alternation that contains the target literal as one
/// of its branches is expected to be replaced by the supplied replacement,
/// with the tail that follows the group kept unchanged.
#[test]
fn alternation_rewrite_basic() {
    let pattern = "(?:ghp_|github_pat_)[a-zA-Z0-9_]{36}";
    let rewritten = rewrite_alternation_prefix(pattern, "ghp_", "[gɡ]hp_").unwrap();
    assert_eq!(rewritten, "[gɡ]hp_[a-zA-Z0-9_]{36}");
}
/// Same rewrite as the basic case, but the pattern starts with a standalone
/// `(?i)` flag group; the expected output keeps that flag group in front of
/// the replacement.
#[test]
fn alternation_rewrite_with_inline_flag() {
    let pattern = "(?i)(?:ghp_|github_pat_)[a-zA-Z0-9_]{36}";
    let literal = "ghp_";
    let replacement = "[gɡ]hp_";

    let rewritten = rewrite_alternation_prefix(pattern, literal, replacement).unwrap();

    assert_eq!(rewritten, "(?i)[gɡ]hp_[a-zA-Z0-9_]{36}");
}
/// A flag-scoped group (`(?i:...)`) at the head of the pattern is rewritten
/// like a plain non-capturing alternation; the expected output contains only
/// the replacement followed by the tail, with no `(?i` prefix.
#[test]
fn alternation_rewrite_with_alternative_flag_prefix() {
    let pattern = "(?i:abc|def)\\w+";
    let rewritten = rewrite_alternation_prefix(pattern, "abc", "[a]bc").unwrap();
    assert_eq!(rewritten, "[a]bc\\w+");
}
/// When the matching branch itself contains a nested group, the expected
/// output keeps that nested group after the replacement and then the shared
/// tail; the sibling branch (`def`) is dropped.
#[test]
fn alternation_rewrite_handles_nested_groups() {
    let pattern = "(?:abc(?:\\d{2})|def)body";
    let rewritten = rewrite_alternation_prefix(pattern, "abc", "[a]bc").unwrap();
    assert_eq!(rewritten, "[a]bc(?:\\d{2})body");
}
/// Each branch of the alternation carries its own suffix (the matching
/// `END ... PRIVATE KEY` footer). Rewriting the EC branch's prefix is expected
/// to keep the EC branch's own suffix and nothing from the RSA branch.
#[test]
fn alternation_rewrite_preserves_branch_local_suffix() {
    let pattern = "(?:-----BEGIN RSA PRIVATE KEY-----[\\s\\S]*?-----END RSA PRIVATE KEY-----|-----BEGIN EC PRIVATE KEY-----[\\s\\S]*?-----END EC PRIVATE KEY-----)";
    let literal = "-----BEGIN EC PRIVATE KEY-----";
    let replacement = "-----B[EΕ]GIN [EΕ]C PRIV[AΑ]T[EΕ] K[EΕ]Y-----";

    let rewritten = rewrite_alternation_prefix(pattern, literal, replacement).unwrap();

    assert_eq!(
        rewritten,
        "-----B[EΕ]GIN [EΕ]C PRIV[AΑ]T[EΕ] K[EΕ]Y-----[\\s\\S]*?-----END EC PRIVATE KEY-----"
    );
}
/// A pattern that begins with a plain literal rather than a group is expected
/// to produce no rewrite at all.
#[test]
fn alternation_rewrite_returns_none_for_literal_head() {
    let pattern = "AKIA[A-Z0-9]{16}";
    let rewritten = rewrite_alternation_prefix(pattern, "AKIA", "[a]kia");
    assert!(rewritten.is_none());
}
/// A capturing group that wraps the entire credential pattern must not be
/// treated as an alternation head: the expected result is `None`. The
/// assertion message records why a `Some` result would be harmful.
#[test]
fn alternation_rewrite_returns_none_for_capturing_full_pattern() {
    let pattern = "(FLWSECK_(?:TEST|LIVE)-[a-f0-9]{32,64}-X)";
    let literal = "FLWSECK_TEST-";
    let replacement = "FLW[SСS]ECK_TEST-";

    let rewritten = rewrite_alternation_prefix(pattern, literal, replacement);

    assert!(
        rewritten.is_none(),
        "must not rewrite a capturing-group-around-full-credential; \
         a non-None result here matches the prefix anywhere"
    );
}
/// A non-capturing group with a single branch (no `|`) is not an alternation,
/// so no rewrite is expected.
#[test]
fn alternation_rewrite_returns_none_for_singleton_group() {
    let pattern = "(?:foobody)tail";
    let rewritten = rewrite_alternation_prefix(pattern, "foo", "[fF]oo");
    assert!(rewritten.is_none());
}
/// Table of inputs and the `(prefix, remainder)` pair expected from
/// `split_leading_inline_flag`: one-, two- and three-letter flag groups are
/// split off, a pattern with no group yields an empty prefix, and a leading
/// non-capturing group is left whole in the remainder.
#[test]
fn split_leading_inline_flag_parses_common_shapes() {
    let cases = [
        ("(?i)body", ("(?i)", "body")),
        ("(?im)body", ("(?im)", "body")),
        ("(?ims)body", ("(?ims)", "body")),
        ("body", ("", "body")),
        ("(?:abc|def)body", ("", "(?:abc|def)body")),
    ];

    // Cases are checked in the listed order; the first mismatch fails the test.
    for (input, expected) in cases {
        assert_eq!(split_leading_inline_flag(input), expected);
    }
}
/// A literal run sitting between two character classes is expected to be the
/// only inner literal reported.
#[test]
fn inner_literal_after_leading_class() {
    let found = extract_inner_literals(r"[a-zA-Z0-9]{20}_AKIA[A-Z0-9]{16}");
    let expected = vec!["_AKIA"];
    assert_eq!(found, expected);
}
/// Every branch of a leading alternation is expected to show up among the
/// extracted inner literals (other entries are allowed).
#[test]
fn inner_literal_alternation_branches() {
    let found = extract_inner_literals(r"(?:secret|api_key)\s*=\s*[a-z0-9]{32}");
    for branch in ["secret", "api_key"] {
        assert!(found.iter().any(|lit| lit == branch));
    }
}
/// A pattern made only of a repeated character class contains no literal
/// text, so no inner literals are expected.
#[test]
fn inner_literal_pure_class_yields_empty() {
    let found = extract_inner_literals(r"[a-f0-9]{32}");
    assert!(found.is_empty());
}
/// A two-character literal (`wx`) is expected to be discarded — presumably
/// because it falls under a minimum-length threshold in the extractor; the
/// threshold value itself is not visible from this test.
#[test]
fn inner_literal_below_threshold_dropped() {
    let found = extract_inner_literals(r"wx[a-f0-9]{16}");
    assert!(found.is_empty());
}
/// Literal text surrounded by escaped dots should still be extracted: at
/// least one reported literal must contain `lambda-url`.
#[test]
fn inner_literal_handles_escaped_dot() {
    let lits = extract_inner_literals(r"https?://[^/]+\.lambda-url\.[a-z]+\.on\.aws/path");
    let mentions_lambda_url = lits.iter().any(|lit| lit.contains("lambda-url"));
    assert!(
        mentions_lambda_url,
        "expected lambda-url in inner literals; got {lits:?}"
    );
}
/// A literal repeated across alternation branches must not be reported more
/// than once. Note this is an upper bound only: the test also passes when
/// `KEYY` is absent from the result.
#[test]
fn inner_literal_dedup() {
    let lits = extract_inner_literals(r"(?:KEYY|KEYY|other)foo");
    let occurrences = lits.iter().filter(|lit| *lit == "KEYY").count();
    assert!(occurrences < 2, "expected dedup; got {lits:?}");
}
/// A syntactically invalid regex (unterminated character class) is expected
/// to yield an empty result rather than a panic.
#[test]
fn inner_literal_garbage_regex_returns_empty() {
    let found = extract_inner_literals(r"[unclosed");
    assert!(found.is_empty());
}
/// Walks every embedded detector TOML and counts the patterns for which
/// `extract_literal_prefixes` finds nothing but `extract_inner_literals`
/// finds at least one literal ("promoted" patterns). The test requires at
/// least three such patterns and prints a coverage summary to stderr.
#[test]
fn inner_literal_corpus_coverage() {
    let mut total_patterns: usize = 0;
    let mut promoted_patterns: usize = 0;
    let mut total_inner_literals: usize = 0;

    for (_, toml_str) in keyhog_core::embedded_detector_tomls() {
        // Sources that fail to load are skipped instead of failing the test.
        if let Ok(detectors) = keyhog_core::load_detectors_from_str(toml_str) {
            for detector in &detectors {
                for pattern in &detector.patterns {
                    total_patterns += 1;

                    // Only patterns with no literal prefix are candidates
                    // for promotion via inner literals.
                    if extract_literal_prefixes(&pattern.regex).is_empty() {
                        let found = extract_inner_literals(&pattern.regex).len();
                        if found > 0 {
                            promoted_patterns += 1;
                            total_inner_literals += found;
                        }
                    }
                }
            }
        }
    }

    assert!(
        promoted_patterns >= 3,
        "expected ≥3 patterns promoted out of fallback via inner-literal extraction; \
         got {promoted_patterns} (of {total_patterns} total)"
    );
    // Reached only when the assertion above holds.
    eprintln!(
        "inner-literal coverage: {promoted_patterns} patterns promoted out of fallback, \
         {total_inner_literals} inner literals added (of {total_patterns} total patterns)"
    );
}