use crate::error::{Error, Result};
/// Regex fragment substituted for the `<HOST>` placeholder.
///
/// It defines a named capture group `host` that matches either
/// * a dotted-quad IPv4 address (four groups of 1-3 digits), optionally
///   preceded by `::ffff:` (the `f`s may be upper or lower case), or
/// * a run of 2 to 39 hexadecimal digits and colons.
///
/// The fragment only constrains the shape of the text: octet ranges and the
/// structure of the hex/colon run are not validated here.
const HOST_CAPTURE: &str =
r"(?P<host>(?:::[fF]{4}:)?\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|[0-9a-fA-F:]{2,39})";
/// Placeholder that patterns use to mark where the host address appears.
const HOST_TAG: &str = "<HOST>";
pub fn expand_host(pattern: &str) -> Result<String> {
let count = pattern.matches(HOST_TAG).count();
if count == 0 {
return Err(Error::config(format!(
"pattern missing <HOST> placeholder: {pattern}"
)));
}
if count > 1 {
return Err(Error::config(format!(
"pattern has multiple <HOST> placeholders ({count}): {pattern}"
)));
}
Ok(pattern.replace(HOST_TAG, HOST_CAPTURE))
}
/// Strategy selected by [`host_extractor`] from the shape of the pattern text
/// around the `<HOST>` placeholder.
///
/// NOTE(review): the code that applies these strategies is not in this file;
/// the variant descriptions below only state when each one is selected.
#[derive(Debug, Clone)]
pub enum HostExtractor {
/// `<HOST>` is the first thing in the pattern, preceded by nothing or only
/// by `^` characters.
AtStart,
/// Literal text (at least two bytes long) that directly precedes `<HOST>`
/// and does not also occur earlier in the pattern.
AfterLiteral(String),
/// Literal text (at least two bytes long) that directly follows `<HOST>`
/// and does not occur in the pattern text before the placeholder.
BeforeLiteral(String),
/// Fallback when none of the other strategies applies, including when the
/// pattern has no `<HOST>` placeholder at all. Presumably the caller then
/// relies on the regex `host` capture group -- confirm against the caller.
Captures,
}
/// Characters treated as regex metacharacters when a pattern is scanned for
/// literal text. `trailing_literal` and `leading_literal` consider every
/// character that is not listed here to be literal.
const META_CHARS: &[char] = &[
'\\', '.', '*', '+', '?', '(', ')', '[', ']', '{', '}', '|', '^', '$',
];
/// Chooses a [`HostExtractor`] strategy from the text surrounding `<HOST>`.
///
/// The checks run in this order, and the first that applies wins:
///
/// 1. no `<HOST>` placeholder at all -> [`HostExtractor::Captures`];
/// 2. nothing but `^` (or nothing at all) before the placeholder ->
///    [`HostExtractor::AtStart`];
/// 3. a literal of at least two bytes directly before the placeholder that
///    does not occur earlier in the pattern -> [`HostExtractor::AfterLiteral`];
/// 4. a literal of at least two bytes directly after the placeholder that
///    does not occur in the text before it -> [`HostExtractor::BeforeLiteral`];
/// 5. otherwise -> [`HostExtractor::Captures`].
///
/// Only the first `<HOST>` occurrence is considered.
pub fn host_extractor(pattern: &str) -> HostExtractor {
    let Some((before, after)) = pattern.split_once(HOST_TAG) else {
        return HostExtractor::Captures;
    };

    // An empty prefix, or one made only of anchors, means the host comes first.
    if before.trim_start_matches('^').is_empty() {
        return HostExtractor::AtStart;
    }

    let preceding = trailing_literal(before);
    if preceding.len() >= 2 {
        // The literal is only a usable marker if it is not repeated in the
        // part of the pattern that comes before it.
        let earlier = &before[..before.len() - preceding.len()];
        if !earlier.contains(preceding.as_str()) {
            return HostExtractor::AfterLiteral(preceding);
        }
    }

    let following = leading_literal(after);
    let usable = following.len() >= 2 && !before.contains(following.as_str());
    if usable {
        HostExtractor::BeforeLiteral(following)
    } else {
        HostExtractor::Captures
    }
}
/// Returns the literal text at the very end of the regex fragment `s`.
///
/// The literal starts right after the last character of `s` that is listed in
/// `META_CHARS`; when `s` contains none of them, all of `s` is returned. The
/// result is always a suffix of `s` (possibly empty).
///
/// If that last metacharacter is a backslash that *starts* an escape such as
/// `\s`, `\d` or `\w`, the escaped character is part of the escape rather than
/// literal text, so it is left out: `login\sfailed from ` yields
/// `failed from `, not `sfailed from `. A backslash that is itself escaped
/// (`\\`) is a literal backslash and does not start an escape.
///
/// Escapes that span more than one following character (`\x41`, `\u1234`,
/// `\U0001F600`, `\pL`, `\PL`) make it impossible to tell cheaply where the
/// literal begins, so an empty literal is returned for them.
fn trailing_literal(s: &str) -> String {
    let Some(pos) = s.rfind(|c: char| META_CHARS.contains(&c)) else {
        return s.to_string();
    };
    // Every metacharacter is a single ASCII byte, so `pos + 1` is a boundary.
    let tail = &s[pos + 1..];

    // Length of the backslash run ending at `pos` (0 when the metacharacter
    // is not a backslash). An even run consists only of escaped backslashes;
    // an odd run ends with a backslash that escapes the next character.
    let backslashes = s[..=pos]
        .bytes()
        .rev()
        .take_while(|&b| b == b'\\')
        .count();
    if backslashes % 2 == 0 {
        return tail.to_string();
    }

    let mut rest = tail.chars();
    match rest.next() {
        // Multi-character escape: be conservative and report no literal.
        Some('x' | 'u' | 'U' | 'p' | 'P') => String::new(),
        // Single-character escape (or nothing after the backslash).
        _ => rest.as_str().to_string(),
    }
}
/// Returns the literal text at the very start of the regex fragment `s`.
///
/// The literal ends right before the first character of `s` that is listed in
/// `META_CHARS`; when `s` contains none of them, all of `s` is returned. The
/// result is always a prefix of `s` (possibly empty).
///
/// When the terminating metacharacter is a quantifier that permits zero
/// repetitions (`*`, `?`, or a `{...}` counted repetition), it binds to the
/// last character of the literal, which therefore is not guaranteed to be
/// present in matching text. That character is dropped: ` ports? ` yields
/// ` port`, not ` ports`. A `+` keeps the character because it must occur at
/// least once.
fn leading_literal(s: &str) -> String {
    let Some(end) = s.find(|c: char| META_CHARS.contains(&c)) else {
        return s.to_string();
    };
    let literal = &s[..end];

    let last_is_optional = matches!(s[end..].chars().next(), Some('*' | '?' | '{'));
    if !last_is_optional {
        return literal.to_string();
    }

    // Cut at the start of the final character (0 when the literal is empty).
    let cut = literal.char_indices().next_back().map_or(0, |(i, _)| i);
    literal[..cut].to_string()
}
pub fn literal_prefix(pattern: &str) -> Option<String> {
let host_pos = pattern.find(HOST_TAG)?;
let before = &pattern[..host_pos];
if before.is_empty() {
return None;
}
let meta_chars = &[
'\\', '.', '*', '+', '?', '(', ')', '[', ']', '{', '}', '|', '^', '$',
];
let literal_start = before
.rfind(|c: char| meta_chars.contains(&c))
.map_or(0, |pos| pos + 1);
let trailing = &before[literal_start..];
if trailing.len() >= 3 {
return Some(trailing.to_string());
}
if let Some(longer) = extract_longest_literal(before) {
return Some(longer);
}
if !trailing.is_empty() {
return Some(trailing.to_string());
}
None
}
fn extract_longest_literal(s: &str) -> Option<String> {
let meta_chars = &[
'\\', '.', '*', '+', '?', '(', ')', '[', ']', '{', '}', '|', '^', '$',
];
let mut best = "";
let mut current_start = 0;
for (i, c) in s.char_indices() {
if meta_chars.contains(&c) {
let segment = &s[current_start..i];
if segment.len() > best.len() {
best = segment;
}
current_start = i + c.len_utf8();
}
}
let segment = &s[current_start..];
if segment.len() > best.len() {
best = segment;
}
if best.len() >= 3 {
Some(best.to_string())
} else {
None
}
}
// Unit tests for placeholder expansion (`expand_host`), literal extraction
// (`literal_prefix`) and extractor selection (`host_extractor`).
// The lint allowances below let test code panic, index and unwrap directly.
#[cfg(test)]
#[allow(
clippy::panic,
clippy::indexing_slicing,
clippy::unwrap_used,
clippy::needless_pass_by_value
)]
mod tests {
use crate::detect::pattern::{HostExtractor, expand_host, host_extractor, literal_prefix};
// ---- expand_host ----
// The placeholder is replaced by the named group and the result compiles.
#[test]
fn expand_host_ipv4() {
let expanded = expand_host(r"Failed password for .* from <HOST>").unwrap();
assert!(expanded.contains("(?P<host>"));
assert!(!expanded.contains("<HOST>"));
regex::Regex::new(&expanded).unwrap();
}
// Expansion composes with regex syntax elsewhere in the pattern.
#[test]
fn expand_host_with_regex() {
let expanded =
expand_host(r"sshd\[\d+\]: Failed password for .* from <HOST> port").unwrap();
let re = regex::Regex::new(&expanded).unwrap();
assert!(re.is_match("sshd[1234]: Failed password for root from 192.168.1.100 port"));
}
// The expanded group also accepts a hex/colon address.
#[test]
fn expand_host_ipv6() {
let expanded = expand_host(r"from <HOST>").unwrap();
let re = regex::Regex::new(&expanded).unwrap();
assert!(re.is_match("from 2001:db8::1"));
}
// A pattern without the placeholder is rejected.
#[test]
fn expand_host_missing() {
let result = expand_host(r"no host placeholder here");
assert!(result.is_err());
}
// A pattern with more than one placeholder is rejected.
#[test]
fn expand_host_multiple() {
let result = expand_host(r"<HOST> and <HOST>");
assert!(result.is_err());
}
// ---- literal_prefix ----
// The literal next to the placeholder is found behind a regex-heavy prefix.
#[test]
fn literal_prefix_ssh() {
let prefix = literal_prefix(r"sshd\[\d+\]: Failed password for .* from <HOST>");
let p = prefix.unwrap();
assert!(p.contains("from ") || p.contains(" from"), "got: {p}");
}
// With no metacharacters, everything before the placeholder is returned.
#[test]
fn literal_prefix_simple() {
let prefix = literal_prefix(r"Connection refused from <HOST>");
assert_eq!(prefix, Some("Connection refused from ".to_string()));
}
// Nothing precedes the placeholder, so there is no prefix.
#[test]
fn literal_prefix_none() {
let prefix = literal_prefix(r"<HOST> did something");
assert!(prefix.is_none());
}
// Only metacharacters precede the placeholder.
#[test]
fn literal_prefix_short() {
let prefix = literal_prefix(r".*<HOST>");
assert!(prefix.is_none());
}
// An unescaped `.` is a metacharacter and ends a literal run.
#[test]
fn literal_prefix_dot_treated_as_meta() {
let prefix = literal_prefix(r"prefix.thing from <HOST>");
let p = prefix.unwrap();
assert!(
p.contains("thing from "),
"dot should split segments; got: {p}"
);
}
// The empty pattern has no placeholder and is rejected by `expand_host`.
#[test]
fn expand_host_empty_pattern() {
let result = expand_host("");
assert!(result.is_err());
}
// A prefix made only of escapes and quantifiers yields no literal.
#[test]
fn literal_prefix_all_metacharacters() {
let prefix = literal_prefix(r".*\d+\[\d+\]<HOST>");
assert!(prefix.is_none());
}
// A three-byte adjacent literal meets the preferred length threshold.
#[test]
fn literal_prefix_boundary_three_chars() {
let prefix = literal_prefix(r".*abc<HOST>");
assert_eq!(prefix, Some("abc".to_string()));
}
// A shorter adjacent literal is still returned when nothing longer exists.
#[test]
fn literal_prefix_boundary_two_chars() {
let prefix = literal_prefix(r".*ab<HOST>");
assert_eq!(prefix, Some("ab".to_string()));
}
// No adjacent literal and no run of three or more bytes: no prefix.
#[test]
fn literal_prefix_fallback_too_short() {
let prefix = literal_prefix(r".*a\d+b\w+<HOST>");
assert!(prefix.is_none());
}
// ---- host_extractor ----
// Placeholder at the very beginning of the pattern.
#[test]
fn extractor_at_start_bare() {
assert!(matches!(
host_extractor(r"<HOST> - - \["),
HostExtractor::AtStart
));
}
// A leading `^` anchor still counts as "at start".
#[test]
fn extractor_at_start_with_caret() {
assert!(matches!(
host_extractor(r"^<HOST> .*"),
HostExtractor::AtStart
));
}
// A unique literal directly before the placeholder is preferred.
#[test]
fn extractor_after_literal() {
match host_extractor(r"from <HOST> port") {
HostExtractor::AfterLiteral(lit) => assert_eq!(lit, "from "),
other => panic!("expected AfterLiteral, got {other:?}"),
}
}
// The preceding literal is found after a regex-heavy prefix.
#[test]
fn extractor_after_literal_with_regex_prefix() {
match host_extractor(r"sshd\[\d+\]: .* from <HOST>") {
HostExtractor::AfterLiteral(lit) => assert!(
lit.contains("from "),
"expected literal containing 'from ', got '{lit}'"
),
other => panic!("expected AfterLiteral, got {other:?}"),
}
}
// The preceding literal is a single space (too short), so the following
// literal is used instead.
#[test]
fn extractor_before_literal() {
match host_extractor(r"user .* <HOST> port \d+") {
HostExtractor::BeforeLiteral(lit) => assert_eq!(lit, " port "),
other => panic!("expected BeforeLiteral, got {other:?}"),
}
}
// Same selection on a longer sshd-style pattern.
#[test]
fn extractor_before_literal_real_sshd() {
match host_extractor(
r"sshd\[\d+\]: Connection closed by authenticating user .* <HOST> port \d+",
) {
HostExtractor::BeforeLiteral(lit) => assert_eq!(lit, " port "),
other => panic!("expected BeforeLiteral, got {other:?}"),
}
}
// Both neighbouring literals are a single space, so the fallback is chosen.
#[test]
fn extractor_captures_fallback() {
assert!(matches!(
host_extractor(r"\d+ <HOST> \d+"),
HostExtractor::Captures
));
}
// " from " (with the leading space) does not occur earlier in the pattern,
// even though "from " does, so it is still usable.
#[test]
fn extractor_after_literal_repeated_keyword() {
match host_extractor(r"from .* from <HOST> port \d+") {
HostExtractor::AfterLiteral(lit) => assert_eq!(lit, " from "),
other => panic!("expected AfterLiteral, got {other:?}"),
}
}
// Escaped brackets on both sides leave no literal text to anchor on.
#[test]
fn extractor_bracket_encapsulated() {
assert!(matches!(
host_extractor(r"\[<HOST>\]"),
HostExtractor::Captures
));
}
// Without a placeholder the fallback is returned.
#[test]
fn extractor_no_host_tag() {
assert!(matches!(
host_extractor(r"no host here"),
HostExtractor::Captures
));
}
}