use super::Token;
use regex::Regex;
use std::sync::LazyLock;
/// Matches a single bracketed identifier such as `[error]` or `[mod_jk]`.
///
/// The identifier must start with an ASCII letter and may continue with ASCII
/// letters, digits, `_`, `.` or `-`. Capture group 1 holds the text between
/// the brackets.
static SINGLE_BRACKET_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"\[([a-zA-Z][a-zA-Z0-9_.-]*)\]")
        .expect("SINGLE_BRACKET_REGEX is a hard-coded pattern and must compile")
});
/// Matches two or more consecutive bracketed identifiers, e.g. `[info] [upstream] `.
///
/// Each bracket group may be followed by whitespace, and that whitespace
/// (including any after the last group) is part of the overall match. Because
/// the capture group sits inside a repetition, group 1 only reports the last
/// bracket of the chain; scan the whole match again with a single-bracket
/// pattern to recover every name.
static CHAINED_BRACKET_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"(?:\[([a-zA-Z][a-zA-Z0-9_.-]*)\]\s*){2,}")
        .expect("CHAINED_BRACKET_REGEX is a hard-coded pattern and must compile")
});
pub struct BracketContextDetector;
impl BracketContextDetector {
pub fn detect_and_replace(text: &str) -> (String, Vec<Token>) {
if !Self::has_bracket_indicators(text) {
return (text.to_string(), Vec::new());
}
let mut result = text.to_string();
let mut tokens = Vec::new();
Self::apply_chained_bracket_pattern(&mut result, &mut tokens);
Self::apply_single_bracket_pattern(&mut result, &mut tokens);
(result, tokens)
}
fn has_bracket_indicators(text: &str) -> bool {
text.contains('[') && text.contains(']') &&
!text.contains("[2001:") && !text.contains("array[") && !text.contains("index[") && !text.contains("param=") && !text.contains("[1 +") && !text.contains("[0-9") && !Self::has_kubernetes_indicators(text)
}
fn has_kubernetes_indicators(text: &str) -> bool {
text.contains("kubernetes.io/") ||
text.contains("namespace/") ||
text.contains("pod/") ||
text.contains("service/") ||
text.contains("configmap/") ||
text.contains("secret/") ||
text.contains("deployment/") ||
text.contains("volumes/") ||
text.contains("projected-") ||
text.contains("volume-subpath") ||
text.contains("projected") ||
text.contains("apiserver") ||
text.contains("kube-") ||
text.contains("kubelet") ||
text.contains("kube-proxy") ||
text.contains("kube-scheduler") ||
text.contains("kube-controller") ||
text.contains("etcd") ||
text.contains("coredns")
}
fn apply_chained_bracket_pattern(text: &mut String, tokens: &mut Vec<Token>) {
let mut processed_indices = std::collections::HashSet::new();
for mat in CHAINED_BRACKET_REGEX.find_iter(text) {
let match_text = mat.as_str();
let contexts = Self::extract_contexts_from_chain(match_text);
if contexts.len() >= 2 && Self::are_logging_contexts(&contexts) {
tokens.push(Token::BracketContext(contexts));
processed_indices.insert(mat.start());
}
}
if !processed_indices.is_empty() {
*text = CHAINED_BRACKET_REGEX
.replace_all(text, |caps: ®ex::Captures| {
let contexts = Self::extract_contexts_from_chain(caps.get(0).unwrap().as_str());
if contexts.len() >= 2 && Self::are_logging_contexts(&contexts) {
"<BRACKET_CONTEXT>".to_string()
} else {
caps.get(0).unwrap().as_str().to_string()
}
})
.to_string();
}
}
fn apply_single_bracket_pattern(text: &mut String, tokens: &mut Vec<Token>) {
*text = SINGLE_BRACKET_REGEX
.replace_all(text, |caps: ®ex::Captures| {
let context = caps.get(1).unwrap().as_str();
if Self::is_logging_context(context) {
tokens.push(Token::BracketContext(vec![context.to_lowercase()]));
"<BRACKET_CONTEXT>".to_string()
} else {
caps.get(0).unwrap().as_str().to_string()
}
})
.to_string();
}
fn extract_contexts_from_chain(chain: &str) -> Vec<String> {
SINGLE_BRACKET_REGEX
.captures_iter(chain)
.map(|cap| cap.get(1).unwrap().as_str().to_lowercase())
.collect()
}
fn is_logging_context(context: &str) -> bool {
let lower_context = context.to_lowercase();
let log_levels = [
"error",
"err",
"warn",
"warning",
"info",
"information",
"debug",
"trace",
"fatal",
"crit",
"critical",
"notice",
"emerg",
"emergency",
"alert",
];
let log_components = [
"upstream",
"downstream",
"proxy",
"ssl",
"tls",
"auth",
"config",
"listener",
"cluster",
"backend",
"frontend",
"handler",
"worker",
"manager",
"service",
"client",
"server",
"connection",
"request",
"response",
"session",
];
let web_modules = [
"mod_jk",
"mod_ssl",
"mod_rewrite",
"mod_security",
"mod_proxy",
"ngx_http",
"core",
"main",
"event",
"http",
];
let system_contexts = [
"kernel", "systemd", "init", "cron", "syslog", "audit", "security", "firewall",
"network", "storage", "memory",
];
log_levels.contains(&lower_context.as_str()) ||
log_components.contains(&lower_context.as_str()) ||
web_modules.iter().any(|&module| lower_context.contains(module)) ||
system_contexts.contains(&lower_context.as_str()) ||
lower_context.ends_with("_service") ||
lower_context.ends_with("_manager") ||
lower_context.ends_with("_client") ||
lower_context.ends_with("_server") ||
lower_context.starts_with("mod_") ||
lower_context.starts_with("ngx_")
}
fn are_logging_contexts(contexts: &[String]) -> bool {
contexts.iter().any(|ctx| Self::is_logging_context(ctx)) &&
!contexts.iter().any(|ctx| {
ctx.chars().all(|c| c.is_ascii_digit()) || ctx.contains(':') || ctx.len() == 1 })
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects the context list of every `Token::BracketContext` in `tokens`,
    /// skipping any other token variant.
    fn bracket_contexts(tokens: &[Token]) -> Vec<&[String]> {
        tokens
            .iter()
            .filter_map(|token| {
                if let Token::BracketContext(contexts) = token {
                    Some(contexts.as_slice())
                } else {
                    None
                }
            })
            .collect()
    }

    /// An Apache error-log line yields a bracket context containing `error`.
    #[test]
    fn test_apache_mod_jk_detection() {
        let apache_line =
            "[Sun Dec 04 04:47:44 2005] [error] mod_jk child workerEnv in error state 6";
        let (result, tokens) = BracketContextDetector::detect_and_replace(apache_line);

        assert!(!tokens.is_empty());
        assert!(result.contains("<BRACKET_CONTEXT>"));
        assert!(bracket_contexts(&tokens)
            .iter()
            .any(|contexts| contexts.iter().any(|ctx| ctx == "error")));
    }

    /// Two adjacent brackets in an Envoy line are reported as one chained token.
    #[test]
    fn test_envoy_chained_contexts() {
        let envoy_line =
            "envoy[12345] [info] [upstream] cluster 'user-service' setting health check";
        let (result, tokens) = BracketContextDetector::detect_and_replace(envoy_line);

        assert!(!tokens.is_empty());
        assert!(result.contains("<BRACKET_CONTEXT>"));
        assert!(bracket_contexts(&tokens)
            .iter()
            .any(|contexts| contexts.len() >= 2));
    }

    /// A systemd-style line with a `[info] [unit]` chain is detected and replaced.
    #[test]
    fn test_systemd_contexts() {
        let systemd_line = "systemd[1]: [info] [unit] Starting network service";
        let (result, tokens) = BracketContextDetector::detect_and_replace(systemd_line);

        // Asserted unconditionally: guarding on `!tokens.is_empty()` would make
        // this test pass vacuously if detection stopped working.
        assert!(!tokens.is_empty());
        assert!(result.contains("<BRACKET_CONTEXT>"));
    }

    /// Lines rejected by the pre-filter come back unchanged and without tokens.
    #[test]
    fn test_no_false_positives() {
        let non_logging_cases = [
            "Array access array[index] operation",
            "IPv6 address [2001:db8::1]:8080",
            "Math expression [1 + 2] = 3",
            "URL with query params [param=value]",
        ];

        for test_case in non_logging_cases {
            let (result, tokens) = BracketContextDetector::detect_and_replace(test_case);
            assert_eq!(result, test_case);
            assert_eq!(tokens.len(), 0);
        }
    }

    /// Parentheses and braces are ignored; only the square-bracket context is reported.
    #[test]
    fn test_mixed_bracket_types() {
        let mixed_line = "[error] Processing (failed) with {result: null}";
        let (_result, tokens) = BracketContextDetector::detect_and_replace(mixed_line);

        assert_eq!(tokens.len(), 1);
        // Going through the helper makes the test fail, rather than silently
        // pass, if the single token is not a `Token::BracketContext`.
        let contexts = bracket_contexts(&tokens);
        assert_eq!(contexts.len(), 1);
        assert_eq!(contexts[0][0], "error");
    }

    /// Spot checks of the classification rules for individual names.
    #[test]
    fn test_context_classification() {
        for accepted in ["error", "upstream", "mod_ssl"] {
            assert!(BracketContextDetector::is_logging_context(accepted));
        }
        for rejected in ["kubelet", "123", "2001:db8", "a"] {
            assert!(!BracketContextDetector::is_logging_context(rejected));
        }
    }
}