use crate::path::OwnedValuePath;
use crate::value::{KeyString, Value};
use std::sync::LazyLock;
use std::{
collections::{BTreeMap, HashMap},
convert::TryFrom,
};
use tracing::error;
use super::grok::Grok;
use super::{
ast::{self, Destination, GrokPattern},
grok_filter::GrokFilter,
matchers::{date, date::DateFilter},
parse_grok_pattern::parse_grok_pattern,
};
/// Finds every grok expression (`%{...}`) inside a rule.
///
/// Between the braces the regex accepts, one or more times, either a single
/// character that is neither `"` nor `}`, or a complete double-quoted string
/// (which may itself contain `\"`). A `}` inside a quoted argument therefore
/// does not terminate the expression.
///
/// Compiled lazily on first use; the `unwrap` can only fire if this
/// hard-coded pattern itself is rejected by the regex engine.
static GROK_PATTERN_RE: LazyLock<onig::Regex> = LazyLock::new(|| {
onig::Regex::new(r#"%\{(?:[^"\}]|(?<!\\)"(?:\\"|[^"])*(?<!\\)")+\}"#).unwrap()
});
/// The outcome of parsing one grok rule: the compiled pattern plus the
/// description of every field the pattern captures.
#[derive(Clone, Debug)]
pub struct GrokRule {
/// The compiled pattern, as returned by `Grok::compile`.
pub pattern: super::grok::Pattern,
/// Generated capture names (`grok0`, `grok1`, ...) mapped to the field
/// each one populates.
pub fields: HashMap<String, GrokField>,
}
/// A field captured by a grok rule.
#[derive(Debug, Clone)]
pub struct GrokField {
/// Destination path taken from the grok expression.
pub lookup: OwnedValuePath,
/// Filters collected for this field. A filter implied by the matcher
/// (e.g. `integer`, `date`) is inserted in front of the filter written in
/// the expression — presumably they are applied in this order at runtime.
pub filters: Vec<GrokFilter>,
}
/// Mutable state threaded through the parsing of a single grok rule.
#[derive(Debug, Clone)]
pub struct GrokRuleParseContext {
/// The regular expression built so far.
pub regex: String,
/// Generated capture names (`grok0`, `grok1`, ...) mapped to field info.
pub fields: HashMap<String, GrokField>,
/// Alias names mapped to their definitions.
pub aliases: BTreeMap<KeyString, String>,
/// Names of the aliases currently being expanded; used to detect cycles.
pub alias_stack: Vec<String>,
}
impl GrokRuleParseContext {
    /// Creates an empty context that resolves aliases from `aliases`.
    fn new(aliases: BTreeMap<KeyString, String>) -> Self {
        Self {
            aliases,
            regex: String::new(),
            fields: HashMap::new(),
            alias_stack: Vec::new(),
        }
    }

    /// Appends `regex` to the regular expression built so far.
    fn append_regex(&mut self, regex: &str) {
        self.regex += regex;
    }

    /// Stores `field` under the capture name `grok_name`, replacing any
    /// previous entry with the same name.
    fn register_grok_field(&mut self, grok_name: &str, field: GrokField) {
        self.fields.insert(grok_name.to_owned(), field);
    }

    /// Puts `filter` at the front of the filter list of the field registered
    /// as `grok_name`. Does nothing when no such field is registered.
    fn register_filter(&mut self, grok_name: &str, filter: GrokFilter) {
        if let Some(field) = self.fields.get_mut(grok_name) {
            field.filters.insert(0, filter);
        }
    }

    /// Returns the next capture name (`grok0`, `grok1`, ...), numbered by
    /// how many fields are registered at the time of the call.
    fn generate_grok_compliant_name(&mut self) -> String {
        let index = self.fields.len();
        format!("grok{index}")
    }
}
/// Errors produced while turning grok rules into compiled patterns.
#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum Error {
/// A grok expression could not be parsed, or the assembled pattern could
/// not be compiled. Holds the offending expression and the reason.
#[error("failed to parse grok expression '{}': {}", .0, .1)]
InvalidGrokExpression(String, String),
/// A matcher function (e.g. `regex`, `date`) was given missing or
/// unusable arguments. Holds the function name.
#[error("invalid arguments for the function '{}'", .0)]
InvalidFunctionArguments(String),
/// Holds the name of a filter that is not recognized.
/// NOTE(review): not constructed in this file — presumably returned by
/// `GrokFilter::try_from`; confirm.
#[error("unknown filter '{}'", .0)]
UnknownFilter(String),
/// An alias refers back to an alias that is still being expanded. Holds
/// the name of the alias at the bottom of the expansion stack.
#[error("Circular dependency found in the alias '{}'", .0)]
CircularDependencyInAliasDefinition(String),
}
/// Parses every non-empty rule in `patterns` into a compiled [`GrokRule`].
///
/// Each rule gets a fresh parse context seeded with a copy of `aliases`, so
/// capture names and alias bookkeeping never leak between rules. Empty
/// strings are skipped.
///
/// # Errors
///
/// Returns the error of the first rule that fails to parse or compile;
/// later rules are not processed.
pub fn parse_grok_rules(
    patterns: &[String],
    aliases: BTreeMap<KeyString, String>,
) -> Result<Vec<GrokRule>, Error> {
    let mut grok = Grok::with_patterns();
    let mut rules = Vec::new();

    for rule in patterns {
        if rule.is_empty() {
            continue;
        }
        let mut context = GrokRuleParseContext::new(aliases.clone());
        rules.push(parse_pattern(rule, &mut context, &mut grok)?);
    }

    Ok(rules)
}
fn parse_alias(
name: &str,
definition: &str,
context: &mut GrokRuleParseContext,
) -> Result<(), Error> {
if context.alias_stack.iter().any(|a| a == name) {
return Err(Error::CircularDependencyInAliasDefinition(
context.alias_stack.first().unwrap().to_string(),
));
} else {
context.alias_stack.push(name.to_string());
}
parse_grok_rule(definition, context)?;
context.alias_stack.pop();
Ok(())
}
fn parse_pattern(
pattern: &str,
context: &mut GrokRuleParseContext,
grok: &mut Grok,
) -> Result<GrokRule, Error> {
parse_grok_rule(pattern, context)?;
let pattern = [
r"(?m)\A", &context
.regex
.replace("(?s)", "(?m)")
.replace("(?-s)", "(?-m)"),
r"\z",
]
.concat();
let pattern = grok
.compile(&pattern, true)
.map_err(|e| Error::InvalidGrokExpression(pattern, e.to_string()))?;
Ok(GrokRule {
pattern,
fields: context.fields.clone(),
})
}
fn parse_grok_rule(rule: &str, context: &mut GrokRuleParseContext) -> Result<(), Error> {
let mut regex_i = 0;
for (start, end) in GROK_PATTERN_RE.find_iter(rule) {
context.append_regex(&rule[regex_i..start]);
regex_i = end;
let pattern = parse_grok_pattern(&rule[start..end])
.map_err(|e| Error::InvalidGrokExpression(rule[start..end].to_string(), e))?;
resolve_grok_pattern(&pattern, context)?;
}
context.append_regex(&rule[regex_i..]);
Ok(())
}
/// Resolves one parsed grok expression into regex text and field metadata.
///
/// When the expression has a destination, a capture name is generated and
/// the field (with its optional filter) is registered under it. The matcher
/// is then emitted in one of three ways:
///
/// * a known alias is expanded inline, wrapped in a named group when the
///   expression captures;
/// * `regex`, `date` and `boolean` are emitted inside a named group (or a
///   non-capturing group when nothing is captured);
/// * anything else is emitted as a plain grok reference
///   `%{NAME}` / `%{NAME:capture}`.
///
/// # Errors
///
/// Propagates filter conversion, alias expansion and matcher errors.
fn resolve_grok_pattern(
    pattern: &GrokPattern,
    context: &mut GrokRuleParseContext,
) -> Result<(), Error> {
    // The name is numbered by the fields registered so far, so it has to be
    // generated before this expression's own field is registered.
    let grok_alias = pattern
        .destination
        .as_ref()
        .map(|_| context.generate_grok_compliant_name());

    if let Some(Destination { path, filter_fn }) = &pattern.destination {
        let filters = match filter_fn {
            Some(filter) => vec![GrokFilter::try_from(filter)?],
            None => Vec::new(),
        };
        context.register_grok_field(
            grok_alias.as_ref().expect("grok alias is not defined"),
            GrokField {
                lookup: path.clone(),
                filters,
            },
        );
    }

    let match_name = pattern.match_fn.name.as_str();
    let alias_definition = context.aliases.get(match_name).cloned();

    if let Some(definition) = alias_definition {
        // Alias: expand the definition, inside a named group if capturing.
        if let Some(capture) = &grok_alias {
            context.append_regex("(?<");
            context.append_regex(capture);
            context.append_regex(">");
        }
        parse_alias(match_name, &definition, context)?;
        if grok_alias.is_some() {
            context.append_regex(")");
        }
    } else if matches!(match_name, "regex" | "date" | "boolean") {
        // These matchers expand to raw regex text, so they are always grouped.
        match &grok_alias {
            Some(capture) => {
                context.append_regex("(?<");
                context.append_regex(capture);
                context.append_regex(">");
            }
            None => context.append_regex("(?:"),
        }
        resolves_match_function(grok_alias, pattern, context)?;
        context.append_regex(")");
    } else {
        // Everything else becomes a plain grok reference, without filters.
        context.append_regex("%{");
        resolves_match_function(grok_alias.clone(), pattern, context)?;
        if let Some(capture) = &grok_alias {
            context.append_regex(":");
            context.append_regex(capture);
        }
        context.append_regex("}");
    }

    Ok(())
}
fn resolves_match_function(
grok_alias: Option<String>,
pattern: &ast::GrokPattern,
context: &mut GrokRuleParseContext,
) -> Result<(), Error> {
let match_fn = &pattern.match_fn;
match match_fn.name.as_ref() {
"regex" => match match_fn.args.as_ref() {
Some(args) if !args.is_empty() => {
if let ast::FunctionArgument::Arg(Value::Bytes(ref b)) = args[0] {
context.append_regex(&String::from_utf8_lossy(b));
return Ok(());
}
Err(Error::InvalidFunctionArguments(match_fn.name.clone()))
}
_ => Err(Error::InvalidFunctionArguments(match_fn.name.clone())),
},
"integer" => {
if let Some(grok_alias) = &grok_alias {
context.register_filter(grok_alias, GrokFilter::Integer);
}
context.append_regex("integerStr");
Ok(())
}
"integerExt" => {
if let Some(grok_alias) = &grok_alias {
context.register_filter(grok_alias, GrokFilter::IntegerExt);
}
context.append_regex("integerExtStr");
Ok(())
}
"number" => {
if let Some(grok_alias) = &grok_alias {
context.register_filter(grok_alias, GrokFilter::Number);
}
context.append_regex("numberStr");
Ok(())
}
"numberExt" => {
if let Some(grok_alias) = &grok_alias {
context.register_filter(grok_alias, GrokFilter::NumberExt);
}
context.append_regex("numberExtStr");
Ok(())
}
"date" => {
match match_fn.args.as_ref() {
Some(args) if !args.is_empty() && args.len() <= 2 => {
if let ast::FunctionArgument::Arg(Value::Bytes(b)) = &args[0] {
let format = String::from_utf8_lossy(b);
let result = date::time_format_to_regex(&format, true)
.map_err(|_e| Error::InvalidFunctionArguments(match_fn.name.clone()))?;
let filter_re = regex::Regex::new(&result.regex).map_err(|error| {
error!(message = "Error compiling regex", regex = %result.regex, %error);
Error::InvalidFunctionArguments(match_fn.name.clone())
})?;
let strp_format = date::convert_time_format(&format).map_err(|error| {
error!(message = "Error compiling regex", regex = %result.regex, %error);
Error::InvalidFunctionArguments(match_fn.name.clone())
})?;
let mut target_tz = None;
if args.len() == 2
&& let ast::FunctionArgument::Arg(Value::Bytes(b)) = &args[1]
{
let tz = String::from_utf8_lossy(b);
date::parse_timezone(&tz).map_err(|error| {
error!(message = "Invalid(unrecognized) timezone", %error);
Error::InvalidFunctionArguments(match_fn.name.clone())
})?;
target_tz = Some(tz.to_string());
}
let filter = GrokFilter::Date(DateFilter {
original_format: format.to_string(),
strp_format,
regex: filter_re,
target_tz,
tz_aware: result.with_tz,
with_tz_capture: result.with_tz_capture,
with_fraction_second: result.with_fraction_second,
});
let grok_re = date::time_format_to_regex(&format, false)
.map_err(|error| {
error!(message = "Invalid time format", format = %format, %error);
Error::InvalidFunctionArguments(match_fn.name.clone())
})?
.regex;
if let Some(grok_alias) = &grok_alias {
context.register_filter(grok_alias, filter);
}
context.append_regex(&grok_re);
return Ok(());
}
Err(Error::InvalidFunctionArguments(match_fn.name.clone()))
}
_ => Err(Error::InvalidFunctionArguments(match_fn.name.clone())),
}
}
grok_pattern_name => {
context.append_regex(grok_pattern_name);
Ok(())
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A quoted filter argument containing `\"` must stay inside a single
    /// grok expression and reach the `nullIf` filter with plain quotes.
    #[test]
    fn supports_escaped_quotes() {
        let patterns = [r#"%{notSpace:field:nullIf("with \"escaped\" quotes")}"#.to_string()];
        let rules = parse_grok_rules(&patterns, BTreeMap::new()).expect("couldn't parse rules");

        let field = rules[0]
            .fields
            .values()
            .next()
            .expect("invalid grok pattern");

        assert!(matches!(
            &field.filters[0],
            GrokFilter::NullIf(v) if *v == r#"with "escaped" quotes"#
        ));
    }
}