include!(concat!(env!("OUT_DIR"), "/patterns.rs"));
use std::collections::{BTreeMap, btree_map};
use std::panic;
use std::sync::Arc;
use super::parse_grok::FatalError;
use onig::{Captures, Regex};
use thiserror::Error;
/// Maximum number of expansion passes `Grok::compile` performs before it
/// gives up with `Error::RecursionTooDeep`.
const MAX_RECURSION: usize = 1024;
/// Regex recognising a single grok reference of the form `%{pattern}`,
/// `%{pattern:alias}` or `%{pattern[:alias]=definition}`.
///
/// The named groups `name`, `pattern`, `alias` and `definition` are read by
/// position through the `*_INDEX` constants below.
// NOTE(review): the class `[A-z0-9]` spans ASCII 'A'..='z', which also admits
// `[`, `\`, `]`, `^`, `_` and '`'. Presumably the `_` is relied upon for pattern
// names; confirm before narrowing the range.
const GROK_PATTERN: &str = r"%\{(?<name>(?<pattern>[A-z0-9]+)(?::(?<alias>[A-z0-9_:;\/\s\.]+))?)(?:=(?<definition>(?:(?:[^{}]+|\.+)+)+))?\}";
/// Capture index of the `name` group (`pattern` plus the optional `:alias`).
const NAME_INDEX: usize = 1;
/// Capture index of the `pattern` group (the referenced definition's name).
const PATTERN_INDEX: usize = 2;
/// Capture index of the optional `alias` group.
const ALIAS_INDEX: usize = 3;
/// Capture index of the optional inline `definition` group.
const DEFINITION_INDEX: usize = 4;
/// The outcome of a successful `Pattern::match_against` call: the raw regex
/// captures together with the table that maps field names to capture indices.
#[derive(Debug)]
pub struct Matches<'a> {
// Captures produced by the regex engine for the matched text.
captures: Captures<'a>,
// Field name -> capture-group index, borrowed from the owning `Pattern`.
names: &'a BTreeMap<String, usize>,
}
impl<'a> Matches<'a> {
    /// Pairs the captures of a match with the name-to-index table used to
    /// look values up by field name.
    pub fn new(captures: Captures<'a>, names: &'a BTreeMap<String, usize>) -> Self {
        Self { captures, names }
    }

    /// Returns an iterator over `(field name, captured text)` pairs.
    ///
    /// Entries are produced in the key order of the underlying `BTreeMap`.
    pub fn iter(&'a self) -> MatchesIter<'a> {
        let names = self.names.iter();
        let captures = &self.captures;
        MatchesIter { captures, names }
    }
}
/// Iterator over the named fields of a `Matches`, created by `Matches::iter`.
pub struct MatchesIter<'a> {
// Captures the field values are read from.
captures: &'a Captures<'a>,
// Remaining (field name, capture-group index) entries still to be yielded.
names: btree_map::Iter<'a, String, usize>,
}
impl<'a> Iterator for MatchesIter<'a> {
    type Item = (&'a str, &'a str);

    /// Yields the next `(field name, captured text)` pair.
    ///
    /// A field whose capture group has no text available is reported with an
    /// empty string rather than being skipped.
    fn next(&mut self) -> Option<Self::Item> {
        let (field, &group) = self.names.next()?;
        let text = self.captures.at(group).unwrap_or("");
        Some((field.as_str(), text))
    }
}
/// A compiled grok pattern: the underlying regex plus the mapping from field
/// names to the capture groups that hold their values.
#[derive(Clone, Debug)]
pub struct Pattern {
// Compiled regex, held behind an `Arc` so clones of `Pattern` share it.
regex: Arc<Regex>,
// Field name -> capture-group index, filled in by `Pattern::new`.
names: BTreeMap<String, usize>,
}
impl Pattern {
fn new(regex: &str, alias: &BTreeMap<String, String>) -> Result<Self, Error> {
match Regex::new(regex) {
Ok(r) => Ok({
let mut names: BTreeMap<String, usize> = BTreeMap::new();
r.foreach_name(|cap_name, cap_idx| {
let name = match alias.iter().find(|&(_k, v)| *v == cap_name) {
Some(item) => item.0.clone(),
None => String::from(cap_name),
};
names.insert(name, cap_idx[0] as usize);
true
});
Pattern {
regex: Arc::new(r),
names,
}
}),
Err(_) => Err(Error::RegexCompilationFailed(regex.into())),
}
}
#[inline]
pub fn match_against<'a>(&'a self, text: &'a str) -> Result<Option<Matches<'a>>, FatalError> {
let result = panic::catch_unwind(|| self.regex.captures(text));
match result {
Ok(Some(cap)) => Ok(Some(Matches::new(cap, &self.names))),
Ok(None) => Ok(None),
Err(_) => Err(FatalError::RegexEngineError),
}
}
}
/// Registry of named grok definitions and the entry point for compiling grok
/// expressions into `Pattern`s.
#[derive(Debug)]
pub struct Grok {
// Definition name -> pattern text substituted for `%{name}` references.
definitions: BTreeMap<String, String>,
}
impl Grok {
pub fn with_patterns() -> Self {
let mut grok = Grok {
definitions: BTreeMap::new(),
};
for &(key, value) in PATTERNS {
grok.insert_definition(String::from(key), String::from(value));
}
grok
}
pub fn insert_definition<S: Into<String>>(&mut self, name: S, pattern: S) {
self.definitions.insert(name.into(), pattern.into());
}
pub fn compile(&mut self, pattern: &str, with_alias_only: bool) -> Result<Pattern, Error> {
let mut named_regex = String::from(pattern);
let mut alias: BTreeMap<String, String> = BTreeMap::new();
let mut index = 0;
let mut iteration_left = MAX_RECURSION;
let mut continue_iteration = true;
let grok_regex = match Regex::new(GROK_PATTERN) {
Ok(r) => r,
Err(_) => return Err(Error::RegexCompilationFailed(GROK_PATTERN.into())),
};
while continue_iteration {
continue_iteration = false;
if iteration_left == 0 {
return Err(Error::RecursionTooDeep(MAX_RECURSION));
}
iteration_left -= 1;
if let Some(m) = grok_regex.captures(&named_regex.clone()) {
continue_iteration = true;
let raw_pattern = match m.at(PATTERN_INDEX) {
Some(p) => p,
None => {
return Err(Error::GenericCompilationFailure(
"Could not find pattern in matches".into(),
));
}
};
let mut name = match m.at(NAME_INDEX) {
Some(n) => String::from(n),
None => {
return Err(Error::GenericCompilationFailure(
"Could not find name in matches".into(),
));
}
};
if let Some(definition) = m.at(DEFINITION_INDEX) {
self.insert_definition(raw_pattern, definition);
name = format!("{name}={definition}");
}
for _ in 0..named_regex.matches(&format!("%{{{name}}}")).count() {
let pattern_definition = match self.definitions.get(raw_pattern) {
Some(d) => d,
None => return Err(Error::DefinitionNotFound(String::from(raw_pattern))),
};
let replacement = if with_alias_only && m.at(ALIAS_INDEX).is_none() {
format!("(?:{pattern_definition})")
} else {
alias.insert(
match m.at(ALIAS_INDEX) {
Some(a) => String::from(a),
None => name.clone(),
},
format!("name{index}"),
);
format!("(?<name{index}>{pattern_definition})")
};
named_regex = named_regex.replacen(&format!("%{{{name}}}"), &replacement, 1);
index += 1;
}
}
}
if named_regex.is_empty() {
Err(Error::CompiledPatternIsEmpty(pattern.into()))
} else {
Pattern::new(&named_regex, &alias)
}
}
}
/// Errors that can occur while compiling a grok expression into a `Pattern`.
#[derive(Clone, Error, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum Error {
/// The expansion loop used up its iteration budget; carries that limit.
#[error("Recursion while compiling reached the limit of {0}")]
RecursionTooDeep(usize),
/// The expanded expression was empty; carries the original grok pattern.
#[error("The given pattern \"{0}\" ended up compiling into an empty regex")]
CompiledPatternIsEmpty(String),
/// A reference named a definition that is not registered; carries that name.
#[error("The given pattern definition name \"{0}\" could not be found in the definition map")]
DefinitionNotFound(String),
/// The regex engine rejected an expression; carries the offending regex text.
#[error("The given regex \"{0}\" failed compilation in the underlying engine")]
RegexCompilationFailed(String),
/// An unexpected condition arose during compilation; carries a description.
#[error("Something unexpected happened during the compilation phase: \"{0}\"")]
GenericCompilationFailure(String),
}