dissect 0.5.0

Logstash inspired dissect extractor
// Copyright 2018-2022, The Tremor Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Dissect is a library which is loosely based on logstash's dissect. It extracts data from
//! strings.
//!
//! ```rust
//! use dissect::{Pattern, Error};
//! use simd_json::borrowed::Value;
//!
//! let filter = Pattern::compile("%{a} %{b}")?;
//! let input = "John Doe";
//!
//! let output = filter.run(input).unwrap_or_default();
//! let mut expected = halfbrown::HashMap::new();
//! expected.insert("a".into(), Value::from("John"));
//! expected.insert("b".into(), Value::from("Doe"));
//!
//! assert_eq!(output, expected);
//! # Ok::<(), Error>(())
//! ```
//!
//! ### Categories
//!
//! 1) Simple:
//!
//! Named fields can be extracted using the syntax %{<name>} where the given name is then used as a
//! key for the value. The characters between two fields are used as delimiters.
//!
//! ```rust
//! use dissect::{Pattern, Error};
//! use simd_json::borrowed::Value;
//!
//! let output = Pattern::compile("%{name}, %{age}")?;
//! let output = output.run("John Doe, 22").unwrap_or_default();
//! let mut expected = halfbrown::HashMap::new();
//! expected.insert("name".into(), Value::from("John Doe"));
//! expected.insert("age".into(), Value::from("22"));
//!
//! assert_eq!(output, expected);
//! # Ok::<(), Error>(())
//! ```
//!
//! 2) Append (+)
//!
//! The append operator appends the value to a previously extracted value of the same name. The
//! values are joined into a single string, separated by a space by default.
//!
//! ```rust
//! use dissect::{Pattern, Error};
//! use simd_json::borrowed::Value;
//! let output = Pattern::compile( "%{+name} %{+name}, %{age}")?;
//! let output = output.run("John Doe, 22").unwrap_or_default();
//! let mut expected = halfbrown::HashMap::new();
//! expected.insert("name".into(), Value::from("John Doe"));
//! expected.insert("age".into(), Value::from("22"));
//!
//! assert_eq!(output, expected);
//! # Ok::<(), Error>(())
//! ```
//!
//! Append works only on strings and doesn't support other types. Using append with any non-string
//! type will result in an error.
//!
//! 3) Named keys (&)
//!
//! The named operator will return a key value pair of the field. It takes the key from the
//! previous matched field. Given the rule `%{?name}, %{&name}` and the input `"John Doe, 22"`,
//! `%{?name}` will match `"John Doe"`, but the `?` will prevent this from being stored
//! in the output.
//!
//! The separator `, ` is skipped and `%{&name}` matches `"22"`. Since the `&` is used, `name`
//! doesn't become the key. Instead, the value previously found for `name`, `"John Doe"`, becomes
//! the key for `"22"`, even though it isn't stored in the output itself.
//!
//!
//! ```rust
//! use dissect::{Pattern, Error};
//! use simd_json::borrowed::Value;
//! let output = Pattern::compile("%{?name}, %{&name}")?;
//! let output = output.run( "John Doe, 22").unwrap_or_default();
//! let mut expected = halfbrown::HashMap::new();
//! expected.insert("John Doe".into(), Value::from("22"));
//! assert_eq!(output, expected);
//! # Ok::<(), Error>(())
//! ```
//!
//! 4) Empty fields
//!
//! Fields will return an empty value if no data is present in the input.
//!
//! ```rust
//! use dissect::{Pattern, Error};
//! use simd_json::borrowed::Value;
//!
//! let output = Pattern::compile("%{name}, %{age}")?;
//! let output = output.run(", 22").unwrap_or_default();
//! let mut expected = halfbrown::HashMap::new();
//! expected.insert("name".into(), Value::from(""));
//! expected.insert("age".into(), Value::from("22"));
//! assert_eq!(output, expected);
//! # Ok::<(), Error>(())
//! ```
//!
//! 5) Skipped fields (?)
//!
//! The operator will prevent the value from being stored in the output, effectively skipping it.
//!
//! ```rust
//! use dissect::{Pattern, Error};
//! use simd_json::borrowed::Value;
//! let output = Pattern::compile("%{?first_name} %{last_name}, %{age}")?;
//! let output = output.run("John Doe, 22").unwrap_or_default();
//! let mut expected = halfbrown::HashMap::new();
//! expected.insert("last_name".into(), Value::from("Doe"));
//! expected.insert("age".into(), Value::from("22"));
//! assert_eq!(output, expected);
//! # Ok::<(), Error>(())
//! ```
//!
//! 6) Types
//!
//! We can convert the fields in the output to a different type by mentioning it in the field
//! definition. The types supported are: int, float, string. The type is specified with the
//! `field : type` syntax.
//!
//! ```rust
//!
//! use dissect::{Pattern, Error};
//! use simd_json::borrowed::Value;
//! let output = Pattern::compile("%{name}, %{age:int}")?;
//! let output = output.run( "John Doe, 22").unwrap_or_default();
//! let mut expected = halfbrown::HashMap::new();
//! expected.insert("name".into(), Value::from("John Doe"));
//! expected.insert("age".into(),Value::from(22));
//! assert_eq!(output, expected);
//! # Ok::<(), Error>(())
//! ```
//!
//! 7) Padding (_)
//!
//! The operator will remove padding when storing the field in the output. You can specify the
//! skipped character as a parameter to `_`. It will use ` ` by default.
//!
//! ```rust
//! use dissect::{Pattern, Error};
//! use simd_json::borrowed::Value;
//! let output = Pattern::compile("%{name}, %{_}%{age}")?;
//! let output = output.run("John Doe,                22").unwrap_or_default();
//! let mut expected = halfbrown::HashMap::new();
//! expected.insert("name".into(), Value::from("John Doe"));
//! expected.insert("age".into(), Value::from("22"));
//! assert_eq!(output, expected);
//! # Ok::<(), Error>(())
//! ```
//!
//! ```rust
//! use dissect::{Pattern, Error};
//! use simd_json::borrowed::Value;
//! let output = Pattern::compile("%{name}, %{_(-)}%{age}")?;
//! let output = output.run("John Doe, -----------------------22").unwrap_or_default();
//! let mut expected = halfbrown::HashMap::new();
//! expected.insert("name".into(), Value::from("John Doe"));
//! expected.insert("age".into(), Value::from("22"));
//! assert_eq!(output, expected);
//! # Ok::<(), Error>(())
//! ```

#![deny(warnings)]
#![recursion_limit = "1024"]
#![deny(
    clippy::all,
    clippy::unwrap_used,
    clippy::unnecessary_unwrap,
    clippy::pedantic
)]
#![allow(clippy::must_use_candidate)]

use halfbrown::HashMap;
use simd_json::value::borrowed::{Object, Value};
use std::fmt;

/// The type an extracted field is converted to before it is stored in the output.
#[derive(Clone, Copy, Debug, PartialEq)]
enum ExtractType {
    /// Keep the matched text as a string.
    String,
    /// Convert the matched text to an integer.
    Int,
    /// Convert the matched text to a floating point number.
    Float,
}

/// Fields without an explicit type are kept as strings.
impl Default for ExtractType {
    fn default() -> Self {
        ExtractType::String
    }
}

/// A single step of a compiled pattern.
#[derive(PartialEq, Debug, Clone)]
enum Command {
    /// Literal text that has to appear in the input at this position.
    Delimiter(String),
    /// A `%{...}` field extractor.
    Pattern {
        /// The value is not stored in the output, only remembered for later lookups (`?`).
        ignore: bool,
        /// The output key is taken from a previously ignored field of this name (`&`).
        lookup: bool,
        /// The value is appended to an already extracted value of the same name (`+`).
        add: bool,
        /// Name of the field as written in the pattern.
        name: String,
        /// Type the extracted value is converted to.
        convert: ExtractType,
    },
    /// Repeated padding text that is skipped at the start of the remaining input (`_`).
    Padding(String),
}

/// Errors that can occur while compiling a dissect pattern.
///
/// The `usize` payloads are byte offsets into the pattern string.
#[derive(PartialEq, Debug, Clone, Eq)]
pub enum Error {
    /// Two field extractors follow each other without a delimiter in between.
    ConnectedExtractors(usize),
    /// A `%{` was opened but never closed.
    Unterminated(usize),
    /// A padding is directly followed by text that starts with the padding itself.
    PaddingFollowedBySelf(usize),
    /// A padding definition is neither `_` nor `_(<padding>)`.
    InvalidPad(usize),
    /// The type after `:` is not supported; holds the offset and the offending type name.
    InvalidType(usize, String),
    /// A delimiter contains an unsupported escape sequence; holds the escaped character.
    InvalidEscape(char),
    /// A delimiter ends with a lone backslash.
    UnterminatedEscape,
}

impl std::error::Error for Error {}

impl fmt::Display for Error {
    /// Writes a human readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::ConnectedExtractors(p) => write!(
                f,
                "A delimiter needs to be provided between the two patterns at {}",
                p
            ),
            Self::Unterminated(p) => write!(f, "Unterminated pattern at {}", p),
            Self::PaddingFollowedBySelf(p) => write!(
                f,
                "The padding at {} can't be followed up by a delimiter that begins with it",
                p
            ),
            Self::InvalidPad(p) => write!(f, "Invalid padding at {}", p),
            // The type name goes into the quotes, the offset after "at".
            Self::InvalidType(p, t) => write!(f, "Invalid type '{}' at {}", t, p),
            Self::InvalidEscape(s) => write!(f, "Invalid escape sequence \\'{}' is not valid.", s),
            Self::UnterminatedEscape => write!(
                f,
                "Unterminated escape at the end of line or of a delimiter %{{ can't be escaped"
            ),
        }
    }
}

/// A compiled dissect pattern, created with [`Pattern::compile`] and applied to input
/// with [`Pattern::run`].
#[derive(PartialEq, Debug, Clone)]
pub struct Pattern {
    /// The delimiters, extractors and paddings of the pattern in the order they were written.
    commands: Vec<Command>,
}
fn handle_scapes(s: &str) -> Result<String, Error> {
    let mut res = String::with_capacity(s.len());
    let mut cs = s.chars();
    while let Some(c) = cs.next() {
        match c {
            '\\' => {
                if let Some(c1) = cs.next() {
                    match c1 {
                        '\\' => res.push(c1),
                        'n' => res.push('\n'),
                        't' => res.push('\t'),
                        'r' => res.push('\r'),
                        other => return Err(Error::InvalidEscape(other)),
                    }
                } else {
                    return Err(Error::UnterminatedEscape);
                }
            }
            c => res.push(c),
        }
    }
    Ok(res)
}
impl Pattern {
    /// Compiles a pattern
    /// # Errors
    /// fails if the pattern is invalid
    #[allow(clippy::too_many_lines)]
    pub fn compile(mut pattern: &str) -> Result<Self, Error> {
        fn parse_extractor(mut extractor: &str, idx: usize) -> Result<Command, Error> {
            if extractor.is_empty() {
                return Ok(Command::Pattern {
                    ignore: true,
                    add: false,
                    lookup: false,
                    name: String::new(),
                    convert: ExtractType::String,
                });
            }
            match &extractor[0..1] {
                "?" => Ok(Command::Pattern {
                    ignore: true,
                    add: false,
                    lookup: false,
                    name: extractor[1..].to_owned(),
                    convert: ExtractType::String,
                }),
                "&" => {
                    if let Some(type_pos) = extractor.find(':') {
                        let t = match extractor.get(type_pos + 1..) {
                            Some("int") => ExtractType::Int,
                            Some("float") => ExtractType::Float,
                            Some("string") => ExtractType::String,
                            Some(other) => return Err(Error::InvalidType(idx, other.to_string())),
                            None => return Err(Error::InvalidType(idx, "<EOF>".to_string())),
                        };
                        Ok(Command::Pattern {
                            lookup: true,
                            add: false,
                            ignore: false,
                            name: extractor[1..type_pos].to_owned(),
                            convert: t,
                        })
                    } else {
                        Ok(Command::Pattern {
                            lookup: true,
                            add: false,
                            ignore: false,
                            name: extractor[1..].to_owned(),
                            convert: ExtractType::String,
                        })
                    }
                }
                "+" => Ok(Command::Pattern {
                    add: true,
                    ignore: false,
                    lookup: false,
                    name: extractor[1..].to_owned(),
                    convert: ExtractType::String,
                }),
                "_" => {
                    if extractor.len() == 1 {
                        Ok(Command::Padding(" ".to_owned()))
                    } else {
                        extractor = &extractor[1..];
                        if extractor.starts_with('(') && extractor.ends_with(')') {
                            Ok(Command::Padding(
                                extractor[1..extractor.len() - 1].to_owned(),
                            ))
                        } else {
                            Err(Error::InvalidPad(idx))
                        }
                    }
                }
                _ => {
                    if let Some(type_pos) = extractor.find(':') {
                        let t = match extractor.get(type_pos + 1..) {
                            Some("int") => ExtractType::Int,
                            Some("float") => ExtractType::Float,
                            Some("string") => ExtractType::String,
                            Some(other) => return Err(Error::InvalidType(idx, other.to_string())),
                            None => return Err(Error::InvalidType(idx, "<EOF>".to_string())),
                        };
                        Ok(Command::Pattern {
                            ignore: false,
                            add: false,
                            lookup: false,
                            name: extractor[..type_pos].to_owned(),
                            convert: t,
                        })
                    } else {
                        Ok(Command::Pattern {
                            ignore: false,
                            add: false,
                            lookup: false,
                            name: extractor.to_owned(),
                            convert: ExtractType::String,
                        })
                    }
                }
            }
        }
        let mut commands = Vec::new();
        let mut idx = 0;
        let mut was_extract = false;
        loop {
            if pattern.is_empty() {
                return Ok(Self { commands });
            }
            if pattern.starts_with("%{") {
                if let Some(i) = pattern.find('}') {
                    if let Some(next_open) = pattern[2..].find("%{") {
                        // Have to add 2 because we started searching at pattern + 2
                        if (next_open + 2) < i {
                            return Err(Error::Unterminated(idx));
                        }
                    }
                    let p = parse_extractor(&pattern[2..i], idx)?;
                    // Padding doesn't count as an extractor
                    pattern = &pattern[i + 1..];
                    was_extract = if let Command::Padding(pad) = &p {
                        if pattern.starts_with(pad) {
                            return Err(Error::PaddingFollowedBySelf(idx));
                        };
                        false
                    } else {
                        if was_extract {
                            return Err(Error::ConnectedExtractors(idx));
                        };
                        true
                    };
                    commands.push(p);
                    idx += i + 1;
                } else {
                    return Err(Error::Unterminated(idx));
                }
            } else {
                was_extract = false;
                if let Some(i) = pattern.find("%{") {
                    commands.push(Command::Delimiter(handle_scapes(&pattern[0..i])?));
                    pattern = &pattern[i..];
                    idx += i;
                } else {
                    // No more extractors found
                    commands.push(Command::Delimiter(handle_scapes(pattern)?));
                    return Ok(Self { commands });
                }
            }
        }
    }

    /// Runs the compiled pattern against `data` and returns the extracted fields.
    ///
    /// Returns `None` if the input does not match the pattern: a delimiter is missing, input
    /// is left over after the last command, a value can not be converted to the requested
    /// type, a `&` lookup finds no (or an empty) skipped value, or `+` is used on a field
    /// that does not hold a string.
    #[allow(clippy::too_many_lines)]
    pub fn run(&self, mut data: &str) -> Option<Object<'static>> {
        /// Stores one extracted value: remembers it in `ignored` for skipped fields, appends
        /// it (joined by `last_sep`) for `add` fields, and otherwise converts and inserts it.
        #[allow(clippy::too_many_arguments)]
        fn insert(
            r: &mut Object<'static>,
            name: String,
            data: &str,
            add: bool,
            ignored: &mut HashMap<String, String>,
            ignore: bool,
            last_sep: &str,
            convert: ExtractType,
        ) -> Option<()> {
            if ignore {
                ignored.insert(name, data.to_owned());
            } else if add {
                let joined = match r.remove(name.as_str()) {
                    None => data.to_owned(),
                    Some(Value::String(s)) => {
                        let mut s = s.to_string();
                        s.push_str(last_sep);
                        s.push_str(data);
                        s
                    }
                    // Append only works on strings; fail instead of silently dropping
                    // the existing value.
                    Some(_) => return None,
                };
                r.insert(name.into(), Value::from(joined));
            } else {
                let v = match convert {
                    ExtractType::String => Value::from(data.to_owned()),
                    ExtractType::Int => Value::from(data.parse::<i64>().ok()?),
                    ExtractType::Float => Value::from(data.parse::<f64>().ok()?),
                };
                r.insert(name.into(), v);
            }
            Some(())
        }

        let mut r = Object::new();
        // Values of skipped (`?`) fields, kept for later `&` lookups.
        let mut ignored: HashMap<String, String> = HashMap::new();
        // Separator used when appending; a space until a padding is encountered.
        let mut last_sep: &str = " ";
        let mut t = 0;
        loop {
            match self.commands.get(t) {
                // No more pattern we're good if no data is left otherwise
                // we do not match
                None => {
                    return if data.is_empty() { Some(r) } else { None };
                }
                // We want to skip some text, do so if it's there
                Some(Command::Delimiter(s)) => {
                    if data.starts_with(s.as_str()) {
                        data = &data[s.len()..];
                    } else {
                        return None;
                    }
                }
                Some(Command::Padding(p)) => {
                    last_sep = p.as_str();
                    data = data.trim_start_matches(p.as_str());
                }
                // We know a extractor can never be followed by another extractor
                Some(Command::Pattern {
                    ignore,
                    lookup,
                    name,
                    add,
                    convert,
                }) => {
                    let name = if *lookup {
                        if let Some(s) = ignored.remove(name) {
                            if s.is_empty() {
                                return None;
                            }
                            s
                        } else {
                            return None;
                        }
                    } else {
                        name.clone()
                    };
                    // The field ends where the command that follows it starts to match.
                    let end = match self.commands.get(t + 1) {
                        // This is the last pattern so we eat it all
                        None => data.len(),
                        Some(Command::Delimiter(s)) => data.find(s.as_str())?,
                        Some(Command::Padding(s)) => {
                            if let Some(i) = data.find(s.as_str()) {
                                i
                            } else {
                                // The padding is optional: look at what follows it instead.
                                match self.commands.get(t + 2) {
                                    // If the padding is the last element we don't need it.
                                    None => data.len(),
                                    Some(Command::Delimiter(s)) => data.find(s.as_str())?,
                                    Some(_) => return None,
                                }
                            }
                        }
                        // We do not allow having two extractors next to each other
                        Some(_) => return None,
                    };
                    insert(
                        &mut r,
                        name,
                        &data[..end],
                        *add,
                        &mut ignored,
                        *ignore,
                        last_sep,
                        *convert,
                    )?;
                    data = &data[end..];
                }
            };
            t += 1;
        }
    }
}

// Lets tests compare a compiled pattern directly against the commands they expect.
#[cfg(test)]
impl PartialEq<Vec<Command>> for Pattern {
    fn eq(&self, other: &Vec<Command>) -> bool {
        self.commands.as_slice() == other.as_slice()
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use simd_json::value::borrowed::Value;

    /// Compiles `pattern`, panicking if it is invalid.
    fn cp(pattern: &str) -> Pattern {
        let compiled = Pattern::compile(pattern);
        compiled.expect("failed to compile pattern")
    }
    /// Compiles `pattern` and runs it against `input`.
    fn run(pattern: &str, input: &str) -> Option<Object<'static>> {
        let compiled = cp(pattern);
        compiled.run(input)
    }
    /// Builds the command for a plain string field called `name`.
    fn pat(name: &str) -> Command {
        let name = name.to_owned();
        Command::Pattern {
            name,
            convert: ExtractType::String,
            ignore: false,
            lookup: false,
            add: false,
        }
    }
    /// Builds a delimiter command for the literal text `name`.
    fn del(name: &str) -> Command {
        Command::Delimiter(String::from(name))
    }
    /// Builds a padding command for the padding text `name`.
    fn pad(name: &str) -> Command {
        Command::Padding(String::from(name))
    }

    fn v<'dissect, T: Copy>(s: &'dissect [(&str, T)]) -> Option<Object<'dissect>>
    where
        Value<'dissect>: From<T>,
    {
        use std::borrow::Cow;
        Some(
            s.iter()
                .map(|(x, y)| (Into::<Cow<'dissect, str>>::into(*x), Value::from(*y)))
                .collect(),
        )
    }

    // Asserts the result of running a pattern against an input.
    macro_rules! assert_pattern {
        // Only a pattern and an input are given: the pattern is expected not to match.
        ($pattern:expr, $input:expr) => {
            assert_eq!(run($pattern, $input), None)
        };
        // Additional `(key, value)` pairs are given: the pattern is expected to match and to
        // extract exactly those fields.
        ($pattern:expr, $input:expr, $($args:expr),*) => {
            assert_eq!(run($pattern, $input), v(&[$($args),*]))
        };
    }

    /// An empty pattern compiles to no commands at all.
    #[test]
    fn empty() {
        let compiled = cp("");
        assert_eq!(compiled, vec![]);
    }

    /// A `}` without a preceding `%{` is plain delimiter text.
    #[test]
    fn parse_stray_closing_backet_is_considered_a_literal() {
        let expected = vec![del("}")];
        assert_eq!(cp("}"), expected);
    }

    /// A `%` that does not start a `%{` is plain delimiter text.
    #[test]
    fn parse_literal_percentage() {
        let expected = vec![pat("a"), del("%")];
        assert_eq!(cp("%{a}%"), expected);
    }

    /// Compiles a table of patterns and compares each against its expected command list.
    #[test]
    fn parse_all_edgecases() {
        // Each entry is (pattern, commands the pattern is expected to compile to).
        let testcases = [
            ("%{name}}%{age}", vec![pat("name"), del("}"), pat("age")]),
            ("%{name}%%{age}", vec![pat("name"), del("%"), pat("age")]),
            (".%{name}", vec![del("."), pat("name")]),
            ("foo %{name}", vec![del("foo "), pat("name")]),
            (
                "foo %{name} bar",
                vec![del("foo "), pat("name"), del(" bar")],
            ),
            ("%{name} bar", vec![pat("name"), del(" bar")]),
            ("%{name}bar", vec![pat("name"), del("bar")]),
            ("name%{bar}", vec![del("name"), pat("bar")]),
            (
                "%{name} %{age} %{country}",
                vec![pat("name"), del(" "), pat("age"), del(" "), pat("country")],
            ),
            (
                "%{name} %{age}-%{country}",
                vec![pat("name"), del(" "), pat("age"), del("-"), pat("country")],
            ),
            ("this is %{test}", vec![del("this is "), pat("test")]),
            ("%{test} case", vec![pat("test"), del(" case")]),
            (
                "this is %{test} case",
                vec![del("this is "), pat("test"), del(" case")],
            ),
            (
                "this is %{test} case named %{name}",
                vec![
                    del("this is "),
                    pat("test"),
                    del(" case named "),
                    pat("name"),
                ],
            ),
            (
                "this is %{test} case named %{?name}",
                vec![
                    del("this is "),
                    pat("test"),
                    del(" case named "),
                    Command::Pattern {
                        ignore: true,
                        lookup: false,
                        add: false,
                        name: "name".to_string(),
                        convert: ExtractType::String,
                    },
                ],
            ),
        ];

        for case in &testcases {
            assert_eq!(cp(case.0), case.1);
        }
    }

    /// A lone extractor compiles to a single field command.
    #[test]
    fn extract() {
        let expected = vec![pat("test")];
        assert_eq!(cp("%{test}"), expected);
    }

    /// Text before an extractor becomes a leading delimiter.
    #[test]
    fn prefix() {
        let expected = vec![del("this is "), pat("test")];
        assert_eq!(cp("this is %{test}"), expected);
    }

    /// Text after an extractor becomes a trailing delimiter.
    #[test]
    fn suffix() {
        let expected = vec![pat("test"), del(" case")];
        assert_eq!(cp("%{test} case"), expected);
    }

    /// An extractor surrounded by text compiles to delimiter, field, delimiter.
    #[test]
    fn encircled() {
        let expected = vec![del("this is "), pat("test"), del(" case")];
        assert_eq!(cp("this is %{test} case"), expected);
    }

    /// Two extractors separated by text keep the text as a delimiter between them.
    #[test]
    fn two_patterns() {
        let expected = vec![
            del("this is "),
            pat("test"),
            del(" case named "),
            pat("name"),
        ];
        assert_eq!(cp("this is %{test} case named %{name}"), expected);
    }

    /// A `?` prefix compiles to a field that is marked as ignored.
    #[test]
    fn two_ignore() {
        let ignored_name = Command::Pattern {
            name: "name".to_owned(),
            convert: ExtractType::String,
            ignore: true,
            lookup: false,
            add: false,
        };
        let expected = vec![
            del("this is "),
            pat("test"),
            del(" case named "),
            ignored_name,
        ];
        assert_eq!(cp("this is %{test} case named %{?name}"), expected);
    }

    /// An extractor that is never closed is reported at its start offset.
    #[test]
    fn open_extract() {
        let result = Pattern::compile("%{test");
        assert_eq!(result, Err(Error::Unterminated(0)));
    }

    /// A new `%{` before the closing `}` leaves the first extractor unterminated.
    #[test]
    fn open_middle() {
        let result = Pattern::compile("%{test %{case}");
        assert_eq!(result, Err(Error::Unterminated(0)));
    }

    /// The reported offset of an unterminated extractor accounts for the leading delimiter.
    #[test]
    fn extract_inside() {
        let result = Pattern::compile("this is %{test case");
        assert_eq!(result, Err(Error::Unterminated(8)));
    }

    /// Two extractors without a delimiter in between are rejected at the second one.
    #[test]
    fn connected_extract() {
        let result = Pattern::compile("%{test}%{pattern}");
        assert_eq!(result, Err(Error::ConnectedExtractors(7)));
    }

    /// Runs a table of patterns against inputs and compares each result with the expectation.
    #[test]
    fn dissect_all_edgecases() {
        // Each entry is (pattern, input, expected result); `None` means the pattern must not
        // match the input.
        let testcases = vec![
            (
                "%{name}}%{age}",
                "John}22",
                v(&[("name", "John"), ("age", "22")]),
            ),
            (
                "%{name}%%{age}",
                "John%22",
                v(&[("name", "John"), ("age", "22")]),
            ),
            ("%{name}%%{age}", "John}22", None),
            (".%{name}", ".John", v(&[("name", "John")])),
            (".%{name}", "John", None),
            ("foo %{name}", "foo John", v(&[("name", "John")])),
            ("foo %{name} bar", "foo John bar", v(&[("name", "John")])),
            ("%{name} bar", "John bar", v(&[("name", "John")])),
            ("%{name}bar", "Johnbar", v(&[("name", "John")])),
            ("name%{bar}", "nameJohn", v(&[("bar", "John")])),
            (
                "%{name} %{age} %{country}",
                "John 22 Germany",
                v(&[("name", "John"), ("age", "22"), ("country", "Germany")]),
            ),
            (
                "%{name} %{age}-%{country}",
                "John 22-Germany",
                v(&[("name", "John"), ("age", "22"), ("country", "Germany")]),
            ),
            (
                "this is a %{name} case",
                "this is a John case",
                v(&([("name", "John")])),
            ),
            (
                "this is a %{what} case named %{name}",
                "this is a test case named John",
                v(&([("what", "test"), ("name", "John")])),
            ),
            (
                "this is a %{what}%{_}case named %{name}",
                "this is a test  case named John",
                v(&[("what", "test"), ("name", "John")]),
            ),
            (
                "this is a %{arr} %{+arr}",
                "this is a test case",
                v(&[("arr", "test case")]),
            ),
            // FIXME: Do we want to support these?
            // (
            //     "%{name}%{_}%{_(|)}/%{age}",
            //     "John/22",
            //     v(&[("name", "John"), ("age", "22")])),
            // ),
            // (
            //     "%{name}%{_}%{_(|)}/%{age}",
            //     "John|/22",
            //     v(&[("name", "John"), ("age", "22")])),
            // ),
            // (
            //     "%{name}%{_}%{_(|)}/%{age}",
            //     "John /22",
            //     v(&[("name", "John"), ("age", "22")])),
            // ),
            // (
            //     "%{name}%{_}%{_(|)}/ %{age}",
            //     "John|/ 22",
            //     v(&[("name", "John"), ("age", "22")])),
            // ),
            // (
            //     "%{name}%{_}%{_(|)}%{age}",
            //     "John||22",
            //     v(&[("name", "John"), ("age", "22")])),
            // ),
            (
                "%{name}%{_}%{_(|)}%{age}",
                "John 22",
                v(&[("name", "John"), ("age", "22")]),
            ),
            ("%{name} cake", "John cake", v(&[("name", "John")])),
            ("%{name} cake", "John", None),
            ("%{name}%{_}%{_(|)}%{age}", "John22", None),
            (
                "%{a}%{_}%{b}",
                "this    works",
                v(&[("a", "this"), ("b", "works")]),
            ),
            ("%{a}%{_}", "this   ", v(&[("a", "this")])),
            ("%{a}%{_}", "this", v(&[("a", "this")])),
        ];

        // `dbg!` prints the pattern and input of every case, which identifies the failing one.
        for (pattern, input, expected) in testcases {
            assert_eq!(run(dbg!(pattern), dbg!(input)), expected);
        }
    }

    // A pattern ending in `%{_}` applied to a syslog-style line whose last
    // field contains several space-separated words.
    // NOTE(review): the two-argument form of `assert_pattern!` carries no
    // expected key/value pairs; presumably it asserts that nothing is
    // extracted -- confirm against the macro definition.
    #[test]
    fn dissect_string_with_delimiter_at_the_end_returns_err() {
        let (template, line) = (
            "%{syslog_timestamp} %{wf_host} %{syslog_program}: %{syslog_message}%{_}",
            "2019-04-26 tremor-host tremor: dissect is working fine",
        );
        assert_pattern!(template, line);
    }

    // `%{_}` sits between a field and the `|` delimiter while the input has no
    // padding at that position; the expected pairs are `name` and `age`.
    #[test]
    fn dissect_with_optional_padding_in_the_middle() {
        let (template, line) = ("%{name}%{_}|%{age}", "John|22");
        assert_pattern!(template, line, ("name", "John"), ("age", "22"));
    }

    // A single named field surrounded by literal text on both sides.
    #[test]
    fn do_extract() {
        let (template, line) = ("this is a %{name} case", "this is a test case");
        assert_pattern!(template, line, ("name", "test"));
    }

    // Two named fields separated by literal text, checked through
    // `Pattern::compile` + `run` directly rather than through a helper.
    #[test]
    fn do_extract2() {
        let compiled = Pattern::compile("this is a %{what} case named %{name}")
            .expect("failed to compile pattern");

        let mut expected = Object::new();
        for (key, value) in [("what", "test"), ("name", "cake")] {
            expected.insert(key.into(), Value::from(value));
        }

        let extracted = compiled.run("this is a test case named cake");
        assert_eq!(extracted, Some(expected));
    }

    // Checks both the compiled command list and the extraction result for a
    // pattern with `%{_}` between a field and the following literal text; the
    // input carries a run of spaces at that position.
    #[test]
    fn do_extract_with_padding() {
        let compiled = Pattern::compile("this is a %{what}%{_}case named %{name}")
            .expect("failed to compile pattern");

        let what_field = Command::Pattern {
            name: "what".to_owned(),
            convert: ExtractType::String,
            ignore: false,
            lookup: false,
            add: false,
        };
        let name_field = Command::Pattern {
            name: "name".to_owned(),
            convert: ExtractType::String,
            ignore: false,
            lookup: false,
            add: false,
        };
        let commands = vec![
            del("this is a "),
            what_field,
            pad(" "),
            del("case named "),
            name_field,
        ];
        assert_eq!(compiled, commands);

        let mut expected = Object::new();
        for (key, value) in [("what", "test"), ("name", "cake")] {
            expected.insert(key.into(), Value::from(value));
        }
        let extracted = compiled.run("this is a test      case named cake");
        assert_eq!(extracted, Some(expected));
    }

    // Default padding (`%{_}`) followed directly by custom padding (`%{_(-)}`),
    // with no named fields: the expected result is an empty map.
    #[test]
    fn two_pads() {
        let compiled = cp("%{_}this%{_}%{_(-)}works");
        let commands = vec![pad(" "), del("this"), pad(" "), pad("-"), del("works")];
        assert_eq!(compiled, commands);

        let nothing_extracted = HashMap::new();
        assert_eq!(
            compiled.run("this     -----works"),
            Some(nothing_extracted)
        );
    }

    // Padding between a field and a closing delimiter, exercised with an input
    // that has a padding character and with one that has none.
    #[test]
    fn middle_pads_w_delim() {
        let compiled = cp("|%{n}%{_}|");

        let n_field = Command::Pattern {
            name: "n".to_owned(),
            convert: ExtractType::String,
            ignore: false,
            lookup: false,
            add: false,
        };
        assert_eq!(compiled, vec![del("|"), n_field, pad(" "), del("|")]);

        for (line, name) in [("|Jim |", "Jim"), ("|John|", "John")] {
            let mut expected = Object::new();
            expected.insert("n".into(), Value::from(name));
            assert_eq!(compiled.run(line), Some(expected));
        }
    }

    // Padding as the only separator between two named fields.
    #[test]
    fn middle_pads() {
        let compiled = cp("%{a}%{_}%{b}");
        let commands = vec![pat("a"), pad(" "), pat("b")];
        assert_eq!(compiled, commands);

        let mut expected = Object::new();
        for (key, value) in [("a", "this"), ("b", "works")] {
            expected.insert(key.into(), Value::from(value));
        }
        assert_eq!(compiled.run("this     works"), Some(expected));
    }

    // Padding at the very start of the pattern, before the only field.
    #[test]
    fn left_pads() {
        let compiled = Pattern::compile("%{_}%{b}").expect("failed to compile pattern");
        let commands = vec![pad(" "), pat("b")];
        assert_eq!(compiled, commands);

        let mut expected = Object::new();
        expected.insert("b".into(), Value::from("works"));
        let extracted = compiled.run("     works");
        assert_eq!(extracted, Some(expected));
    }

    // Padding at the very end of the pattern, with trailing spaces in the input.
    #[test]
    fn right_pads() {
        let compiled = Pattern::compile("%{a}%{_}").expect("failed to compile pattern");
        let commands = vec![pat("a"), pad(" ")];
        assert_eq!(compiled, commands);

        let mut expected = Object::new();
        expected.insert("a".into(), Value::from("this"));
        let extracted = compiled.run("this     ");
        assert_eq!(extracted, Some(expected));
    }

    // Padding at the very end of the pattern while the input ends right after
    // the field, i.e. there is nothing for the padding to consume.
    #[test]
    fn right_pads_last() {
        let compiled = Pattern::compile("%{a}%{_}").expect("failed to compile pattern");

        let a_field = Command::Pattern {
            name: "a".to_owned(),
            convert: ExtractType::String,
            ignore: false,
            lookup: false,
            add: false,
        };
        assert_eq!(compiled, vec![a_field, pad(" ")]);

        let mut expected = Object::new();
        expected.insert("a".into(), Value::from("this"));
        let extracted = compiled.run("this");
        assert_eq!(extracted, Some(expected));
    }

    // Custom multi-character padding (`%{_( case)}`): checks the compiled
    // command list and the extraction result.
    #[test]
    fn do_extract_with_padding_specific() {
        let compiled = Pattern::compile("this is a %{what}%{_( case)} named %{name}")
            .expect("failed to compile pattern");

        let what_field = Command::Pattern {
            name: "what".to_owned(),
            convert: ExtractType::String,
            ignore: false,
            lookup: false,
            add: false,
        };
        let name_field = Command::Pattern {
            name: "name".to_owned(),
            convert: ExtractType::String,
            ignore: false,
            lookup: false,
            add: false,
        };
        let commands = vec![
            del("this is a "),
            what_field,
            pad(" case"),
            del(" named "),
            name_field,
        ];
        assert_eq!(compiled, commands);

        let mut expected = Object::new();
        for (key, value) in [("what", "test"), ("name", "cake")] {
            expected.insert(key.into(), Value::from(value));
        }
        let extracted = compiled.run("this is a test case named cake");
        assert_eq!(extracted, Some(expected));
    }

    // A `%{?what}` field: the text it matches does not appear in the expected
    // object, only the plain `%{name}` field does.
    #[test]
    fn do_extract_ignore() {
        let compiled = Pattern::compile("this is a %{?what} case named %{name}")
            .expect("failed to compile pattern");

        let mut expected = Object::new();
        expected.insert("name".into(), Value::from("cake"));

        let extracted = compiled.run("this is a test case named cake");
        assert_eq!(extracted, Some(expected));
    }

    // `%{?name}` paired with `%{&name}`: in each expected pair the text matched
    // by the former is the key and the text matched by the latter is the value.
    // The last case adds an `:int` suffix and expects an integer value.
    #[test]
    fn do_kv() {
        let line = "this is a test case named cake";

        let template = "this is a %{?name} case named %{&name}";
        assert_pattern!(template, line, ("test", "cake"));

        let template = "this is a %{?name} %{what} named %{&name}";
        assert_pattern!(template, line, ("what", "case"), ("test", "cake"));

        let (template, line) = ("%{?name}: %{&name:int}", "key: 42");
        assert_pattern!(template, line, ("key", 42));
    }

    // Two `%{+arr}` fields: the expected object has a single `arr` entry
    // holding both matched texts joined by a space.
    #[test]
    fn do_arr() {
        let compiled = Pattern::compile("this is a %{+arr} case named %{+arr}")
            .expect("failed to compile pattern");

        let mut expected = Object::new();
        expected.insert("arr".into(), Value::from("test cake"));

        let extracted = compiled.run("this is a test case named cake");
        assert_eq!(extracted, Some(expected));
    }

    // A plain `%{arr}` field followed by `%{+arr}` with the same name: the
    // expected object has a single `arr` entry holding both matched texts
    // joined by a space.
    #[test]
    fn do_arr_upgrade() {
        let compiled = Pattern::compile("this is a %{arr} case named %{+arr}")
            .expect("failed to compile pattern");

        let mut expected = Object::new();
        expected.insert("arr".into(), Value::from("test cake"));

        let extracted = compiled.run("this is a test case named cake");
        assert_eq!(extracted, Some(expected));
    }

    // Table-driven check of complete pattern/input pairs. Each entry is
    // `(pattern, input, expected)`, where `expected` is built with the `v`
    // helper and compared against `run(pattern, input)`.
    //
    // The assertion names the failing pattern and input so that a broken case
    // can be identified directly from the test output.
    #[test]
    fn dissect_all_usecases() {
        let cases = vec![
            (
                "%{syslog_timestamp} %{wf_host} %{syslog_program}: %{syslog_message}%{_}",
                "12345 host program: message ",
                v(&[
                    ("syslog_timestamp", "12345"),
                    ("wf_host", "host"),
                    ("syslog_program", "program"),
                    ("syslog_message", "message"),
                ]),
            ),
            (
                "%{syslog_timestamp} %{wf_host} %{syslog_program}: %{syslog_message}",
                "12345 host program: message",
                v(&[
                    ("syslog_timestamp", "12345"),
                    ("wf_host", "host"),
                    ("syslog_program", "program"),
                    ("syslog_message", "message"),
                ]),
            ),
            (
                "%{}, [%{log_timestamp} #%{pid}] %{log_level} -- %{}: %{message}",
                "foo, [12345 #12] high -- 1: log failed",
                v(&[
                    ("log_timestamp", "12345"),
                    ("pid", "12"),
                    ("log_level", "high"),
                    ("message", "log failed"),
                ]),
            ),
            (
                "%{}>%{+syslog_timestamp} %{+syslog_timestamp} %{+syslog_timestamp} %{syslog_hostname} %{syslog_program}: %{full_message}",
                "foo>12345 67890 12345 host program: log failed",
                v(&[
                    ("syslog_timestamp", "12345 67890 12345"),
                    ("syslog_hostname", "host"),
                    ("syslog_program", "program"),
                    ("full_message", "log failed"),
                ]),
            ),
            (
                "%{syslog_timestamp} %{wf_host} %{}: %{log_timestamp} %{+log_timestamp} %{+log_timestamp} %{+log_timestamp} %{+log_timestamp} %{job_name} %{build_num} %{message} completed: %{completed}\n",
                "12345 host foo: 12345 67890 12345 67890 12345 some_job 12345 some_message completed: 100\n",
                v(&[
                    ("syslog_timestamp", "12345"),
                    ("wf_host", "host"),
                    ("log_timestamp", "12345 67890 12345 67890 12345"),
                    ("job_name", "some_job"),
                    ("build_num", "12345"),
                    ("message", "some_message"),
                    ("completed", "100"),
                ]),
            ),
            (
                "%{syslog_timestamp} %{wf_host} %{}: %{log_timestamp} %{+log_timestamp} %{+log_timestamp} %{+log_timestamp} %{+log_timestamp} %{job_name} %{build_num} %{message}\n",
                "12345 host foo: 12345 67890 12345 67890 12345 nice_job 900 here we go again\n",
                v(&[
                    ("syslog_timestamp", "12345"),
                    ("wf_host", "host"),
                    ("log_timestamp", "12345 67890 12345 67890 12345"),
                    ("job_name", "nice_job"),
                    ("build_num", "900"),
                    ("message", "here we go again"),
                ]),
            ),
            (
                "%{syslog_timestamp} %{wf_host} %{} %{log_timestamp}  %{log_level}    %{main}     %{logger}%{_}%{message}%{_}",
                "12345 host foo 12345  high    main     dummy_logger some_message  ",
                v(&[
                    ("syslog_timestamp", "12345"),
                    ("wf_host", "host"),
                    ("log_timestamp", "12345"),
                    ("log_level", "high"),
                    ("main", "main"),
                    ("logger", "dummy_logger"),
                    ("message", "some_message"),
                ]),
            ),
            (
                "%{syslog_timestamp} %{host} %{?kafka_tag} %{log_timestamp}: %{log_level} (%{logger}): %{full_message}",
                "12345 foo some_tag 12345: high (dummy): welcome",
                v(&[
                    ("syslog_timestamp", "12345"),
                    ("host", "foo"),
                    ("log_timestamp", "12345"),
                    ("log_level", "high"),
                    ("logger", "dummy"),
                    ("full_message", "welcome"),
                ]),
            ),
            (
                "%{syslog_timestamp} %{host} %{} %{message}",
                "12345 foo bar here we go",
                v(&[
                    ("syslog_timestamp", "12345"),
                    ("host", "foo"),
                    ("message", "here we go"),
                ]),
            ),
            (
                "%{syslog_timestamp} %{host}  %{log_timestamp} %{+log_timestamp} %{message}",
                "12345 foo  12345 67890 this works well",
                v(&[
                    ("syslog_timestamp", "12345"),
                    ("host", "foo"),
                    ("log_timestamp", "12345 67890"),
                    ("message", "this works well"),
                ]),
            ),
            (
                "%{syslog_timestamp} %{host}%{_}[%{log_timestamp}][%{log_level}%{_}][%{logger}%{_}] %{message}",
                "12345 foo [12345 67890][high ][dummy ] too many brackets here",
                v(&[
                    ("syslog_timestamp", "12345"),
                    ("host", "foo"),
                    ("log_timestamp", "12345 67890"),
                    ("log_level", "high"),
                    ("logger", "dummy"),
                    ("message", "too many brackets here"),
                ]),
            ),
            (
                "%{syslog_timestamp} %{host}  %{} %{} %{} %{} %{syslog_program}[%{syslog_pid}]: %{message}",
                "12345 foo  i dont care about program[12345]: some message here",
                v(&[
                    ("syslog_timestamp", "12345"),
                    ("host", "foo"),
                    ("syslog_program", "program"),
                    ("syslog_pid", "12345"),
                    ("message", "some message here"),
                ]),
            ),
            (
                "%{syslog_timestamp} %{host}%{_}[%{log_timestamp}][%{log_level}%{_}][%{logger}%{_}] %{message}",
                "12345 foo [12345][high ][dummy ] alexanderplatz",
                v(&[
                    ("syslog_timestamp", "12345"),
                    ("host", "foo"),
                    ("log_timestamp", "12345"),
                    ("log_level", "high"),
                    ("logger", "dummy"),
                    ("message", "alexanderplatz"),
                ]),
            ),
            (
                "%{} %{} %{} %{source} %{}:%{message}",
                "foo bar baz light quox:this was fun",
                v(&[("source", "light"), ("message", "this was fun")]),
            ),
            (
                "%{syslog_timestamp} %{wf_host}%{_}%{}: %{syslog_message}",
                "12345 host foo: lorem ipsum",
                v(&[
                    ("syslog_timestamp", "12345"),
                    ("wf_host", "host"),
                    ("syslog_message", "lorem ipsum"),
                ]),
            ),
            (
                "%{syslog_timestamp} %{host}%{_}%{}: %{syslog_message}",
                "12345 ghost foo: this is the last one",
                v(&[
                    ("syslog_timestamp", "12345"),
                    ("host", "ghost"),
                    ("syslog_message", "this is the last one"),
                ]),
            ),
            (
                "this is a %{?what} named %{name}",
                "this is a test named cake",
                v(&[("name", "cake")]),
            ),
        ];

        for (pattern, input, expected) in cases {
            assert_eq!(
                run(pattern, input),
                expected,
                "pattern: {:?}, input: {:?}",
                pattern,
                input
            );
        }
    }

    // Checks that every pattern translated from the logstash configuration
    // templates compiles. On failure the offending pattern and the compile
    // error are reported instead of a bare `is_ok()` assertion failure.
    #[test]
    fn test_patterns() {
        // The untranslated logstash patterns. They are never compiled; the list
        // is kept only as a reference for the translated list below.
        // Tab separators are written as `\t` so they stay visible.
        let _patterns_orig = vec![
            //logstash.git.transform.conf.erb
            "%{syslog_timestamp} %{wf_host} %{syslog_program}: %{syslog_message->}",
            "%{syslog_timestamp} %{wf_host} %{syslog_program}: %{syslog_message}",
            "%{}, [%{log_timestamp} #%{pid}]  %{log_level} -- %{}: %{message}",
            "%{}>%{+syslog_timestamp/1} %{+syslog_timestamp/2} %{+syslog_timestamp/3} %{syslog_hostname} %{syslog_program}: %{full_message}",
            // logstash.sox.source.conf.erb
            "%{syslog_timestamp} %{wf_host} %{}: %{log_timestamp} %{+log_timestamp} %{+log_timestamp} %{+log_timestamp} %{+log_timestamp} %{job_name} %{build_num} %{message} completed: %{completed}\n",
            "%{syslog_timestamp} %{wf_host} %{}: %{log_timestamp} %{+log_timestamp} %{+log_timestamp} %{+log_timestamp} %{+log_timestamp} %{job_name} %{build_num} %{message}\n",
            // logstash.presto.source.conf.erb
            "%{syslog_timestamp} %{wf_host} %{} %{log_timestamp}\t%{log_level}\t%{main}\t%{logger->}\t%{message->}",
            // logstash.aerospike.source.conf.erb
            "%{syslog_timestamp} %{host} %{?kafka_tag} %{log_timestamp}: %{log_level} (%{logger}): %{full_message}",
            // logstash.siem.sink.conf.erb
            "%{syslog_timestamp} %{host} %{} %{message}",
            // logstash.puppet.transform.conf.erb
            "%{syslog_timestamp} %{host} %{syslog_program}: %{syslog_message->}",
            // logstash.edw.conf.erb
            "%{syslog_timestamp} %{host}  %{log_timestamp} %{+log_timestamp} %{message}",
            // logstash.eslog.conf.erb
            "%{syslog_timestamp} %{host->} [%{log_timestamp}][%{log_level->}][%{logger->}] %{message}",
            "%{syslog_timestamp} %{host}  %{} %{} %{} %{} %{syslog_program}[%{syslog_pid}]: %{message}",
            "%{log_level} %{log_timestamp}: %{logger}: %{message}",
            "%{syslog_timestamp} %{host->} [%{log_timestamp}][%{log_level->}][%{logger->}] %{message}",
            "%{} %{} %{} %{source} %{}:%{message}",
            // logstash.misc.conf.erb
            "%{syslog_timestamp} %{wf_host->} %{}: %{syslog_message}",
            "%{syslog_timestamp} %{host->} %{}: %{syslog_message}",
        ];
        // translation:
        // remove the `/<n>` suffix from append fields (e.g. `/1`, `/2`)
        // replace '->}' with '}%{_}'
        // remove ' ' after %{_}
        let patterns = vec![
            //logstash.git.transform.conf.erb
            "%{syslog_timestamp} %{wf_host} %{syslog_program}: %{syslog_message}%{_}",
            "%{syslog_timestamp} %{wf_host} %{syslog_program}: %{syslog_message}",
            "%{}, [%{log_timestamp} #%{pid}]  %{log_level} -- %{}: %{message}",
            "%{}>%{+syslog_timestamp} %{+syslog_timestamp} %{+syslog_timestamp} %{syslog_hostname} %{syslog_program}: %{full_message}",
            // logstash.sox.source.conf.erb
            "%{syslog_timestamp} %{wf_host} %{}: %{log_timestamp} %{+log_timestamp} %{+log_timestamp} %{+log_timestamp} %{+log_timestamp} %{job_name} %{build_num} %{message} completed: %{completed}\n",
            "%{syslog_timestamp} %{wf_host} %{}: %{log_timestamp} %{+log_timestamp} %{+log_timestamp} %{+log_timestamp} %{+log_timestamp} %{job_name} %{build_num} %{message}\n",
            // logstash.presto.source.conf.erb
            "%{syslog_timestamp} %{wf_host} %{} %{log_timestamp}\t%{log_level}\t%{main}\t%{logger}%{_}%{message}%{_}",
            // logstash.aerospike.source.conf.erb
            "%{syslog_timestamp} %{host} %{?kafka_tag} %{log_timestamp}: %{log_level} (%{logger}): %{full_message}",
            // logstash.siem.sink.conf.erb
            "%{syslog_timestamp} %{host} %{} %{message}",
            // logstash.puppet.transform.conf.erb
            "%{syslog_timestamp} %{host} %{syslog_program}: %{syslog_message}%{_}",
            // logstash.edw.conf.erb
            "%{syslog_timestamp} %{host}  %{log_timestamp} %{+log_timestamp} %{message}",
            // logstash.eslog.conf.erb
            "%{syslog_timestamp} %{host}%{_}[%{log_timestamp}][%{log_level}%{_}][%{logger}%{_}] %{message}",
            "%{syslog_timestamp} %{host}  %{} %{} %{} %{} %{syslog_program}[%{syslog_pid}]: %{message}",
            "%{log_level} %{log_timestamp}: %{logger}: %{message}",
            "%{syslog_timestamp} %{host}%{_}[%{log_timestamp}][%{log_level}%{_}][%{logger}%{_}] %{message}",
            "%{} %{} %{} %{source} %{}:%{message}",
            // logstash.misc.conf.erb
            "%{syslog_timestamp} %{wf_host}%{_}%{}: %{syslog_message}",
            "%{syslog_timestamp} %{host}%{_}%{}: %{syslog_message}",
        ];
        for p in patterns {
            // Surface which pattern failed and why, rather than only "is_ok() was false".
            if let Err(e) = Pattern::compile(p) {
                panic!("pattern {:?} failed to compile: {:?}", p, e);
            }
        }
    }
}