// vrl/stdlib/parse_regex.rs

1use crate::compiler::prelude::*;
2use regex::Regex;
3
4use super::util;
5
6fn parse_regex(value: &Value, numeric_groups: bool, pattern: &Regex) -> Resolved {
7    let value = value.try_bytes_utf8_lossy()?;
8    let parsed = pattern
9        .captures(&value)
10        .map(|capture| util::capture_regex_to_map(pattern, &capture, numeric_groups))
11        .ok_or("could not find any pattern matches")?;
12    Ok(parsed.into())
13}
14
/// Unit struct on which the `parse_regex` function is implemented.
#[derive(Debug, Clone, Copy)]
pub struct ParseRegex;
17
18impl Function for ParseRegex {
19    fn identifier(&self) -> &'static str {
20        "parse_regex"
21    }
22
23    fn parameters(&self) -> &'static [Parameter] {
24        &[
25            Parameter {
26                keyword: "value",
27                kind: kind::BYTES,
28                required: true,
29            },
30            Parameter {
31                keyword: "pattern",
32                kind: kind::REGEX,
33                required: true,
34            },
35            Parameter {
36                keyword: "numeric_groups",
37                kind: kind::BOOLEAN,
38                required: false,
39            },
40        ]
41    }
42
43    fn compile(
44        &self,
45        state: &state::TypeState,
46        _ctx: &mut FunctionCompileContext,
47        arguments: ArgumentList,
48    ) -> Compiled {
49        let value = arguments.required("value");
50        let pattern = arguments.required_regex("pattern", state)?;
51        let numeric_groups = arguments
52            .optional("numeric_groups")
53            .unwrap_or_else(|| expr!(false));
54
55        Ok(ParseRegexFn {
56            value,
57            pattern,
58            numeric_groups,
59        }
60        .as_expr())
61    }
62
63    fn examples(&self) -> &'static [Example] {
64        &[
65            Example {
66                title: "simple match",
67                source: r#"parse_regex!("8.7.6.5 - zorp", r'^(?P<host>[\w\.]+) - (?P<user>[\w]+)')"#,
68                result: Ok(indoc! { r#"{
69                "host": "8.7.6.5",
70                "user": "zorp"
71            }"# }),
72            },
73            Example {
74                title: "numeric groups",
75                source: r#"parse_regex!("8.7.6.5 - zorp", r'^(?P<host>[\w\.]+) - (?P<user>[\w]+)', numeric_groups: true)"#,
76                result: Ok(indoc! { r#"{
77                "0": "8.7.6.5 - zorp",
78                "1": "8.7.6.5",
79                "2": "zorp",
80                "host": "8.7.6.5",
81                "user": "zorp"
82            }"# }),
83            },
84            Example {
85                title: "match with variable",
86                source: r#"
87                variable = r'^(?P<host>[\w\.]+) - (?P<user>[\w]+)';
88                parse_regex!("8.7.6.5 - zorp", variable)"#,
89                result: Ok(indoc! { r#"{
90                "host": "8.7.6.5",
91                "user": "zorp"
92            }"# }),
93            },
94        ]
95    }
96}
97
98#[derive(Debug, Clone)]
99pub(crate) struct ParseRegexFn {
100    value: Box<dyn Expression>,
101    pattern: Regex,
102    numeric_groups: Box<dyn Expression>,
103}
104
105impl FunctionExpression for ParseRegexFn {
106    fn resolve(&self, ctx: &mut Context) -> Resolved {
107        let value = self.value.resolve(ctx)?;
108        let numeric_groups = self.numeric_groups.resolve(ctx)?;
109        let pattern = &self.pattern;
110
111        parse_regex(&value, numeric_groups.try_boolean()?, pattern)
112    }
113
114    fn type_def(&self, _: &state::TypeState) -> TypeDef {
115        TypeDef::object(util::regex_kind(&self.pattern)).fallible()
116    }
117}
118
#[cfg(test)]
// NOTE(review): none of the patterns below look trivial; confirm whether this
// allow is still required.
#[allow(clippy::trivial_regex)]
mod tests {
    use super::*;
    use crate::{btreemap, value};

    // Access-log pattern shared by the `numeric_groups` and `no_match` cases.
    const ACCESS_LOG_PATTERN: &str = r#"^(?P<host>[\w\.]+) - (?P<user>[\w]+) (?P<bytes_in>[\d]+) \[(?P<timestamp>.*)\] "(?P<method>[\w]+) (?P<path>.*)" (?P<status>[\d]+) (?P<bytes_out>[\d]+)$"#;

    test_function![
        find => ParseRegex;

        // Full match with `numeric_groups: true`: named and indexed entries are both expected.
        numeric_groups {
            args: func_args! [
                value: "5.86.210.12 - zieme4647 5667 [19/06/2019:17:20:49 -0400] \"GET /embrace/supply-chains/dynamic/vertical\" 201 20574",
                pattern: Regex::new(ACCESS_LOG_PATTERN).unwrap(),
                numeric_groups: true,
            ],
            want: Ok(value!({
                "host": "5.86.210.12",
                "user": "zieme4647",
                "bytes_in": "5667",
                "timestamp": "19/06/2019:17:20:49 -0400",
                "method": "GET",
                "path": "/embrace/supply-chains/dynamic/vertical",
                "status": "201",
                "bytes_out": "20574",
                "0": "5.86.210.12 - zieme4647 5667 [19/06/2019:17:20:49 -0400] \"GET /embrace/supply-chains/dynamic/vertical\" 201 20574",
                "1": "5.86.210.12",
                "2": "zieme4647",
                "3": "5667",
                "4": "19/06/2019:17:20:49 -0400",
                "5": "GET",
                "6": "/embrace/supply-chains/dynamic/vertical",
                "7": "201",
                "8": "20574",
            })),
            tdef: TypeDef::object(btreemap! {
                Field::from("host") => Kind::bytes(),
                Field::from("user") => Kind::bytes(),
                Field::from("bytes_in") => Kind::bytes(),
                Field::from("timestamp") => Kind::bytes(),
                Field::from("method") => Kind::bytes(),
                Field::from("path") => Kind::bytes(),
                Field::from("status") => Kind::bytes(),
                Field::from("bytes_out") => Kind::bytes(),
                Field::from("0") => Kind::bytes() | Kind::null(),
                Field::from("1") => Kind::bytes() | Kind::null(),
                Field::from("2") => Kind::bytes() | Kind::null(),
                Field::from("3") => Kind::bytes() | Kind::null(),
                Field::from("4") => Kind::bytes() | Kind::null(),
                Field::from("5") => Kind::bytes() | Kind::null(),
                Field::from("6") => Kind::bytes() | Kind::null(),
                Field::from("7") => Kind::bytes() | Kind::null(),
                Field::from("8") => Kind::bytes() | Kind::null(),
            }).fallible(),
        }

        // The input could match twice; the expected result holds the first match only.
        single_match {
            args: func_args! [
                value: "first group and second group",
                pattern: Regex::new("(?P<number>.*?) group").unwrap(),
            ],
            want: Ok(value!({"number": "first"})),
            tdef: TypeDef::object(btreemap! {
                Field::from("number") => Kind::bytes(),
                Field::from("0") => Kind::bytes() | Kind::null(),
                Field::from("1") => Kind::bytes() | Kind::null(),
            }).fallible(),
        }

        // Input that does not match the pattern yields the "no matches" error.
        no_match {
            args: func_args! [
                value: "I don't match",
                pattern: Regex::new(ACCESS_LOG_PATTERN).unwrap(),
            ],
            want: Err("could not find any pattern matches"),
            tdef: TypeDef::object(btreemap! {
                Field::from("host") => Kind::bytes(),
                Field::from("user") => Kind::bytes(),
                Field::from("bytes_in") => Kind::bytes(),
                Field::from("timestamp") => Kind::bytes(),
                Field::from("method") => Kind::bytes(),
                Field::from("path") => Kind::bytes(),
                Field::from("status") => Kind::bytes(),
                Field::from("bytes_out") => Kind::bytes(),
                Field::from("0") => Kind::bytes() | Kind::null(),
                Field::from("1") => Kind::bytes() | Kind::null(),
                Field::from("2") => Kind::bytes() | Kind::null(),
                Field::from("3") => Kind::bytes() | Kind::null(),
                Field::from("4") => Kind::bytes() | Kind::null(),
                Field::from("5") => Kind::bytes() | Kind::null(),
                Field::from("6") => Kind::bytes() | Kind::null(),
                Field::from("7") => Kind::bytes() | Kind::null(),
                Field::from("8") => Kind::bytes() | Kind::null(),
            }).fallible(),
        }
    ];
}