1use crate::compiler::prelude::*;
2use regex::Regex;
3
4use super::util;
5
6fn parse_regex(value: &Value, numeric_groups: bool, pattern: &Regex) -> Resolved {
7 let value = value.try_bytes_utf8_lossy()?;
8 let parsed = pattern
9 .captures(&value)
10 .map(|capture| util::capture_regex_to_map(pattern, &capture, numeric_groups))
11 .ok_or("could not find any pattern matches")?;
12 Ok(parsed.into())
13}
14
/// Marker type that carries the `Function` implementation for `parse_regex`.
#[derive(Debug, Clone, Copy)]
pub struct ParseRegex;
17
18impl Function for ParseRegex {
19 fn identifier(&self) -> &'static str {
20 "parse_regex"
21 }
22
23 fn parameters(&self) -> &'static [Parameter] {
24 &[
25 Parameter {
26 keyword: "value",
27 kind: kind::BYTES,
28 required: true,
29 },
30 Parameter {
31 keyword: "pattern",
32 kind: kind::REGEX,
33 required: true,
34 },
35 Parameter {
36 keyword: "numeric_groups",
37 kind: kind::BOOLEAN,
38 required: false,
39 },
40 ]
41 }
42
43 fn compile(
44 &self,
45 state: &state::TypeState,
46 _ctx: &mut FunctionCompileContext,
47 arguments: ArgumentList,
48 ) -> Compiled {
49 let value = arguments.required("value");
50 let pattern = arguments.required_regex("pattern", state)?;
51 let numeric_groups = arguments
52 .optional("numeric_groups")
53 .unwrap_or_else(|| expr!(false));
54
55 Ok(ParseRegexFn {
56 value,
57 pattern,
58 numeric_groups,
59 }
60 .as_expr())
61 }
62
63 fn examples(&self) -> &'static [Example] {
64 &[
65 Example {
66 title: "simple match",
67 source: r#"parse_regex!("8.7.6.5 - zorp", r'^(?P<host>[\w\.]+) - (?P<user>[\w]+)')"#,
68 result: Ok(indoc! { r#"{
69 "host": "8.7.6.5",
70 "user": "zorp"
71 }"# }),
72 },
73 Example {
74 title: "numeric groups",
75 source: r#"parse_regex!("8.7.6.5 - zorp", r'^(?P<host>[\w\.]+) - (?P<user>[\w]+)', numeric_groups: true)"#,
76 result: Ok(indoc! { r#"{
77 "0": "8.7.6.5 - zorp",
78 "1": "8.7.6.5",
79 "2": "zorp",
80 "host": "8.7.6.5",
81 "user": "zorp"
82 }"# }),
83 },
84 Example {
85 title: "match with variable",
86 source: r#"
87 variable = r'^(?P<host>[\w\.]+) - (?P<user>[\w]+)';
88 parse_regex!("8.7.6.5 - zorp", variable)"#,
89 result: Ok(indoc! { r#"{
90 "host": "8.7.6.5",
91 "user": "zorp"
92 }"# }),
93 },
94 ]
95 }
96}
97
98#[derive(Debug, Clone)]
99pub(crate) struct ParseRegexFn {
100 value: Box<dyn Expression>,
101 pattern: Regex,
102 numeric_groups: Box<dyn Expression>,
103}
104
105impl FunctionExpression for ParseRegexFn {
106 fn resolve(&self, ctx: &mut Context) -> Resolved {
107 let value = self.value.resolve(ctx)?;
108 let numeric_groups = self.numeric_groups.resolve(ctx)?;
109 let pattern = &self.pattern;
110
111 parse_regex(&value, numeric_groups.try_boolean()?, pattern)
112 }
113
114 fn type_def(&self, _: &state::TypeState) -> TypeDef {
115 TypeDef::object(util::regex_kind(&self.pattern)).fallible()
116 }
117}
118
#[cfg(test)]
#[allow(clippy::trivial_regex)]
mod tests {
    use super::*;
    use crate::{btreemap, value};

    /// Access-log pattern with eight named groups, shared by the
    /// `numeric_groups` and `no_match` cases.
    fn access_log_pattern() -> Regex {
        Regex::new(r#"^(?P<host>[\w\.]+) - (?P<user>[\w]+) (?P<bytes_in>[\d]+) \[(?P<timestamp>.*)\] "(?P<method>[\w]+) (?P<path>.*)" (?P<status>[\d]+) (?P<bytes_out>[\d]+)$"#)
            .unwrap()
    }

    /// Type definition expected for `access_log_pattern`: named groups are
    /// bytes, numbered groups are bytes or null, and the call is fallible.
    fn access_log_type_def() -> TypeDef {
        TypeDef::object(btreemap! {
            Field::from("0") => Kind::bytes() | Kind::null(),
            Field::from("1") => Kind::bytes() | Kind::null(),
            Field::from("2") => Kind::bytes() | Kind::null(),
            Field::from("3") => Kind::bytes() | Kind::null(),
            Field::from("4") => Kind::bytes() | Kind::null(),
            Field::from("5") => Kind::bytes() | Kind::null(),
            Field::from("6") => Kind::bytes() | Kind::null(),
            Field::from("7") => Kind::bytes() | Kind::null(),
            Field::from("8") => Kind::bytes() | Kind::null(),
            Field::from("host") => Kind::bytes(),
            Field::from("user") => Kind::bytes(),
            Field::from("bytes_in") => Kind::bytes(),
            Field::from("timestamp") => Kind::bytes(),
            Field::from("method") => Kind::bytes(),
            Field::from("path") => Kind::bytes(),
            Field::from("status") => Kind::bytes(),
            Field::from("bytes_out") => Kind::bytes(),
        })
        .fallible()
    }

    test_function![
        find => ParseRegex;

        // Named and numbered groups are both returned when the flag is set.
        numeric_groups {
            args: func_args! [
                value: "5.86.210.12 - zieme4647 5667 [19/06/2019:17:20:49 -0400] \"GET /embrace/supply-chains/dynamic/vertical\" 201 20574",
                pattern: access_log_pattern(),
                numeric_groups: true,
            ],
            want: Ok(value!({
                "0": "5.86.210.12 - zieme4647 5667 [19/06/2019:17:20:49 -0400] \"GET /embrace/supply-chains/dynamic/vertical\" 201 20574",
                "1": "5.86.210.12",
                "2": "zieme4647",
                "3": "5667",
                "4": "19/06/2019:17:20:49 -0400",
                "5": "GET",
                "6": "/embrace/supply-chains/dynamic/vertical",
                "7": "201",
                "8": "20574",
                "host": "5.86.210.12",
                "user": "zieme4647",
                "bytes_in": "5667",
                "timestamp": "19/06/2019:17:20:49 -0400",
                "method": "GET",
                "path": "/embrace/supply-chains/dynamic/vertical",
                "status": "201",
                "bytes_out": "20574",
            })),
            tdef: access_log_type_def(),
        }

        // Only the first of several possible matches is reported.
        single_match {
            args: func_args! [
                value: "first group and second group",
                pattern: Regex::new("(?P<number>.*?) group").unwrap()
            ],
            want: Ok(value!({"number": "first"})),
            tdef: TypeDef::object(btreemap! {
                Field::from("0") => Kind::bytes() | Kind::null(),
                Field::from("1") => Kind::bytes() | Kind::null(),
                Field::from("number") => Kind::bytes(),
            }).fallible(),
        }

        // A non-matching input yields the "no matches" error.
        no_match {
            args: func_args! [
                value: "I don't match",
                pattern: access_log_pattern()
            ],
            want: Err("could not find any pattern matches"),
            tdef: access_log_type_def(),
        }
    ];
}