1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
#![allow(clippy::upper_case_acronyms)]
use itertools::Itertools;
use pest::iterators::Pair;
use pest_derive::Parser;
use crate::datadog_search_syntax::BooleanType;
use super::node::{Comparison, ComparisonValue, QueryNode, Range};
/// Parser type for the search query grammar.
///
/// The parsing implementation is derived by `pest_derive` from the grammar
/// file named in the `grammar` attribute below; the derive also provides the
/// `Rule` enum that [`QueryVisitor`] matches on.
#[derive(Debug, Parser)]
#[grammar = "src/datadog/search/grammar.pest"]
pub struct EventPlatformQuery;
/// Name of the default field. A bare `*` value on this field is treated as
/// "match all documents" by the visitor.
/// NOTE(review): presumably this is what callers pass as `default_field` for
/// unqualified terms -- confirm against the call sites.
pub const DEFAULT_FIELD: &str = "_default_";
/// Pseudo-field name: a term or phrase value on this field yields an
/// `AttributeExists` node for the attribute named by that value.
const EXISTS_FIELD: &str = "_exists_";
/// Pseudo-field name: a term or phrase value on this field yields an
/// `AttributeMissing` node for the attribute named by that value.
const MISSING_FIELD: &str = "_missing_";
/// The QueryVisitor is responsible for going through the output of our
/// parser and consuming the various tokens produced, digesting them and
/// converting them into QueryNodes. As per the name, we're doing this
/// via a Visitor pattern and walking our way through the syntax tree.
///
/// The struct itself carries no state: every visitor step is an associated
/// function that takes the token to convert as an argument.
pub struct QueryVisitor;
impl QueryVisitor {
/// Entry point of the visitor: converts a `queryroot` pair into a [`QueryNode`].
///
/// `default_field` is the field name applied to terms that carry no explicit
/// field of their own.
///
/// # Panics
/// Panics if the pair has no inner pair, or if its first inner pair is not a
/// `query` (the grammar is expected to rule both out).
pub fn visit_queryroot(token: Pair<Rule>, default_field: &str) -> QueryNode {
    let root_child = token.into_inner().next().unwrap();
    // A queryroot will only ever contain a query
    if !matches!(root_child.as_rule(), Rule::query) {
        unreachable!()
    }
    Self::visit_query(root_child, default_field)
}
/// Converts a `query` pair into a [`QueryNode`].
///
/// AND binds tighter than OR: consecutive clauses are gathered into an AND
/// group, every OR closes the current group and starts a new one, and the
/// finished groups are finally combined with OR.
///
/// A `NOT` modifier negates the next clause or multiterm that is visited; a
/// `PLUS` modifier has no effect. If the combined result is a negated
/// `MatchAllDocs`, `MatchNoDocs` is returned instead.
///
/// # Panics
/// Panics on any child rule other than `multiterm`, `conjunction`,
/// `modifiers` or `clause`, and on a conjunction/modifier without the
/// expected inner token.
fn visit_query(token: Pair<Rule>, default_field: &str) -> QueryNode {
    // Set by a NOT modifier and consumed by the next node that is produced.
    let mut negate_next = false;
    // AND groups already closed by an OR.
    let mut finished_groups: Vec<QueryNode> = Vec::new();
    // Nodes of the AND group currently being collected.
    let mut current_group: Vec<QueryNode> = Vec::new();

    for child in token.into_inner() {
        let parsed = match child.as_rule() {
            Rule::multiterm => Self::visit_multiterm(child, default_field),
            Rule::clause => Self::visit_clause(child, default_field),
            Rule::conjunction => {
                match child.into_inner().next().unwrap().as_rule() {
                    // AND is implicit: the next node simply joins the current group.
                    Rule::AND => {}
                    Rule::OR => {
                        // Close the current group and start an empty one.
                        let closed = std::mem::take(&mut current_group);
                        finished_groups.push(QueryNode::new_boolean(BooleanType::And, closed));
                    }
                    _ => unreachable!(),
                }
                continue;
            }
            Rule::modifiers => {
                match child.into_inner().next().unwrap().as_rule() {
                    Rule::PLUS => {}
                    Rule::NOT => negate_next = true,
                    _ => unreachable!(),
                }
                continue;
            }
            _ => unreachable!(),
        };

        // Only reached for nodes that produce a clause; wrap it if a NOT is pending.
        let parsed = if negate_next {
            negate_next = false;
            QueryNode::NegatedNode {
                node: Box::new(parsed),
            }
        } else {
            parsed
        };
        current_group.push(parsed);
    }

    finished_groups.push(QueryNode::new_boolean(BooleanType::And, current_group));

    // NOTE(review): `new_boolean` can evidently hand back a non-boolean node
    // (presumably when given a single element) -- hence the check below.
    match QueryNode::new_boolean(BooleanType::Or, finished_groups) {
        QueryNode::NegatedNode { node } => {
            // A negated MatchAllDocs is the same as matching nothing.
            if matches!(*node, QueryNode::MatchAllDocs) {
                QueryNode::MatchNoDocs
            } else {
                QueryNode::NegatedNode { node }
            }
        }
        other => other,
    }
}
/// Converts a `multiterm` pair into a single `AttributeTerm` on
/// `default_field`, whose value is the unescaped terms joined by one space.
///
/// # Panics
/// Panics if the pair contains anything other than `TERM` children.
fn visit_multiterm(token: Pair<Rule>, default_field: &str) -> QueryNode {
    let words: Vec<String> = token
        .into_inner()
        .map(|part| match part.as_rule() {
            Rule::TERM => Self::visit_term(part),
            _ => unreachable!(),
        })
        .collect();

    QueryNode::AttributeTerm {
        attr: default_field.to_owned(),
        value: words.join(" "),
    }
}
fn visit_clause(clause: Pair<Rule>, default_field: &str) -> QueryNode {
let mut field: Option<&str> = None;
for item in clause.into_inner() {
// As per the parser, a clause will only ever contain:
// matchall, field, value, query.
match item.as_rule() {
Rule::matchall => return QueryNode::MatchAllDocs,
Rule::field => {
field = Some(Self::visit_field(item));
}
Rule::value => {
// As per the parser, value can only ever be one of:
// STAR, PHRASE, TERM, TERM_PREFIX, TERM_GLOB, range, comparison.
let value_contents = item.into_inner().next().unwrap();
match ((field.unwrap_or(default_field)), value_contents.as_rule()) {
(EXISTS_FIELD, Rule::TERM) => {
return QueryNode::AttributeExists {
attr: Self::visit_term(value_contents),
};
}
(EXISTS_FIELD, Rule::PHRASE) => {
return QueryNode::AttributeExists {
attr: Self::visit_phrase(value_contents),
};
}
(MISSING_FIELD, Rule::TERM) => {
return QueryNode::AttributeMissing {
attr: Self::visit_term(value_contents),
};
}
(MISSING_FIELD, Rule::PHRASE) => {
return QueryNode::AttributeMissing {
attr: Self::visit_phrase(value_contents),
};
}
(DEFAULT_FIELD, Rule::STAR) => return QueryNode::MatchAllDocs,
(f, Rule::STAR) => {
return QueryNode::AttributeWildcard {
attr: unescape(f),
wildcard: String::from("*"),
};
}
(f, Rule::TERM) => {
return QueryNode::AttributeTerm {
attr: unescape(f),
value: Self::visit_term(value_contents),
};
}
(f, Rule::PHRASE) => {
return QueryNode::QuotedAttribute {
attr: unescape(f),
phrase: Self::visit_phrase(value_contents),
};
}
(f, Rule::TERM_PREFIX) => {
return QueryNode::AttributePrefix {
attr: unescape(f),
prefix: Self::visit_prefix(value_contents),
};
}
(f, Rule::TERM_GLOB) => {
return QueryNode::AttributeWildcard {
attr: unescape(f),
wildcard: Self::visit_wildcard(value_contents),
};
}
(f, Rule::range) => {
let range_values = value_contents.into_inner();
// There should always be 4; brackets + 2 range values.
let (lower_inclusive, lower, upper, upper_inclusive) =
match range_values
.map(Self::visit_range_value)
.collect_tuple()
.expect("should be exactly 4 range values")
{
(
Range::Comparison(lc),
Range::Value(lv),
Range::Value(rv),
Range::Comparison(rc),
) => match (lc, rc) {
(Comparison::Gte, Comparison::Lte) => (true, lv, rv, true),
(Comparison::Gt, Comparison::Lt) => (false, lv, rv, false),
_ => panic!("invalid range comparison"),
},
_ => panic!("invalid range value"),
};
return QueryNode::AttributeRange {
attr: unescape(f),
lower,
lower_inclusive,
upper,
upper_inclusive,
};
}
(f, Rule::comparison) => {
let mut compiter = value_contents.into_inner();
let comparator = Self::visit_operator(
compiter.next().unwrap().into_inner().next().unwrap(),
);
let comparison_value = compiter.next().unwrap();
let value = match comparison_value.as_rule() {
Rule::TERM => {
ComparisonValue::String(Self::visit_term(comparison_value))
}
Rule::PHRASE => {
ComparisonValue::String(Self::visit_phrase(comparison_value))
}
Rule::NUMERIC_TERM => comparison_value.as_str().into(),
_ => unreachable!(),
};
return QueryNode::AttributeComparison {
attr: unescape(f),
comparator,
value,
};
}
// We've covered all the cases, so this should never happen
_ => unreachable!(),
}
}
Rule::query => return Self::visit_query(item, field.unwrap_or(default_field)),
// We've covered all the cases, so this should never happen
_ => unreachable!(),
}
}
QueryNode::MatchAllDocs
}
/// Maps an operator token onto the [`Comparison`] it stands for.
///
/// `LBRACKET` is treated like `GT` and `RBRACKET` like `LT`.
///
/// # Panics
/// Panics on any other rule.
fn visit_operator(token: Pair<Rule>) -> Comparison {
    match token.as_rule() {
        Rule::GT | Rule::LBRACKET => Comparison::Gt,
        Rule::LT | Rule::RBRACKET => Comparison::Lt,
        Rule::GT_EQ => Comparison::Gte,
        Rule::LT_EQ => Comparison::Lte,
        _ => unreachable!(),
    }
}
/// Converts one component of a range into a [`Range`].
///
/// A `RANGE_VALUE` becomes `Range::Value` built from the token text. A
/// bracket becomes `Range::Comparison`: `LBRACKET`/`RBRACKET` map to the
/// exclusive `Gt`/`Lt`, `LSQRBRACKET`/`RSQRBRACKET` to the inclusive
/// `Gte`/`Lte`.
///
/// # Panics
/// Panics on any other rule.
fn visit_range_value(token: Pair<Rule>) -> Range {
    let bracket = match token.as_rule() {
        Rule::RANGE_VALUE => return Range::Value(token.as_str().into()),
        Rule::LSQRBRACKET => Comparison::Gte,
        Rule::RSQRBRACKET => Comparison::Lte,
        Rule::LBRACKET => Comparison::Gt,
        Rule::RBRACKET => Comparison::Lt,
        _ => unreachable!(),
    };
    Range::Comparison(bracket)
}
/// Returns the text of a term token with its escape sequences resolved.
fn visit_term(token: Pair<Rule>) -> String {
    let raw_term = token.as_str();
    unescape(raw_term)
}
/// Returns the unescaped text of a prefix token without its final byte
/// (presumably the trailing `*` of the prefix syntax -- confirm against the
/// grammar).
///
/// # Panics
/// Panics if the token text is empty or does not end in a one-byte character.
fn visit_prefix(token: Pair<Rule>) -> String {
    let raw_prefix = token.as_str();
    let end = raw_prefix.len() - 1;
    unescape(&raw_prefix[..end])
}
/// Returns the text of a glob token with its escape sequences resolved; the
/// text is otherwise passed through as-is.
fn visit_wildcard(token: Pair<Rule>) -> String {
    let raw_glob = token.as_str();
    unescape(raw_glob)
}
fn visit_phrase(token: Pair<Rule>) -> String {
let quoted_string = token.as_str();
unescape("ed_string[1..quoted_string.len() - 1])
}
/// Returns the raw (still escaped) name of a `field` pair, borrowed from the
/// parsed input.
///
/// If the first inner pair is not a `TERM`, the placeholder `"BROKEN"` is
/// returned instead.
///
/// # Panics
/// Panics if the pair has no inner pair.
fn visit_field(token: Pair<'_, Rule>) -> &str {
    let name_pair = token.into_inner().next().unwrap();
    match name_pair.as_rule() {
        Rule::TERM => name_pair.as_str(),
        _ => "BROKEN",
    }
}
}
/// Resolve backslash escapes in `input`, returning a newly allocated string.
///
/// Every `\` is dropped and the character that follows it is copied through
/// verbatim, whatever it is. No distinction is made between characters that
/// REQUIRE escaping and those that do not, so `\\` yields a single `\`.
/// A lone `\` at the very end of the input is silently dropped.
pub fn unescape(input: &str) -> String {
    // Unescaping can only shrink the string, so the input length is enough.
    let mut unescaped = String::with_capacity(input.len());
    let mut remaining = input.chars();

    while let Some(current) = remaining.next() {
        if current != '\\' {
            unescaped.push(current);
            continue;
        }
        // Escape sequence: keep whatever follows the backslash.
        // TODO: Check for unterminated escape sequence and signal a problem
        if let Some(escaped) = remaining.next() {
            unescaped.push(escaped);
        }
    }

    unescaped
}