Skip to main content

promql_parser/parser/
token.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use lazy_static::lazy_static;
16use std::collections::HashMap;
17use std::fmt;
18
19lrlex::lrlex_mod!("token_map");
20pub use token_map::*;
21
/// Numeric identifier of a lexer/parser token.
pub type TokenId = u16;

/// Newtype wrapper around a raw [`TokenId`].
///
/// The wrapped id is private; it is cheap to copy and compares by value.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct TokenType(TokenId);
26
/// Serializes a [`TokenType`] as the string returned by `token_display`
/// for its id. Only compiled when the `ser` feature is enabled.
#[cfg(feature = "ser")]
impl serde::Serialize for TokenType {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text = token_display(self.0);
        serializer.serialize_str(text)
    }
}
36
37lazy_static! {
38    static ref KEYWORDS: HashMap<&'static str, TokenId> =
39        [
40            // Operators.
41            ("and", T_LAND),
42            ("or", T_LOR),
43            ("unless", T_LUNLESS),
44            ("atan2", T_ATAN2),
45
46            // Aggregators.
47            ("sum", T_SUM),
48            ("avg", T_AVG),
49            ("count", T_COUNT),
50            ("min", T_MIN),
51            ("max", T_MAX),
52            ("group", T_GROUP),
53            ("stddev", T_STDDEV),
54            ("stdvar", T_STDVAR),
55            ("topk", T_TOPK),
56            ("bottomk", T_BOTTOMK),
57            ("count_values", T_COUNT_VALUES),
58            ("quantile", T_QUANTILE),
59            ("limitk", T_LIMITK),
60            ("limit_ratio", T_LIMIT_RATIO),
61
62            // Keywords.
63            ("offset", T_OFFSET),
64            ("by", T_BY),
65            ("without", T_WITHOUT),
66            ("on", T_ON),
67            ("ignoring", T_IGNORING),
68            ("group_left", T_GROUP_LEFT),
69            ("group_right", T_GROUP_RIGHT),
70            ("bool", T_BOOL),
71            ("smoothed", T_SMOOTHED),
72            ("anchored", T_ANCHORED),
73            ("fill", T_FILL),
74            ("fill_left", T_FILL_LEFT),
75            ("fill_right", T_FILL_RIGHT),
76
77            // Preprocessors.
78            ("start", T_START),
79            ("end", T_END),
80
81            // Special numbers.
82            ("inf", T_NUMBER),
83            ("nan", T_NUMBER),
84        ].into_iter().collect();
85}
86
87/// this is for debug so far, maybe pretty feature in the future.
88#[allow(dead_code)]
89pub(crate) fn token_display(id: TokenId) -> &'static str {
90    match id {
91        // Token.
92        T_EQL => "=",
93        T_BLANK => "_",
94        T_COLON => ":",
95        T_COMMA => ",",
96        T_COMMENT => "#",
97        T_DURATION => "[du]",
98        T_EOF => "<eof>",
99        T_ERROR => "{Err}",
100        T_IDENTIFIER => "{ID}",
101        T_LEFT_BRACE => "{",
102        T_LEFT_BRACKET => "[",
103        T_LEFT_PAREN => "(",
104        T_OPEN_HIST => "{{",
105        T_CLOSE_HIST => "}}",
106        T_METRIC_IDENTIFIER => "{Metric_ID}",
107        T_NUMBER => "{Num}",
108        T_RIGHT_BRACE => "}",
109        T_RIGHT_BRACKET => "]",
110        T_RIGHT_PAREN => ")",
111        T_SEMICOLON => ",",
112        T_SPACE => "<space>",
113        T_STRING => "{Str}",
114        T_TIMES => "x",
115
116        // Operators.
117        T_OPERATORS_START => "operators_start",
118        T_ADD => "+",
119        T_DIV => "/",
120        T_EQLC => "==",
121        T_EQL_REGEX => "=~",
122        T_GTE => ">=",
123        T_GTR => ">",
124        T_LAND => "and",
125        T_LOR => "or",
126        T_LSS => "<",
127        T_LTE => "<=",
128        T_LUNLESS => "unless",
129        T_MOD => "%",
130        T_MUL => "*",
131        T_NEQ => "!=",
132        T_NEQ_REGEX => "!~",
133        T_POW => "^",
134        T_SUB => "-",
135        T_AT => "@",
136        T_ATAN2 => "atan2",
137        T_OPERATORS_END => "operators_end",
138
139        // Aggregators.
140        T_AGGREGATORS_START => "aggregators_start",
141        T_AVG => "avg",
142        T_BOTTOMK => "bottomk",
143        T_COUNT => "count",
144        T_COUNT_VALUES => "count_values",
145        T_GROUP => "group",
146        T_MAX => "max",
147        T_MIN => "min",
148        T_QUANTILE => "quantile",
149        T_STDDEV => "stddev",
150        T_STDVAR => "stdvar",
151        T_SUM => "sum",
152        T_TOPK => "topk",
153        T_LIMITK => "limitk",
154        T_LIMIT_RATIO => "limit_ratio",
155        T_AGGREGATORS_END => "aggregators_end",
156
157        // Keywords.
158        T_KEYWORDS_START => "keywords_start",
159        T_BOOL => "bool",
160        T_BY => "by",
161        T_GROUP_LEFT => "group_left",
162        T_GROUP_RIGHT => "group_right",
163        T_IGNORING => "ignoring",
164        T_OFFSET => "offset",
165        T_SMOOTHED => "smoothed",
166        T_ANCHORED => "anchored",
167        T_ON => "on",
168        T_WITHOUT => "without",
169        T_FILL => "fill",
170        T_FILL_LEFT => "fill_left",
171        T_FILL_RIGHT => "fill_right",
172        T_KEYWORDS_END => "keywords_end",
173
174        // Preprocessors.
175        T_PREPROCESSOR_START => "preprocessor_start",
176        T_START => "start",
177        T_END => "end",
178        T_STEP => "step",
179        T_PREPROCESSOR_END => "preprocessors_end",
180
181        T_STARTSYMBOLS_START
182        | T_START_METRIC
183        | T_START_SERIES_DESCRIPTION
184        | T_START_EXPRESSION
185        | T_START_METRIC_SELECTOR
186        | T_STARTSYMBOLS_END => "not used",
187
188        _ => "unknown token",
189    }
190}
191
192/// This is a list of all keywords in PromQL.
193/// When changing this list, make sure to also change
194/// maybe_label grammar rule in generated parser
195/// to avoid misinterpretation of labels as keywords.
196pub(crate) fn get_keyword_token(s: &str) -> Option<TokenId> {
197    KEYWORDS.get(s).copied()
198}
199
200#[derive(Debug, Clone, PartialEq, Eq)]
201pub struct Token {
202    pub id: TokenType,
203    pub val: String,
204}
205
206impl Token {
207    pub fn new(id: TokenId, val: String) -> Self {
208        Self {
209            id: TokenType(id),
210            val,
211        }
212    }
213
214    pub fn id(&self) -> TokenId {
215        self.id.id()
216    }
217}
218
219impl TokenType {
220    pub fn new(id: TokenId) -> Self {
221        Self(id)
222    }
223
224    pub fn id(&self) -> TokenId {
225        self.0
226    }
227
228    pub fn is_aggregator(&self) -> bool {
229        self.0 > T_AGGREGATORS_START && self.0 < T_AGGREGATORS_END
230    }
231
232    pub fn is_aggregator_with_param(&self) -> bool {
233        matches!(
234            self.0,
235            T_TOPK | T_BOTTOMK | T_COUNT_VALUES | T_QUANTILE | T_LIMITK | T_LIMIT_RATIO
236        )
237    }
238
239    pub fn is_comparison_operator(&self) -> bool {
240        matches!(self.0, T_EQLC | T_NEQ | T_LTE | T_LSS | T_GTE | T_GTR)
241    }
242
243    pub fn is_set_operator(&self) -> bool {
244        matches!(self.0, T_LAND | T_LOR | T_LUNLESS)
245    }
246
247    pub fn is_operator(&self) -> bool {
248        self.0 > T_OPERATORS_START && self.0 < T_OPERATORS_END
249    }
250}
251
252impl fmt::Display for TokenType {
253    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
254        write!(f, "{}", token_display(self.id()))
255    }
256}
257
258#[cfg(test)]
259mod tests {
260    use super::*;
261
262    #[test]
263    fn test_token_display() {
264        assert_eq!(token_display(T_EQL), "=");
265        assert_eq!(token_display(T_BLANK), "_");
266        assert_eq!(token_display(T_COLON), ":");
267        assert_eq!(token_display(T_COMMA), ",");
268        assert_eq!(token_display(T_COMMENT), "#");
269        assert_eq!(token_display(T_DURATION), "[du]");
270        assert_eq!(token_display(T_EOF), "<eof>");
271        assert_eq!(token_display(T_ERROR), "{Err}");
272        assert_eq!(token_display(T_IDENTIFIER), "{ID}");
273        assert_eq!(token_display(T_LEFT_BRACE), "{");
274        assert_eq!(token_display(T_LEFT_BRACKET), "[");
275        assert_eq!(token_display(T_LEFT_PAREN), "(");
276        assert_eq!(token_display(T_OPEN_HIST), "{{");
277        assert_eq!(token_display(T_CLOSE_HIST), "}}");
278        assert_eq!(token_display(T_METRIC_IDENTIFIER), "{Metric_ID}");
279        assert_eq!(token_display(T_NUMBER), "{Num}");
280        assert_eq!(token_display(T_RIGHT_BRACE), "}");
281        assert_eq!(token_display(T_RIGHT_BRACKET), "]");
282        assert_eq!(token_display(T_RIGHT_PAREN), ")");
283        assert_eq!(token_display(T_SEMICOLON), ",");
284        assert_eq!(token_display(T_SPACE), "<space>");
285        assert_eq!(token_display(T_STRING), "{Str}");
286        assert_eq!(token_display(T_TIMES), "x");
287        assert_eq!(token_display(T_OPERATORS_START), "operators_start");
288        assert_eq!(token_display(T_ADD), "+");
289        assert_eq!(token_display(T_DIV), "/");
290        assert_eq!(token_display(T_EQLC), "==");
291        assert_eq!(token_display(T_EQL_REGEX), "=~");
292        assert_eq!(token_display(T_GTE), ">=");
293        assert_eq!(token_display(T_GTR), ">");
294        assert_eq!(token_display(T_LAND), "and");
295        assert_eq!(token_display(T_LOR), "or");
296        assert_eq!(token_display(T_LSS), "<");
297        assert_eq!(token_display(T_LTE), "<=");
298        assert_eq!(token_display(T_LUNLESS), "unless");
299        assert_eq!(token_display(T_MOD), "%");
300        assert_eq!(token_display(T_MUL), "*");
301        assert_eq!(token_display(T_NEQ), "!=");
302        assert_eq!(token_display(T_NEQ_REGEX), "!~");
303        assert_eq!(token_display(T_POW), "^");
304        assert_eq!(token_display(T_SUB), "-");
305        assert_eq!(token_display(T_AT), "@");
306        assert_eq!(token_display(T_ATAN2), "atan2");
307        assert_eq!(token_display(T_OPERATORS_END), "operators_end");
308        assert_eq!(token_display(T_AGGREGATORS_START), "aggregators_start");
309        assert_eq!(token_display(T_AVG), "avg");
310        assert_eq!(token_display(T_BOTTOMK), "bottomk");
311        assert_eq!(token_display(T_COUNT), "count");
312        assert_eq!(token_display(T_COUNT_VALUES), "count_values");
313        assert_eq!(token_display(T_GROUP), "group");
314        assert_eq!(token_display(T_MAX), "max");
315        assert_eq!(token_display(T_MIN), "min");
316        assert_eq!(token_display(T_QUANTILE), "quantile");
317        assert_eq!(token_display(T_STDDEV), "stddev");
318        assert_eq!(token_display(T_STDVAR), "stdvar");
319        assert_eq!(token_display(T_SUM), "sum");
320        assert_eq!(token_display(T_TOPK), "topk");
321        assert_eq!(token_display(T_LIMITK), "limitk");
322        assert_eq!(token_display(T_LIMIT_RATIO), "limit_ratio");
323        assert_eq!(token_display(T_AGGREGATORS_END), "aggregators_end");
324        assert_eq!(token_display(T_KEYWORDS_START), "keywords_start");
325        assert_eq!(token_display(T_BOOL), "bool");
326        assert_eq!(token_display(T_BY), "by");
327        assert_eq!(token_display(T_GROUP_LEFT), "group_left");
328        assert_eq!(token_display(T_GROUP_RIGHT), "group_right");
329        assert_eq!(token_display(T_IGNORING), "ignoring");
330        assert_eq!(token_display(T_OFFSET), "offset");
331        assert_eq!(token_display(T_SMOOTHED), "smoothed");
332        assert_eq!(token_display(T_ANCHORED), "anchored");
333        assert_eq!(token_display(T_FILL), "fill");
334        assert_eq!(token_display(T_FILL_LEFT), "fill_left");
335        assert_eq!(token_display(T_FILL_RIGHT), "fill_right");
336        assert_eq!(token_display(T_ON), "on");
337        assert_eq!(token_display(T_WITHOUT), "without");
338        assert_eq!(token_display(T_KEYWORDS_END), "keywords_end");
339        assert_eq!(token_display(T_PREPROCESSOR_START), "preprocessor_start");
340        assert_eq!(token_display(T_START), "start");
341        assert_eq!(token_display(T_END), "end");
342        assert_eq!(token_display(T_STEP), "step");
343        assert_eq!(token_display(T_PREPROCESSOR_END), "preprocessors_end");
344
345        // if new token added in promql.y, this has to be updated
346        for i in 80..=85 {
347            assert_eq!(token_display(i), "not used");
348        }
349
350        // All tokens are now tested individually above
351
352        for i in 86..=u16::MAX {
353            assert_eq!(token_display(i), "unknown token");
354        }
355    }
356
357    #[test]
358    fn test_get_keyword_tokens() {
359        assert!(matches!(get_keyword_token("and"), Some(T_LAND)));
360        assert!(matches!(get_keyword_token("or"), Some(T_LOR)));
361        assert!(matches!(get_keyword_token("unless"), Some(T_LUNLESS)));
362        assert!(matches!(get_keyword_token("atan2"), Some(T_ATAN2)));
363        assert!(matches!(get_keyword_token("sum"), Some(T_SUM)));
364        assert!(matches!(get_keyword_token("avg"), Some(T_AVG)));
365        assert!(matches!(get_keyword_token("count"), Some(T_COUNT)));
366        assert!(matches!(get_keyword_token("min"), Some(T_MIN)));
367        assert!(matches!(get_keyword_token("max"), Some(T_MAX)));
368        assert!(matches!(get_keyword_token("group"), Some(T_GROUP)));
369        assert!(matches!(get_keyword_token("stddev"), Some(T_STDDEV)));
370        assert!(matches!(get_keyword_token("stdvar"), Some(T_STDVAR)));
371        assert!(matches!(get_keyword_token("topk"), Some(T_TOPK)));
372        assert!(matches!(get_keyword_token("bottomk"), Some(T_BOTTOMK)));
373        assert!(matches!(
374            get_keyword_token("count_values"),
375            Some(T_COUNT_VALUES)
376        ));
377        assert!(matches!(get_keyword_token("quantile"), Some(T_QUANTILE)));
378        assert!(matches!(get_keyword_token("offset"), Some(T_OFFSET)));
379        assert!(matches!(get_keyword_token("by"), Some(T_BY)));
380        assert!(matches!(get_keyword_token("without"), Some(T_WITHOUT)));
381        assert!(matches!(get_keyword_token("on"), Some(T_ON)));
382        assert!(matches!(get_keyword_token("ignoring"), Some(T_IGNORING)));
383        assert!(matches!(
384            get_keyword_token("group_left"),
385            Some(T_GROUP_LEFT)
386        ));
387        assert!(matches!(
388            get_keyword_token("group_right"),
389            Some(T_GROUP_RIGHT)
390        ));
391        assert!(matches!(get_keyword_token("bool"), Some(T_BOOL)));
392        assert!(matches!(get_keyword_token("start"), Some(T_START)));
393        assert!(matches!(get_keyword_token("end"), Some(T_END)));
394        assert!(matches!(get_keyword_token("fill"), Some(T_FILL)));
395        assert!(matches!(get_keyword_token("fill_left"), Some(T_FILL_LEFT)));
396        assert!(matches!(
397            get_keyword_token("fill_right"),
398            Some(T_FILL_RIGHT)
399        ));
400        assert!(matches!(get_keyword_token("inf"), Some(T_NUMBER)));
401        assert!(matches!(get_keyword_token("nan"), Some(T_NUMBER)));
402
403        // not keywords
404        assert!(get_keyword_token("at").is_none());
405        assert!(get_keyword_token("unknown").is_none());
406    }
407
408    #[test]
409    fn test_with_param() {
410        assert!(TokenType(T_TOPK).is_aggregator_with_param());
411        assert!(TokenType(T_BOTTOMK).is_aggregator_with_param());
412        assert!(TokenType(T_COUNT_VALUES).is_aggregator_with_param());
413        assert!(TokenType(T_QUANTILE).is_aggregator_with_param());
414        assert!(TokenType(T_LIMITK).is_aggregator_with_param());
415        assert!(TokenType(T_LIMIT_RATIO).is_aggregator_with_param());
416
417        assert!(!TokenType(T_MAX).is_aggregator_with_param());
418        assert!(!TokenType(T_MIN).is_aggregator_with_param());
419        assert!(!TokenType(T_AVG).is_aggregator_with_param());
420    }
421
422    #[test]
423    fn test_comparison_operator() {
424        assert!(TokenType(T_EQLC).is_comparison_operator());
425        assert!(TokenType(T_NEQ).is_comparison_operator());
426        assert!(TokenType(T_LTE).is_comparison_operator());
427        assert!(TokenType(T_LSS).is_comparison_operator());
428        assert!(TokenType(T_GTE).is_comparison_operator());
429        assert!(TokenType(T_GTR).is_comparison_operator());
430
431        assert!(!TokenType(T_ADD).is_comparison_operator());
432        assert!(!TokenType(T_LAND).is_comparison_operator());
433    }
434
435    #[test]
436    fn test_is_set_operator() {
437        assert!(TokenType(T_LAND).is_set_operator());
438        assert!(TokenType(T_LOR).is_set_operator());
439        assert!(TokenType(T_LUNLESS).is_set_operator());
440
441        assert!(!TokenType(T_ADD).is_set_operator());
442        assert!(!TokenType(T_MAX).is_set_operator());
443        assert!(!TokenType(T_NEQ).is_set_operator());
444    }
445
446    #[test]
447    fn test_is_operator() {
448        assert!(TokenType(T_ADD).is_operator());
449        assert!(TokenType(T_DIV).is_operator());
450        assert!(TokenType(T_EQLC).is_operator());
451        assert!(TokenType(T_EQL_REGEX).is_operator());
452        assert!(TokenType(T_GTE).is_operator());
453        assert!(TokenType(T_GTR).is_operator());
454        assert!(TokenType(T_LAND).is_operator());
455        assert!(TokenType(T_LOR).is_operator());
456        assert!(TokenType(T_LSS).is_operator());
457        assert!(TokenType(T_LTE).is_operator());
458        assert!(TokenType(T_LUNLESS).is_operator());
459        assert!(TokenType(T_MOD).is_operator());
460        assert!(TokenType(T_MUL).is_operator());
461        assert!(TokenType(T_NEQ).is_operator());
462        assert!(TokenType(T_NEQ_REGEX).is_operator());
463        assert!(TokenType(T_POW).is_operator());
464        assert!(TokenType(T_SUB).is_operator());
465        assert!(TokenType(T_AT).is_operator());
466        assert!(TokenType(T_ATAN2).is_operator());
467
468        assert!(!TokenType(T_SUM).is_operator());
469        assert!(!TokenType(T_OPERATORS_START).is_operator());
470        assert!(!TokenType(T_OPERATORS_END).is_operator());
471    }
472
473    #[test]
474    fn test_is_aggregator() {
475        assert!(TokenType(T_AVG).is_aggregator());
476        assert!(TokenType(T_BOTTOMK).is_aggregator());
477        assert!(TokenType(T_COUNT).is_aggregator());
478        assert!(TokenType(T_COUNT_VALUES).is_aggregator());
479        assert!(TokenType(T_GROUP).is_aggregator());
480        assert!(TokenType(T_MAX).is_aggregator());
481        assert!(TokenType(T_MIN).is_aggregator());
482        assert!(TokenType(T_QUANTILE).is_aggregator());
483        assert!(TokenType(T_STDDEV).is_aggregator());
484        assert!(TokenType(T_STDVAR).is_aggregator());
485        assert!(TokenType(T_SUM).is_aggregator());
486        assert!(TokenType(T_TOPK).is_aggregator());
487        assert!(TokenType(T_LIMITK).is_aggregator());
488        assert!(TokenType(T_LIMIT_RATIO).is_aggregator());
489
490        assert!(!TokenType(T_LOR).is_aggregator());
491        assert!(!TokenType(T_ADD).is_aggregator());
492    }
493}