simple_test/parser/
token.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use lazy_static::lazy_static;
16use std::collections::HashMap;
17use std::fmt;
18
19lrlex::lrlex_mod!("token_map");
20pub use token_map::*;
21
/// Raw numeric identifier of a token.
///
/// The `T_*` constants used throughout this file are values of this type;
/// they are presumably provided by the `token_map` module re-exported above.
pub type TokenId = u8;

/// Thin, copyable wrapper around a [`TokenId`] that carries the
/// token-classification helpers and the `Display` implementation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct TokenType(TokenId);
26
27lazy_static! {
28    static ref KEYWORDS: HashMap<&'static str, TokenId> =
29        [
30            // Operators.
31            ("and", T_LAND),
32            ("or", T_LOR),
33            ("unless", T_LUNLESS),
34            ("atan2", T_ATAN2),
35
36            // Aggregators.
37            ("sum", T_SUM),
38            ("avg", T_AVG),
39            ("count", T_COUNT),
40            ("min", T_MIN),
41            ("max", T_MAX),
42            ("group", T_GROUP),
43            ("stddev", T_STDDEV),
44            ("stdvar", T_STDVAR),
45            ("topk", T_TOPK),
46            ("bottomk", T_BOTTOMK),
47            ("count_values", T_COUNT_VALUES),
48            ("quantile", T_QUANTILE),
49
50            // Keywords.
51            ("offset", T_OFFSET),
52            ("by", T_BY),
53            ("without", T_WITHOUT),
54            ("on", T_ON),
55            ("ignoring", T_IGNORING),
56            ("group_left", T_GROUP_LEFT),
57            ("group_right", T_GROUP_RIGHT),
58            ("bool", T_BOOL),
59
60            // Preprocessors.
61            ("start", T_START),
62            ("end", T_END),
63
64            // Special numbers.
65            ("inf", T_NUMBER),
66            ("nan", T_NUMBER),
67        ].into_iter().collect();
68}
69
70/// this is for debug so far, maybe pretty feature in the future.
71#[allow(dead_code)]
72pub(crate) fn token_display(id: TokenId) -> &'static str {
73    match id {
74        // Token.
75        T_EQL => "=",
76        T_BLANK => "_",
77        T_COLON => ":",
78        T_COMMA => ",",
79        T_COMMENT => "#",
80        T_DURATION => "[du]",
81        T_EOF => "<eof>",
82        T_ERROR => "{Err}",
83        T_IDENTIFIER => "{ID}",
84        T_LEFT_BRACE => "{",
85        T_LEFT_BRACKET => "[",
86        T_LEFT_PAREN => "(",
87        T_METRIC_IDENTIFIER => "{Metric_ID}",
88        T_NUMBER => "{Num}",
89        T_RIGHT_BRACE => "}",
90        T_RIGHT_BRACKET => "]",
91        T_RIGHT_PAREN => ")",
92        T_SEMICOLON => ",",
93        T_SPACE => "<space>",
94        T_STRING => "{Str}",
95        T_TIMES => "x",
96
97        // Operators.
98        T_OPERATORS_START => "operators_start",
99        T_ADD => "+",
100        T_DIV => "/",
101        T_EQLC => "==",
102        T_EQL_REGEX => "=~",
103        T_GTE => ">=",
104        T_GTR => ">",
105        T_LAND => "and",
106        T_LOR => "or",
107        T_LSS => "<",
108        T_LTE => "<=",
109        T_LUNLESS => "unless",
110        T_MOD => "%",
111        T_MUL => "*",
112        T_NEQ => "!=",
113        T_NEQ_REGEX => "!~",
114        T_POW => "^",
115        T_SUB => "-",
116        T_AT => "@",
117        T_ATAN2 => "atan2",
118        T_OPERATORS_END => "operators_end",
119
120        // Aggregators.
121        T_AGGREGATORS_START => "aggregators_start",
122        T_AVG => "avg",
123        T_BOTTOMK => "bottomk",
124        T_COUNT => "count",
125        T_COUNT_VALUES => "count_values",
126        T_GROUP => "group",
127        T_MAX => "max",
128        T_MIN => "min",
129        T_QUANTILE => "quantile",
130        T_STDDEV => "stddev",
131        T_STDVAR => "stdvar",
132        T_SUM => "sum",
133        T_TOPK => "topk",
134        T_AGGREGATORS_END => "aggregators_end",
135
136        // Keywords.
137        T_KEYWORDS_START => "keywords_start",
138        T_BOOL => "bool",
139        T_BY => "by",
140        T_GROUP_LEFT => "group_left",
141        T_GROUP_RIGHT => "group_right",
142        T_IGNORING => "ignoring",
143        T_OFFSET => "offset",
144        T_ON => "on",
145        T_WITHOUT => "without",
146        T_KEYWORDS_END => "keywords_end",
147
148        // Preprocessors.
149        T_PREPROCESSOR_START => "preprocessor_start",
150        T_START => "start",
151        T_END => "end",
152        T_PREPROCESSOR_END => "preprocessor_end",
153
154        T_STARTSYMBOLS_START
155        | T_START_METRIC
156        | T_START_SERIES_DESCRIPTION
157        | T_START_EXPRESSION
158        | T_START_METRIC_SELECTOR
159        | T_STARTSYMBOLS_END => "not used",
160
161        _ => "unknown token",
162    }
163}
164
165/// This is a list of all keywords in PromQL.
166/// When changing this list, make sure to also change
167/// the maybe_label grammar rule in the generated parser
168/// to avoid misinterpretation of labels as keywords.
169pub(crate) fn get_keyword_token(s: &str) -> Option<TokenId> {
170    KEYWORDS.get(s).copied()
171}
172
173#[derive(Debug, Clone, PartialEq, Eq)]
174pub struct Token {
175    pub id: TokenType,
176    pub val: String,
177}
178
179impl Token {
180    pub fn new(id: TokenId, val: String) -> Self {
181        Self {
182            id: TokenType(id),
183            val,
184        }
185    }
186
187    pub fn id(&self) -> TokenId {
188        self.id.id()
189    }
190}
191
192impl TokenType {
193    pub fn new(id: TokenId) -> Self {
194        Self(id)
195    }
196
197    pub fn id(&self) -> TokenId {
198        self.0
199    }
200
201    pub fn is_aggregator(&self) -> bool {
202        self.0 > T_AGGREGATORS_START && self.0 < T_AGGREGATORS_END
203    }
204
205    pub fn is_aggregator_with_param(&self) -> bool {
206        matches!(self.0, T_TOPK | T_BOTTOMK | T_COUNT_VALUES | T_QUANTILE)
207    }
208
209    pub fn is_comparison_operator(&self) -> bool {
210        matches!(self.0, T_EQLC | T_NEQ | T_LTE | T_LSS | T_GTE | T_GTR)
211    }
212
213    pub fn is_set_operator(&self) -> bool {
214        matches!(self.0, T_LAND | T_LOR | T_LUNLESS)
215    }
216
217    pub fn is_operator(&self) -> bool {
218        self.0 > T_OPERATORS_START && self.0 < T_OPERATORS_END
219    }
220}
221
222impl fmt::Display for TokenType {
223    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
224        write!(f, "{}", token_display(self.id()))
225    }
226}
227
228#[cfg(test)]
229mod tests {
230    use super::*;
231
232    #[test]
233    fn test_token_display() {
234        assert_eq!(token_display(T_EQL), "=");
235        assert_eq!(token_display(T_BLANK), "_");
236        assert_eq!(token_display(T_COLON), ":");
237        assert_eq!(token_display(T_COMMA), ",");
238        assert_eq!(token_display(T_COMMENT), "#");
239        assert_eq!(token_display(T_DURATION), "[du]");
240        assert_eq!(token_display(T_EOF), "<eof>");
241        assert_eq!(token_display(T_ERROR), "{Err}");
242        assert_eq!(token_display(T_IDENTIFIER), "{ID}");
243        assert_eq!(token_display(T_LEFT_BRACE), "{");
244        assert_eq!(token_display(T_LEFT_BRACKET), "[");
245        assert_eq!(token_display(T_LEFT_PAREN), "(");
246        assert_eq!(token_display(T_METRIC_IDENTIFIER), "{Metric_ID}");
247        assert_eq!(token_display(T_NUMBER), "{Num}");
248        assert_eq!(token_display(T_RIGHT_BRACE), "}");
249        assert_eq!(token_display(T_RIGHT_BRACKET), "]");
250        assert_eq!(token_display(T_RIGHT_PAREN), ")");
251        assert_eq!(token_display(T_SEMICOLON), ",");
252        assert_eq!(token_display(T_SPACE), "<space>");
253        assert_eq!(token_display(T_STRING), "{Str}");
254        assert_eq!(token_display(T_TIMES), "x");
255        assert_eq!(token_display(T_OPERATORS_START), "operators_start");
256        assert_eq!(token_display(T_ADD), "+");
257        assert_eq!(token_display(T_DIV), "/");
258        assert_eq!(token_display(T_EQLC), "==");
259        assert_eq!(token_display(T_EQL_REGEX), "=~");
260        assert_eq!(token_display(T_GTE), ">=");
261        assert_eq!(token_display(T_GTR), ">");
262        assert_eq!(token_display(T_LAND), "and");
263        assert_eq!(token_display(T_LOR), "or");
264        assert_eq!(token_display(T_LSS), "<");
265        assert_eq!(token_display(T_LTE), "<=");
266        assert_eq!(token_display(T_LUNLESS), "unless");
267        assert_eq!(token_display(T_MOD), "%");
268        assert_eq!(token_display(T_MUL), "*");
269        assert_eq!(token_display(T_NEQ), "!=");
270        assert_eq!(token_display(T_NEQ_REGEX), "!~");
271        assert_eq!(token_display(T_POW), "^");
272        assert_eq!(token_display(T_SUB), "-");
273        assert_eq!(token_display(T_AT), "@");
274        assert_eq!(token_display(T_ATAN2), "atan2");
275        assert_eq!(token_display(T_OPERATORS_END), "operators_end");
276        assert_eq!(token_display(T_AGGREGATORS_START), "aggregators_start");
277        assert_eq!(token_display(T_AVG), "avg");
278        assert_eq!(token_display(T_BOTTOMK), "bottomk");
279        assert_eq!(token_display(T_COUNT), "count");
280        assert_eq!(token_display(T_COUNT_VALUES), "count_values");
281        assert_eq!(token_display(T_GROUP), "group");
282        assert_eq!(token_display(T_MAX), "max");
283        assert_eq!(token_display(T_MIN), "min");
284        assert_eq!(token_display(T_QUANTILE), "quantile");
285        assert_eq!(token_display(T_STDDEV), "stddev");
286        assert_eq!(token_display(T_STDVAR), "stdvar");
287        assert_eq!(token_display(T_SUM), "sum");
288        assert_eq!(token_display(T_TOPK), "topk");
289        assert_eq!(token_display(T_AGGREGATORS_END), "aggregators_end");
290        assert_eq!(token_display(T_KEYWORDS_START), "keywords_start");
291        assert_eq!(token_display(T_BOOL), "bool");
292        assert_eq!(token_display(T_BY), "by");
293        assert_eq!(token_display(T_GROUP_LEFT), "group_left");
294        assert_eq!(token_display(T_GROUP_RIGHT), "group_right");
295        assert_eq!(token_display(T_IGNORING), "ignoring");
296        assert_eq!(token_display(T_OFFSET), "offset");
297        assert_eq!(token_display(T_ON), "on");
298        assert_eq!(token_display(T_WITHOUT), "without");
299        assert_eq!(token_display(T_KEYWORDS_END), "keywords_end");
300        assert_eq!(token_display(T_PREPROCESSOR_START), "preprocessor_start");
301        assert_eq!(token_display(T_START), "start");
302        assert_eq!(token_display(T_END), "end");
303        assert_eq!(token_display(T_PREPROCESSOR_END), "preprocessor_end");
304
305        // if new token added in promql.y, this has to be updated
306        for i in 70..=75 {
307            assert_eq!(token_display(i), "not used");
308        }
309
310        for i in 76..=255 {
311            assert_eq!(token_display(i), "unknown token");
312        }
313    }
314
315    #[test]
316    fn test_get_keyword_tokens() {
317        assert!(matches!(get_keyword_token("and"), Some(T_LAND)));
318        assert!(matches!(get_keyword_token("or"), Some(T_LOR)));
319        assert!(matches!(get_keyword_token("unless"), Some(T_LUNLESS)));
320        assert!(matches!(get_keyword_token("atan2"), Some(T_ATAN2)));
321        assert!(matches!(get_keyword_token("sum"), Some(T_SUM)));
322        assert!(matches!(get_keyword_token("avg"), Some(T_AVG)));
323        assert!(matches!(get_keyword_token("count"), Some(T_COUNT)));
324        assert!(matches!(get_keyword_token("min"), Some(T_MIN)));
325        assert!(matches!(get_keyword_token("max"), Some(T_MAX)));
326        assert!(matches!(get_keyword_token("group"), Some(T_GROUP)));
327        assert!(matches!(get_keyword_token("stddev"), Some(T_STDDEV)));
328        assert!(matches!(get_keyword_token("stdvar"), Some(T_STDVAR)));
329        assert!(matches!(get_keyword_token("topk"), Some(T_TOPK)));
330        assert!(matches!(get_keyword_token("bottomk"), Some(T_BOTTOMK)));
331        assert!(matches!(
332            get_keyword_token("count_values"),
333            Some(T_COUNT_VALUES)
334        ));
335        assert!(matches!(get_keyword_token("quantile"), Some(T_QUANTILE)));
336        assert!(matches!(get_keyword_token("offset"), Some(T_OFFSET)));
337        assert!(matches!(get_keyword_token("by"), Some(T_BY)));
338        assert!(matches!(get_keyword_token("without"), Some(T_WITHOUT)));
339        assert!(matches!(get_keyword_token("on"), Some(T_ON)));
340        assert!(matches!(get_keyword_token("ignoring"), Some(T_IGNORING)));
341        assert!(matches!(
342            get_keyword_token("group_left"),
343            Some(T_GROUP_LEFT)
344        ));
345        assert!(matches!(
346            get_keyword_token("group_right"),
347            Some(T_GROUP_RIGHT)
348        ));
349        assert!(matches!(get_keyword_token("bool"), Some(T_BOOL)));
350        assert!(matches!(get_keyword_token("start"), Some(T_START)));
351        assert!(matches!(get_keyword_token("end"), Some(T_END)));
352        assert!(matches!(get_keyword_token("inf"), Some(T_NUMBER)));
353        assert!(matches!(get_keyword_token("nan"), Some(T_NUMBER)));
354
355        // not keywords
356        assert!(get_keyword_token("at").is_none());
357        assert!(get_keyword_token("unknown").is_none());
358    }
359
360    #[test]
361    fn test_with_param() {
362        assert!(TokenType(T_TOPK).is_aggregator_with_param());
363        assert!(TokenType(T_BOTTOMK).is_aggregator_with_param());
364        assert!(TokenType(T_COUNT_VALUES).is_aggregator_with_param());
365        assert!(TokenType(T_QUANTILE).is_aggregator_with_param());
366
367        assert!(!TokenType(T_MAX).is_aggregator_with_param());
368        assert!(!TokenType(T_MIN).is_aggregator_with_param());
369        assert!(!TokenType(T_AVG).is_aggregator_with_param());
370    }
371
372    #[test]
373    fn test_comparison_operator() {
374        assert!(TokenType(T_EQLC).is_comparison_operator());
375        assert!(TokenType(T_NEQ).is_comparison_operator());
376        assert!(TokenType(T_LTE).is_comparison_operator());
377        assert!(TokenType(T_LSS).is_comparison_operator());
378        assert!(TokenType(T_GTE).is_comparison_operator());
379        assert!(TokenType(T_GTR).is_comparison_operator());
380
381        assert!(!TokenType(T_ADD).is_comparison_operator());
382        assert!(!TokenType(T_LAND).is_comparison_operator());
383    }
384
385    #[test]
386    fn test_is_set_operator() {
387        assert!(TokenType(T_LAND).is_set_operator());
388        assert!(TokenType(T_LOR).is_set_operator());
389        assert!(TokenType(T_LUNLESS).is_set_operator());
390
391        assert!(!TokenType(T_ADD).is_set_operator());
392        assert!(!TokenType(T_MAX).is_set_operator());
393        assert!(!TokenType(T_NEQ).is_set_operator());
394    }
395
396    #[test]
397    fn test_is_operator() {
398        assert!(TokenType(T_ADD).is_operator());
399        assert!(TokenType(T_DIV).is_operator());
400        assert!(TokenType(T_EQLC).is_operator());
401        assert!(TokenType(T_EQL_REGEX).is_operator());
402        assert!(TokenType(T_GTE).is_operator());
403        assert!(TokenType(T_GTR).is_operator());
404        assert!(TokenType(T_LAND).is_operator());
405        assert!(TokenType(T_LOR).is_operator());
406        assert!(TokenType(T_LSS).is_operator());
407        assert!(TokenType(T_LTE).is_operator());
408        assert!(TokenType(T_LUNLESS).is_operator());
409        assert!(TokenType(T_MOD).is_operator());
410        assert!(TokenType(T_MUL).is_operator());
411        assert!(TokenType(T_NEQ).is_operator());
412        assert!(TokenType(T_NEQ_REGEX).is_operator());
413        assert!(TokenType(T_POW).is_operator());
414        assert!(TokenType(T_SUB).is_operator());
415        assert!(TokenType(T_AT).is_operator());
416        assert!(TokenType(T_ATAN2).is_operator());
417
418        assert!(!TokenType(T_SUM).is_operator());
419        assert!(!TokenType(T_OPERATORS_START).is_operator());
420        assert!(!TokenType(T_OPERATORS_END).is_operator());
421    }
422
423    #[test]
424    fn test_is_aggregator() {
425        assert!(TokenType(T_AVG).is_aggregator());
426        assert!(TokenType(T_BOTTOMK).is_aggregator());
427        assert!(TokenType(T_COUNT).is_aggregator());
428        assert!(TokenType(T_COUNT_VALUES).is_aggregator());
429        assert!(TokenType(T_GROUP).is_aggregator());
430        assert!(TokenType(T_MAX).is_aggregator());
431        assert!(TokenType(T_MIN).is_aggregator());
432        assert!(TokenType(T_QUANTILE).is_aggregator());
433        assert!(TokenType(T_STDDEV).is_aggregator());
434        assert!(TokenType(T_STDVAR).is_aggregator());
435        assert!(TokenType(T_SUM).is_aggregator());
436        assert!(TokenType(T_TOPK).is_aggregator());
437
438        assert!(!TokenType(T_LOR).is_aggregator());
439        assert!(!TokenType(T_ADD).is_aggregator());
440    }
441}