promql_parser/parser/
token.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use lazy_static::lazy_static;
16use std::collections::HashMap;
17use std::fmt;
18
19lrlex::lrlex_mod!("token_map");
20pub use token_map::*;
21
/// Numeric identifier of a lexer/parser token.
pub type TokenId = u8;

/// Newtype around a [`TokenId`].
///
/// The wrapped id is private to this module. Values are cheap to copy and
/// compare by id.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct TokenType(TokenId);
26
/// Serialization support, compiled only when the `ser` feature is enabled.
#[cfg(feature = "ser")]
impl serde::Serialize for TokenType {
    /// Serializes the token type as a string: the text that `token_display`
    /// returns for the wrapped token id.
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text = token_display(self.0);
        serializer.serialize_str(text)
    }
}
36
37lazy_static! {
38    static ref KEYWORDS: HashMap<&'static str, TokenId> =
39        [
40            // Operators.
41            ("and", T_LAND),
42            ("or", T_LOR),
43            ("unless", T_LUNLESS),
44            ("atan2", T_ATAN2),
45
46            // Aggregators.
47            ("sum", T_SUM),
48            ("avg", T_AVG),
49            ("count", T_COUNT),
50            ("min", T_MIN),
51            ("max", T_MAX),
52            ("group", T_GROUP),
53            ("stddev", T_STDDEV),
54            ("stdvar", T_STDVAR),
55            ("topk", T_TOPK),
56            ("bottomk", T_BOTTOMK),
57            ("count_values", T_COUNT_VALUES),
58            ("quantile", T_QUANTILE),
59
60            // Keywords.
61            ("offset", T_OFFSET),
62            ("by", T_BY),
63            ("without", T_WITHOUT),
64            ("on", T_ON),
65            ("ignoring", T_IGNORING),
66            ("group_left", T_GROUP_LEFT),
67            ("group_right", T_GROUP_RIGHT),
68            ("bool", T_BOOL),
69
70            // Preprocessors.
71            ("start", T_START),
72            ("end", T_END),
73
74            // Special numbers.
75            ("inf", T_NUMBER),
76            ("nan", T_NUMBER),
77        ].into_iter().collect();
78}
79
80/// this is for debug so far, maybe pretty feature in the future.
81#[allow(dead_code)]
82pub(crate) fn token_display(id: TokenId) -> &'static str {
83    match id {
84        // Token.
85        T_EQL => "=",
86        T_BLANK => "_",
87        T_COLON => ":",
88        T_COMMA => ",",
89        T_COMMENT => "#",
90        T_DURATION => "[du]",
91        T_EOF => "<eof>",
92        T_ERROR => "{Err}",
93        T_IDENTIFIER => "{ID}",
94        T_LEFT_BRACE => "{",
95        T_LEFT_BRACKET => "[",
96        T_LEFT_PAREN => "(",
97        T_METRIC_IDENTIFIER => "{Metric_ID}",
98        T_NUMBER => "{Num}",
99        T_RIGHT_BRACE => "}",
100        T_RIGHT_BRACKET => "]",
101        T_RIGHT_PAREN => ")",
102        T_SEMICOLON => ",",
103        T_SPACE => "<space>",
104        T_STRING => "{Str}",
105        T_TIMES => "x",
106
107        // Operators.
108        T_OPERATORS_START => "operators_start",
109        T_ADD => "+",
110        T_DIV => "/",
111        T_EQLC => "==",
112        T_EQL_REGEX => "=~",
113        T_GTE => ">=",
114        T_GTR => ">",
115        T_LAND => "and",
116        T_LOR => "or",
117        T_LSS => "<",
118        T_LTE => "<=",
119        T_LUNLESS => "unless",
120        T_MOD => "%",
121        T_MUL => "*",
122        T_NEQ => "!=",
123        T_NEQ_REGEX => "!~",
124        T_POW => "^",
125        T_SUB => "-",
126        T_AT => "@",
127        T_ATAN2 => "atan2",
128        T_OPERATORS_END => "operators_end",
129
130        // Aggregators.
131        T_AGGREGATORS_START => "aggregators_start",
132        T_AVG => "avg",
133        T_BOTTOMK => "bottomk",
134        T_COUNT => "count",
135        T_COUNT_VALUES => "count_values",
136        T_GROUP => "group",
137        T_MAX => "max",
138        T_MIN => "min",
139        T_QUANTILE => "quantile",
140        T_STDDEV => "stddev",
141        T_STDVAR => "stdvar",
142        T_SUM => "sum",
143        T_TOPK => "topk",
144        T_AGGREGATORS_END => "aggregators_end",
145
146        // Keywords.
147        T_KEYWORDS_START => "keywords_start",
148        T_BOOL => "bool",
149        T_BY => "by",
150        T_GROUP_LEFT => "group_left",
151        T_GROUP_RIGHT => "group_right",
152        T_IGNORING => "ignoring",
153        T_OFFSET => "offset",
154        T_ON => "on",
155        T_WITHOUT => "without",
156        T_KEYWORDS_END => "keywords_end",
157
158        // Preprocessors.
159        T_PREPROCESSOR_START => "preprocessor_start",
160        T_START => "start",
161        T_END => "end",
162        T_PREPROCESSOR_END => "preprocessor_end",
163
164        T_STARTSYMBOLS_START
165        | T_START_METRIC
166        | T_START_SERIES_DESCRIPTION
167        | T_START_EXPRESSION
168        | T_START_METRIC_SELECTOR
169        | T_STARTSYMBOLS_END => "not used",
170
171        _ => "unknown token",
172    }
173}
174
175/// This is a list of all keywords in PromQL.
176/// When changing this list, make sure to also change
177/// the maybe_label grammar rule in the generated parser
178/// to avoid misinterpretation of labels as keywords.
179pub(crate) fn get_keyword_token(s: &str) -> Option<TokenId> {
180    KEYWORDS.get(s).copied()
181}
182
183#[derive(Debug, Clone, PartialEq, Eq)]
184pub struct Token {
185    pub id: TokenType,
186    pub val: String,
187}
188
189impl Token {
190    pub fn new(id: TokenId, val: String) -> Self {
191        Self {
192            id: TokenType(id),
193            val,
194        }
195    }
196
197    pub fn id(&self) -> TokenId {
198        self.id.id()
199    }
200}
201
202impl TokenType {
203    pub fn new(id: TokenId) -> Self {
204        Self(id)
205    }
206
207    pub fn id(&self) -> TokenId {
208        self.0
209    }
210
211    pub fn is_aggregator(&self) -> bool {
212        self.0 > T_AGGREGATORS_START && self.0 < T_AGGREGATORS_END
213    }
214
215    pub fn is_aggregator_with_param(&self) -> bool {
216        matches!(self.0, T_TOPK | T_BOTTOMK | T_COUNT_VALUES | T_QUANTILE)
217    }
218
219    pub fn is_comparison_operator(&self) -> bool {
220        matches!(self.0, T_EQLC | T_NEQ | T_LTE | T_LSS | T_GTE | T_GTR)
221    }
222
223    pub fn is_set_operator(&self) -> bool {
224        matches!(self.0, T_LAND | T_LOR | T_LUNLESS)
225    }
226
227    pub fn is_operator(&self) -> bool {
228        self.0 > T_OPERATORS_START && self.0 < T_OPERATORS_END
229    }
230}
231
232impl fmt::Display for TokenType {
233    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
234        write!(f, "{}", token_display(self.id()))
235    }
236}
237
238#[cfg(test)]
239mod tests {
240    use super::*;
241
242    #[test]
243    fn test_token_display() {
244        assert_eq!(token_display(T_EQL), "=");
245        assert_eq!(token_display(T_BLANK), "_");
246        assert_eq!(token_display(T_COLON), ":");
247        assert_eq!(token_display(T_COMMA), ",");
248        assert_eq!(token_display(T_COMMENT), "#");
249        assert_eq!(token_display(T_DURATION), "[du]");
250        assert_eq!(token_display(T_EOF), "<eof>");
251        assert_eq!(token_display(T_ERROR), "{Err}");
252        assert_eq!(token_display(T_IDENTIFIER), "{ID}");
253        assert_eq!(token_display(T_LEFT_BRACE), "{");
254        assert_eq!(token_display(T_LEFT_BRACKET), "[");
255        assert_eq!(token_display(T_LEFT_PAREN), "(");
256        assert_eq!(token_display(T_METRIC_IDENTIFIER), "{Metric_ID}");
257        assert_eq!(token_display(T_NUMBER), "{Num}");
258        assert_eq!(token_display(T_RIGHT_BRACE), "}");
259        assert_eq!(token_display(T_RIGHT_BRACKET), "]");
260        assert_eq!(token_display(T_RIGHT_PAREN), ")");
261        assert_eq!(token_display(T_SEMICOLON), ",");
262        assert_eq!(token_display(T_SPACE), "<space>");
263        assert_eq!(token_display(T_STRING), "{Str}");
264        assert_eq!(token_display(T_TIMES), "x");
265        assert_eq!(token_display(T_OPERATORS_START), "operators_start");
266        assert_eq!(token_display(T_ADD), "+");
267        assert_eq!(token_display(T_DIV), "/");
268        assert_eq!(token_display(T_EQLC), "==");
269        assert_eq!(token_display(T_EQL_REGEX), "=~");
270        assert_eq!(token_display(T_GTE), ">=");
271        assert_eq!(token_display(T_GTR), ">");
272        assert_eq!(token_display(T_LAND), "and");
273        assert_eq!(token_display(T_LOR), "or");
274        assert_eq!(token_display(T_LSS), "<");
275        assert_eq!(token_display(T_LTE), "<=");
276        assert_eq!(token_display(T_LUNLESS), "unless");
277        assert_eq!(token_display(T_MOD), "%");
278        assert_eq!(token_display(T_MUL), "*");
279        assert_eq!(token_display(T_NEQ), "!=");
280        assert_eq!(token_display(T_NEQ_REGEX), "!~");
281        assert_eq!(token_display(T_POW), "^");
282        assert_eq!(token_display(T_SUB), "-");
283        assert_eq!(token_display(T_AT), "@");
284        assert_eq!(token_display(T_ATAN2), "atan2");
285        assert_eq!(token_display(T_OPERATORS_END), "operators_end");
286        assert_eq!(token_display(T_AGGREGATORS_START), "aggregators_start");
287        assert_eq!(token_display(T_AVG), "avg");
288        assert_eq!(token_display(T_BOTTOMK), "bottomk");
289        assert_eq!(token_display(T_COUNT), "count");
290        assert_eq!(token_display(T_COUNT_VALUES), "count_values");
291        assert_eq!(token_display(T_GROUP), "group");
292        assert_eq!(token_display(T_MAX), "max");
293        assert_eq!(token_display(T_MIN), "min");
294        assert_eq!(token_display(T_QUANTILE), "quantile");
295        assert_eq!(token_display(T_STDDEV), "stddev");
296        assert_eq!(token_display(T_STDVAR), "stdvar");
297        assert_eq!(token_display(T_SUM), "sum");
298        assert_eq!(token_display(T_TOPK), "topk");
299        assert_eq!(token_display(T_AGGREGATORS_END), "aggregators_end");
300        assert_eq!(token_display(T_KEYWORDS_START), "keywords_start");
301        assert_eq!(token_display(T_BOOL), "bool");
302        assert_eq!(token_display(T_BY), "by");
303        assert_eq!(token_display(T_GROUP_LEFT), "group_left");
304        assert_eq!(token_display(T_GROUP_RIGHT), "group_right");
305        assert_eq!(token_display(T_IGNORING), "ignoring");
306        assert_eq!(token_display(T_OFFSET), "offset");
307        assert_eq!(token_display(T_ON), "on");
308        assert_eq!(token_display(T_WITHOUT), "without");
309        assert_eq!(token_display(T_KEYWORDS_END), "keywords_end");
310        assert_eq!(token_display(T_PREPROCESSOR_START), "preprocessor_start");
311        assert_eq!(token_display(T_START), "start");
312        assert_eq!(token_display(T_END), "end");
313        assert_eq!(token_display(T_PREPROCESSOR_END), "preprocessor_end");
314
315        // if new token added in promql.y, this has to be updated
316        for i in 70..=75 {
317            assert_eq!(token_display(i), "not used");
318        }
319
320        for i in 76..=255 {
321            assert_eq!(token_display(i), "unknown token");
322        }
323    }
324
325    #[test]
326    fn test_get_keyword_tokens() {
327        assert!(matches!(get_keyword_token("and"), Some(T_LAND)));
328        assert!(matches!(get_keyword_token("or"), Some(T_LOR)));
329        assert!(matches!(get_keyword_token("unless"), Some(T_LUNLESS)));
330        assert!(matches!(get_keyword_token("atan2"), Some(T_ATAN2)));
331        assert!(matches!(get_keyword_token("sum"), Some(T_SUM)));
332        assert!(matches!(get_keyword_token("avg"), Some(T_AVG)));
333        assert!(matches!(get_keyword_token("count"), Some(T_COUNT)));
334        assert!(matches!(get_keyword_token("min"), Some(T_MIN)));
335        assert!(matches!(get_keyword_token("max"), Some(T_MAX)));
336        assert!(matches!(get_keyword_token("group"), Some(T_GROUP)));
337        assert!(matches!(get_keyword_token("stddev"), Some(T_STDDEV)));
338        assert!(matches!(get_keyword_token("stdvar"), Some(T_STDVAR)));
339        assert!(matches!(get_keyword_token("topk"), Some(T_TOPK)));
340        assert!(matches!(get_keyword_token("bottomk"), Some(T_BOTTOMK)));
341        assert!(matches!(
342            get_keyword_token("count_values"),
343            Some(T_COUNT_VALUES)
344        ));
345        assert!(matches!(get_keyword_token("quantile"), Some(T_QUANTILE)));
346        assert!(matches!(get_keyword_token("offset"), Some(T_OFFSET)));
347        assert!(matches!(get_keyword_token("by"), Some(T_BY)));
348        assert!(matches!(get_keyword_token("without"), Some(T_WITHOUT)));
349        assert!(matches!(get_keyword_token("on"), Some(T_ON)));
350        assert!(matches!(get_keyword_token("ignoring"), Some(T_IGNORING)));
351        assert!(matches!(
352            get_keyword_token("group_left"),
353            Some(T_GROUP_LEFT)
354        ));
355        assert!(matches!(
356            get_keyword_token("group_right"),
357            Some(T_GROUP_RIGHT)
358        ));
359        assert!(matches!(get_keyword_token("bool"), Some(T_BOOL)));
360        assert!(matches!(get_keyword_token("start"), Some(T_START)));
361        assert!(matches!(get_keyword_token("end"), Some(T_END)));
362        assert!(matches!(get_keyword_token("inf"), Some(T_NUMBER)));
363        assert!(matches!(get_keyword_token("nan"), Some(T_NUMBER)));
364
365        // not keywords
366        assert!(get_keyword_token("at").is_none());
367        assert!(get_keyword_token("unknown").is_none());
368    }
369
370    #[test]
371    fn test_with_param() {
372        assert!(TokenType(T_TOPK).is_aggregator_with_param());
373        assert!(TokenType(T_BOTTOMK).is_aggregator_with_param());
374        assert!(TokenType(T_COUNT_VALUES).is_aggregator_with_param());
375        assert!(TokenType(T_QUANTILE).is_aggregator_with_param());
376
377        assert!(!TokenType(T_MAX).is_aggregator_with_param());
378        assert!(!TokenType(T_MIN).is_aggregator_with_param());
379        assert!(!TokenType(T_AVG).is_aggregator_with_param());
380    }
381
382    #[test]
383    fn test_comparison_operator() {
384        assert!(TokenType(T_EQLC).is_comparison_operator());
385        assert!(TokenType(T_NEQ).is_comparison_operator());
386        assert!(TokenType(T_LTE).is_comparison_operator());
387        assert!(TokenType(T_LSS).is_comparison_operator());
388        assert!(TokenType(T_GTE).is_comparison_operator());
389        assert!(TokenType(T_GTR).is_comparison_operator());
390
391        assert!(!TokenType(T_ADD).is_comparison_operator());
392        assert!(!TokenType(T_LAND).is_comparison_operator());
393    }
394
395    #[test]
396    fn test_is_set_operator() {
397        assert!(TokenType(T_LAND).is_set_operator());
398        assert!(TokenType(T_LOR).is_set_operator());
399        assert!(TokenType(T_LUNLESS).is_set_operator());
400
401        assert!(!TokenType(T_ADD).is_set_operator());
402        assert!(!TokenType(T_MAX).is_set_operator());
403        assert!(!TokenType(T_NEQ).is_set_operator());
404    }
405
406    #[test]
407    fn test_is_operator() {
408        assert!(TokenType(T_ADD).is_operator());
409        assert!(TokenType(T_DIV).is_operator());
410        assert!(TokenType(T_EQLC).is_operator());
411        assert!(TokenType(T_EQL_REGEX).is_operator());
412        assert!(TokenType(T_GTE).is_operator());
413        assert!(TokenType(T_GTR).is_operator());
414        assert!(TokenType(T_LAND).is_operator());
415        assert!(TokenType(T_LOR).is_operator());
416        assert!(TokenType(T_LSS).is_operator());
417        assert!(TokenType(T_LTE).is_operator());
418        assert!(TokenType(T_LUNLESS).is_operator());
419        assert!(TokenType(T_MOD).is_operator());
420        assert!(TokenType(T_MUL).is_operator());
421        assert!(TokenType(T_NEQ).is_operator());
422        assert!(TokenType(T_NEQ_REGEX).is_operator());
423        assert!(TokenType(T_POW).is_operator());
424        assert!(TokenType(T_SUB).is_operator());
425        assert!(TokenType(T_AT).is_operator());
426        assert!(TokenType(T_ATAN2).is_operator());
427
428        assert!(!TokenType(T_SUM).is_operator());
429        assert!(!TokenType(T_OPERATORS_START).is_operator());
430        assert!(!TokenType(T_OPERATORS_END).is_operator());
431    }
432
433    #[test]
434    fn test_is_aggregator() {
435        assert!(TokenType(T_AVG).is_aggregator());
436        assert!(TokenType(T_BOTTOMK).is_aggregator());
437        assert!(TokenType(T_COUNT).is_aggregator());
438        assert!(TokenType(T_COUNT_VALUES).is_aggregator());
439        assert!(TokenType(T_GROUP).is_aggregator());
440        assert!(TokenType(T_MAX).is_aggregator());
441        assert!(TokenType(T_MIN).is_aggregator());
442        assert!(TokenType(T_QUANTILE).is_aggregator());
443        assert!(TokenType(T_STDDEV).is_aggregator());
444        assert!(TokenType(T_STDVAR).is_aggregator());
445        assert!(TokenType(T_SUM).is_aggregator());
446        assert!(TokenType(T_TOPK).is_aggregator());
447
448        assert!(!TokenType(T_LOR).is_aggregator());
449        assert!(!TokenType(T_ADD).is_aggregator());
450    }
451}