Skip to main content

rusty_promql_parser/lexer/
identifier.rs

1//! Identifier parsing for PromQL
2//!
3//! PromQL has several types of identifiers:
4//! - **Label names**: `[a-zA-Z_][a-zA-Z0-9_]*` - no colons allowed
5//! - **Metric names**: `[a-zA-Z_:][a-zA-Z0-9_:]*` - colons allowed (for recording rules)
6//!
7//! Keywords in PromQL are context-sensitive - they can be used as metric names
8//! or label names when not in a keyword position.
9
10use nom::{
11    IResult, Parser,
12    bytes::complete::{take_while, take_while1},
13    combinator::{recognize, verify},
14    sequence::pair,
15};
16
/// An identifier token, tagged by whether it is a plain identifier or a
/// metric identifier (one that contains colons).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Identifier {
    /// Colon-free identifier - usable as a label name or as a metric name.
    Plain(String),
    /// Identifier containing colons - only valid as a metric name.
    Metric(String),
}

impl Identifier {
    /// Borrow the identifier text, whichever variant holds it.
    pub fn as_str(&self) -> &str {
        let (Identifier::Plain(text) | Identifier::Metric(text)) = self;
        text
    }

    /// Report whether this is a metric identifier.
    ///
    /// The answer is derived from the variant, not by scanning the text.
    pub fn has_colon(&self) -> bool {
        match self {
            Identifier::Metric(_) => true,
            Identifier::Plain(_) => false,
        }
    }

    /// Consume the identifier and hand back the owned text.
    pub fn into_string(self) -> String {
        let (Identifier::Plain(text) | Identifier::Metric(text)) = self;
        text
    }
}

impl std::fmt::Display for Identifier {
    /// Writes the raw identifier text with no decoration.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
55
/// True for the characters that may start an identifier: ASCII letters and `_`.
#[inline]
fn is_alpha(c: char) -> bool {
    matches!(c, '_' | 'a'..='z' | 'A'..='Z')
}
61
/// True for the characters that may continue an identifier: ASCII letters,
/// ASCII digits and `_`.
#[inline]
fn is_alpha_numeric(c: char) -> bool {
    matches!(c, '_' | '0'..='9' | 'a'..='z' | 'A'..='Z')
}
67
/// True for the characters that may start a metric identifier: ASCII letters,
/// `_` and `:`.
#[inline]
fn is_metric_start(c: char) -> bool {
    matches!(c, '_' | ':' | 'a'..='z' | 'A'..='Z')
}
73
/// True for the characters that may continue a metric identifier: ASCII
/// letters, ASCII digits, `_` and `:`.
#[inline]
fn is_metric_char(c: char) -> bool {
    matches!(c, '_' | ':' | '0'..='9' | 'a'..='z' | 'A'..='Z')
}
79
80/// Parse a label name: `[a-zA-Z_][a-zA-Z0-9_]*`
81///
82/// Label names cannot contain colons. This is used for label names in
83/// label matchers like `{job="prometheus"}`.
84///
85/// # Examples
86///
87/// ```
88/// use rusty_promql_parser::lexer::identifier::label_name;
89///
90/// let (rest, name) = label_name("job").unwrap();
91/// assert_eq!(name, "job");
92/// assert!(rest.is_empty());
93///
94/// let (rest, name) = label_name("__name__").unwrap();
95/// assert_eq!(name, "__name__");
96/// ```
97pub fn label_name(input: &str) -> IResult<&str, &str> {
98    recognize(pair(
99        verify(take_while1(is_alpha), |s: &str| {
100            s.chars().next().is_some_and(is_alpha)
101        }),
102        take_while(is_alpha_numeric),
103    ))
104    .parse(input)
105}
106
107/// Parse a label name that appears in a keyword position.
108///
109/// Grouping clauses and vector matching label lists reserve PromQL keywords,
110/// so names like `on` or `group_left` must be rejected there even though they
111/// remain valid label names in selectors.
112pub(crate) fn clause_label_name(input: &str) -> IResult<&str, &str> {
113    verify(label_name, |name: &&str| lookup_keyword(name).is_none()).parse(input)
114}
115
116/// Parse a metric name: `[a-zA-Z_:][a-zA-Z0-9_:]*`
117///
118/// Metric names can contain colons (for recording rules).
119///
120/// # Examples
121///
122/// ```
123/// use rusty_promql_parser::lexer::identifier::metric_name;
124///
125/// let (rest, name) = metric_name("http_requests_total").unwrap();
126/// assert_eq!(name, "http_requests_total");
127///
128/// let (rest, name) = metric_name("job:request_rate:5m").unwrap();
129/// assert_eq!(name, "job:request_rate:5m");
130///
131/// // Can start with colon
132/// let (rest, name) = metric_name(":request_rate").unwrap();
133/// assert_eq!(name, ":request_rate");
134/// ```
135pub fn metric_name(input: &str) -> IResult<&str, &str> {
136    recognize(pair(
137        verify(take_while1(is_metric_start), |s: &str| {
138            s.chars().next().is_some_and(is_metric_start)
139        }),
140        take_while(is_metric_char),
141    ))
142    .parse(input)
143}
144
145/// Parse an identifier (either label name or metric identifier)
146///
147/// Returns `Identifier::Plain` for identifiers without colons,
148/// and `Identifier::Metric` for identifiers with colons.
149///
150/// # Examples
151///
152/// ```
153/// use rusty_promql_parser::lexer::identifier::{identifier, Identifier};
154///
155/// let (_, id) = identifier("foo").unwrap();
156/// assert_eq!(id, Identifier::Plain("foo".to_string()));
157///
158/// let (_, id) = identifier("foo:bar").unwrap();
159/// assert_eq!(id, Identifier::Metric("foo:bar".to_string()));
160/// ```
161pub fn identifier(input: &str) -> IResult<&str, Identifier> {
162    metric_name
163        .map(|name| {
164            if name.contains(':') {
165                Identifier::Metric(name.to_string())
166            } else {
167                Identifier::Plain(name.to_string())
168            }
169        })
170        .parse(input)
171}
172
/// The set of PromQL keywords.
///
/// A keyword only carries special meaning in particular positions; elsewhere
/// the same spelling may be used as an identifier (metric name, label name).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Keyword {
    // --- aggregation operators ---
    Sum,
    Avg,
    Count,
    Min,
    Max,
    Group,
    Stddev,
    Stdvar,
    Topk,
    Bottomk,
    CountValues,
    Quantile,
    Limitk,
    LimitRatio,

    // --- set operators ---
    And,
    Or,
    Unless,

    // --- binary operator ---
    Atan2,

    // --- modifiers ---
    Offset,
    By,
    Without,
    On,
    Ignoring,
    GroupLeft,
    GroupRight,
    Bool,

    // --- `@` modifier preprocessors ---
    Start,
    End,
    Step,
}

impl Keyword {
    /// Canonical lowercase spelling of the keyword.
    pub fn as_str(&self) -> &'static str {
        match self {
            // aggregation operators
            Self::Sum => "sum",
            Self::Avg => "avg",
            Self::Count => "count",
            Self::Min => "min",
            Self::Max => "max",
            Self::Group => "group",
            Self::Stddev => "stddev",
            Self::Stdvar => "stdvar",
            Self::Topk => "topk",
            Self::Bottomk => "bottomk",
            Self::CountValues => "count_values",
            Self::Quantile => "quantile",
            Self::Limitk => "limitk",
            Self::LimitRatio => "limit_ratio",
            // set operators
            Self::And => "and",
            Self::Or => "or",
            Self::Unless => "unless",
            // binary operator
            Self::Atan2 => "atan2",
            // modifiers
            Self::Offset => "offset",
            Self::By => "by",
            Self::Without => "without",
            Self::On => "on",
            Self::Ignoring => "ignoring",
            Self::GroupLeft => "group_left",
            Self::GroupRight => "group_right",
            Self::Bool => "bool",
            // `@` modifier preprocessors
            Self::Start => "start",
            Self::End => "end",
            Self::Step => "step",
        }
    }

    /// Whether the keyword names an aggregation operator.
    pub fn is_aggregation(&self) -> bool {
        match self {
            Self::Sum
            | Self::Avg
            | Self::Count
            | Self::Min
            | Self::Max
            | Self::Group
            | Self::Stddev
            | Self::Stdvar
            | Self::Topk
            | Self::Bottomk
            | Self::CountValues
            | Self::Quantile
            | Self::Limitk
            | Self::LimitRatio => true,
            Self::And
            | Self::Or
            | Self::Unless
            | Self::Atan2
            | Self::Offset
            | Self::By
            | Self::Without
            | Self::On
            | Self::Ignoring
            | Self::GroupLeft
            | Self::GroupRight
            | Self::Bool
            | Self::Start
            | Self::End
            | Self::Step => false,
        }
    }

    /// Whether the keyword names an aggregation operator that takes a parameter.
    pub fn is_aggregation_with_param(&self) -> bool {
        [
            Self::Topk,
            Self::Bottomk,
            Self::CountValues,
            Self::Quantile,
            Self::Limitk,
            Self::LimitRatio,
        ]
        .contains(self)
    }

    /// Whether the keyword is one of the set operators `and`, `or`, `unless`.
    pub fn is_set_operator(&self) -> bool {
        [Self::And, Self::Or, Self::Unless].contains(self)
    }
}

impl std::fmt::Display for Keyword {
    /// Writes the canonical lowercase spelling.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
300
301/// Try to look up a keyword from a string (case-insensitive)
302fn lookup_keyword(s: &str) -> Option<Keyword> {
303    match s.to_ascii_lowercase().as_str() {
304        // Aggregation operators
305        "sum" => Some(Keyword::Sum),
306        "avg" => Some(Keyword::Avg),
307        "count" => Some(Keyword::Count),
308        "min" => Some(Keyword::Min),
309        "max" => Some(Keyword::Max),
310        "group" => Some(Keyword::Group),
311        "stddev" => Some(Keyword::Stddev),
312        "stdvar" => Some(Keyword::Stdvar),
313        "topk" => Some(Keyword::Topk),
314        "bottomk" => Some(Keyword::Bottomk),
315        "count_values" => Some(Keyword::CountValues),
316        "quantile" => Some(Keyword::Quantile),
317        "limitk" => Some(Keyword::Limitk),
318        "limit_ratio" => Some(Keyword::LimitRatio),
319        // Set operators
320        "and" => Some(Keyword::And),
321        "or" => Some(Keyword::Or),
322        "unless" => Some(Keyword::Unless),
323        // Binary operator
324        "atan2" => Some(Keyword::Atan2),
325        // Modifiers
326        "offset" => Some(Keyword::Offset),
327        "by" => Some(Keyword::By),
328        "without" => Some(Keyword::Without),
329        "on" => Some(Keyword::On),
330        "ignoring" => Some(Keyword::Ignoring),
331        "group_left" => Some(Keyword::GroupLeft),
332        "group_right" => Some(Keyword::GroupRight),
333        "bool" => Some(Keyword::Bool),
334        // @ modifier preprocessors
335        "start" => Some(Keyword::Start),
336        "end" => Some(Keyword::End),
337        "step" => Some(Keyword::Step),
338        _ => None,
339    }
340}
341
342/// Parse a keyword (case-insensitive)
343///
344/// Keywords are recognized only when they are complete words (not followed
345/// by alphanumeric characters).
346///
347/// # Examples
348///
349/// ```
350/// use rusty_promql_parser::lexer::identifier::{keyword, Keyword};
351///
352/// let (_, kw) = keyword("SUM").unwrap();
353/// assert_eq!(kw, Keyword::Sum);
354///
355/// let (_, kw) = keyword("count_values").unwrap();
356/// assert_eq!(kw, Keyword::CountValues);
357/// ```
358pub fn keyword(input: &str) -> IResult<&str, Keyword> {
359    // Parse as identifier first (no colons for keywords)
360    let (rest, word) = recognize(pair(
361        verify(take_while1(is_alpha), |s: &str| {
362            s.chars().next().is_some_and(is_alpha)
363        }),
364        take_while(is_alpha_numeric),
365    ))
366    .parse(input)?;
367
368    // Check if it's a keyword
369    if let Some(kw) = lookup_keyword(word) {
370        Ok((rest, kw))
371    } else {
372        Err(nom::Err::Error(nom::error::Error::new(
373            input,
374            nom::error::ErrorKind::Tag,
375        )))
376    }
377}
378
379/// Parse a keyword or identifier
380///
381/// This is the main entry point for lexing identifiers in PromQL.
382/// It first tries to match a keyword, and if that fails, parses as identifier.
383///
384/// # Examples
385///
386/// ```
387/// use rusty_promql_parser::lexer::identifier::{keyword_or_identifier, KeywordOrIdentifier, Keyword, Identifier};
388///
389/// // Keywords are recognized
390/// let (_, result) = keyword_or_identifier("sum").unwrap();
391/// assert_eq!(result, KeywordOrIdentifier::Keyword(Keyword::Sum));
392///
393/// // Regular identifiers
394/// let (_, result) = keyword_or_identifier("http_requests").unwrap();
395/// assert_eq!(result, KeywordOrIdentifier::Identifier(Identifier::Plain("http_requests".to_string())));
396///
397/// // Metric identifiers (with colon)
398/// let (_, result) = keyword_or_identifier("job:rate:5m").unwrap();
399/// assert_eq!(result, KeywordOrIdentifier::Identifier(Identifier::Metric("job:rate:5m".to_string())));
400/// ```
401#[derive(Debug, Clone, PartialEq, Eq)]
402pub enum KeywordOrIdentifier {
403    Keyword(Keyword),
404    Identifier(Identifier),
405}
406
407pub fn keyword_or_identifier(input: &str) -> IResult<&str, KeywordOrIdentifier> {
408    // First try to parse as keyword
409    if let Ok((rest, kw)) = keyword(input) {
410        return Ok((rest, KeywordOrIdentifier::Keyword(kw)));
411    }
412    // Otherwise parse as identifier
413    let (rest, id) = identifier(input)?;
414    Ok((rest, KeywordOrIdentifier::Identifier(id)))
415}
416
417/// Try to parse a specific aggregation operator (case-insensitive)
418pub fn aggregation_op(input: &str) -> IResult<&str, Keyword> {
419    let (rest, kw) = keyword(input)?;
420    if kw.is_aggregation() {
421        Ok((rest, kw))
422    } else {
423        Err(nom::Err::Error(nom::error::Error::new(
424            input,
425            nom::error::ErrorKind::Tag,
426        )))
427    }
428}
429
430/// Try to parse a set operator (and, or, unless) - case-insensitive
431pub fn set_operator(input: &str) -> IResult<&str, Keyword> {
432    let (rest, kw) = keyword(input)?;
433    if kw.is_set_operator() {
434        Ok((rest, kw))
435    } else {
436        Err(nom::Err::Error(nom::error::Error::new(
437            input,
438            nom::error::ErrorKind::Tag,
439        )))
440    }
441}
442
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand: build an owned plain identifier.
    fn plain(text: &str) -> Identifier {
        Identifier::Plain(text.to_string())
    }

    /// Shorthand: build an owned metric identifier.
    fn metric(text: &str) -> Identifier {
        Identifier::Metric(text.to_string())
    }

    /// Shorthand: parse a keyword and discard the remaining input.
    fn parsed_keyword(input: &str) -> Keyword {
        keyword(input).unwrap().1
    }

    // ---- label_name ----

    #[test]
    fn test_label_name_simple() {
        assert_eq!(label_name("foo"), Ok(("", "foo")));
    }

    #[test]
    fn test_label_name_with_underscore() {
        assert_eq!(label_name("some_label"), Ok(("", "some_label")));
    }

    #[test]
    fn test_label_name_starting_with_underscore() {
        assert_eq!(label_name("_label"), Ok(("", "_label")));
    }

    #[test]
    fn test_label_name_reserved() {
        assert_eq!(label_name("__name__"), Ok(("", "__name__")));
    }

    #[test]
    fn test_label_name_stops_at_colon() {
        // A colon is not part of a label name, so parsing stops in front of it.
        assert_eq!(label_name("foo:bar"), Ok((":bar", "foo")));
    }

    #[test]
    fn test_label_name_fails_on_number_start() {
        assert!(label_name("0foo").is_err());
    }

    // ---- metric_name ----

    #[test]
    fn test_metric_name_simple() {
        assert_eq!(metric_name("http_requests"), Ok(("", "http_requests")));
    }

    #[test]
    fn test_metric_name_with_colon() {
        assert_eq!(
            metric_name("job:request_rate:5m"),
            Ok(("", "job:request_rate:5m"))
        );
    }

    #[test]
    fn test_metric_name_starting_with_colon() {
        assert_eq!(metric_name(":request_rate"), Ok(("", ":request_rate")));
    }

    #[test]
    fn test_metric_name_multiple_colons() {
        assert_eq!(metric_name("a:b:c:d"), Ok(("", "a:b:c:d")));
    }

    #[test]
    fn test_metric_name_fails_on_number_start() {
        assert!(metric_name("0metric").is_err());
    }

    // ---- identifier ----

    #[test]
    fn test_identifier_plain() {
        assert_eq!(identifier("foo"), Ok(("", plain("foo"))));
    }

    #[test]
    fn test_identifier_metric() {
        assert_eq!(identifier("foo:bar"), Ok(("", metric("foo:bar"))));
    }

    #[test]
    fn test_identifier_has_colon() {
        assert!(!plain("foo").has_colon());
        assert!(metric("foo:bar").has_colon());
    }

    // ---- keyword ----

    #[test]
    fn test_keyword_sum() {
        assert_eq!(keyword("sum"), Ok(("", Keyword::Sum)));
    }

    #[test]
    fn test_keyword_case_insensitive() {
        for spelling in ["SUM", "Sum", "sum"] {
            assert_eq!(parsed_keyword(spelling), Keyword::Sum);
        }
    }

    #[test]
    fn test_keyword_count_values() {
        assert_eq!(keyword("count_values"), Ok(("", Keyword::CountValues)));
    }

    #[test]
    fn test_keyword_not_partial_match() {
        // "summary" merely starts with "sum"; it is not the keyword.
        assert!(keyword("summary").is_err());
    }

    #[test]
    fn test_keyword_with_following_paren() {
        // The keyword ends where the parenthesis begins.
        assert_eq!(keyword("sum("), Ok(("(", Keyword::Sum)));
    }

    #[test]
    fn test_all_aggregation_keywords() {
        let table = [
            ("sum", Keyword::Sum),
            ("avg", Keyword::Avg),
            ("count", Keyword::Count),
            ("min", Keyword::Min),
            ("max", Keyword::Max),
            ("group", Keyword::Group),
            ("stddev", Keyword::Stddev),
            ("stdvar", Keyword::Stdvar),
            ("topk", Keyword::Topk),
            ("bottomk", Keyword::Bottomk),
            ("count_values", Keyword::CountValues),
            ("quantile", Keyword::Quantile),
            ("limitk", Keyword::Limitk),
            ("limit_ratio", Keyword::LimitRatio),
        ];
        for (text, want) in table {
            let got = parsed_keyword(text);
            assert_eq!(got, want);
            assert!(got.is_aggregation());
        }
    }

    #[test]
    fn test_set_operators() {
        let table = [
            ("and", Keyword::And),
            ("or", Keyword::Or),
            ("unless", Keyword::Unless),
        ];
        for (text, want) in table {
            let got = parsed_keyword(text);
            assert_eq!(got, want);
            assert!(got.is_set_operator());
        }
    }

    #[test]
    fn test_modifier_keywords() {
        let table = [
            ("offset", Keyword::Offset),
            ("by", Keyword::By),
            ("without", Keyword::Without),
            ("on", Keyword::On),
            ("ignoring", Keyword::Ignoring),
            ("group_left", Keyword::GroupLeft),
            ("group_right", Keyword::GroupRight),
            ("bool", Keyword::Bool),
        ];
        for (text, want) in table {
            assert_eq!(parsed_keyword(text), want);
        }
    }

    // ---- keyword_or_identifier ----

    #[test]
    fn test_keyword_or_identifier_keyword() {
        let token = keyword_or_identifier("sum").unwrap().1;
        assert_eq!(token, KeywordOrIdentifier::Keyword(Keyword::Sum));
    }

    #[test]
    fn test_keyword_or_identifier_plain() {
        let token = keyword_or_identifier("http_requests").unwrap().1;
        assert_eq!(
            token,
            KeywordOrIdentifier::Identifier(plain("http_requests"))
        );
    }

    #[test]
    fn test_keyword_or_identifier_metric() {
        let token = keyword_or_identifier("job:rate:5m").unwrap().1;
        assert_eq!(token, KeywordOrIdentifier::Identifier(metric("job:rate:5m")));
    }

    // ---- aggregation_op ----

    #[test]
    fn test_aggregation_op() {
        assert_eq!(aggregation_op("sum").unwrap().1, Keyword::Sum);
    }

    #[test]
    fn test_aggregation_op_rejects_non_aggregation() {
        assert!(aggregation_op("offset").is_err());
    }

    // ---- set_operator ----

    #[test]
    fn test_set_operator_fn() {
        assert_eq!(set_operator("and").unwrap().1, Keyword::And);
    }

    #[test]
    fn test_set_operator_rejects_non_set_op() {
        assert!(set_operator("sum").is_err());
    }

    // ---- edge cases ----

    #[test]
    fn test_nan_as_identifier() {
        // A bare "NaN" is a float literal and belongs to the number parser,
        // but a longer word that merely starts with it ("NaN123") is an
        // ordinary identifier.
        assert_eq!(identifier("NaN123"), Ok(("", plain("NaN123"))));
    }

    #[test]
    fn test_inf_as_identifier() {
        // "Infoo" starts like "Inf" but is an ordinary identifier.
        assert_eq!(identifier("Infoo"), Ok(("", plain("Infoo"))));
    }

    #[test]
    fn test_keyword_as_part_of_identifier() {
        // "summary" contains "sum" but must come back as one identifier.
        let token = keyword_or_identifier("summary").unwrap().1;
        assert_eq!(token, KeywordOrIdentifier::Identifier(plain("summary")));
    }

    #[test]
    fn test_aggregation_with_param() {
        let parameterized = [
            Keyword::Topk,
            Keyword::Bottomk,
            Keyword::CountValues,
            Keyword::Quantile,
        ];
        for kw in parameterized {
            assert!(kw.is_aggregation_with_param());
        }
        assert!(!Keyword::Sum.is_aggregation_with_param());
    }
}