rusty_promql_parser/lexer/
identifier.rs

1//! Identifier parsing for PromQL
2//!
3//! PromQL has several types of identifiers:
4//! - **Label names**: `[a-zA-Z_][a-zA-Z0-9_]*` - no colons allowed
5//! - **Metric names**: `[a-zA-Z_:][a-zA-Z0-9_:]*` - colons allowed (for recording rules)
6//!
7//! Keywords in PromQL are context-sensitive - they can be used as metric names
8//! or label names when not in a keyword position.
9
10use nom::{
11    IResult, Parser,
12    bytes::complete::{take_while, take_while1},
13    combinator::{recognize, verify},
14    sequence::pair,
15};
16
/// Result of parsing an identifier.
///
/// Distinguishes regular identifiers (no colons) from metric identifiers
/// (which contain colons), because only the former may be used as label names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Identifier {
    /// Regular identifier (no colons) - can be used as label name or metric name
    Plain(String),
    /// Metric identifier (contains colons) - only valid as metric name
    Metric(String),
}

impl Identifier {
    /// Borrow the identifier text, regardless of variant.
    pub fn as_str(&self) -> &str {
        match self {
            Identifier::Plain(text) | Identifier::Metric(text) => text,
        }
    }

    /// Report whether this is a metric identifier.
    ///
    /// Only the variant is inspected; the stored text is not scanned for `:`.
    pub fn has_colon(&self) -> bool {
        matches!(self, Identifier::Metric(_))
    }

    /// Consume the identifier and return its text without cloning.
    pub fn into_string(self) -> String {
        match self {
            Identifier::Plain(text) | Identifier::Metric(text) => text,
        }
    }
}

impl std::fmt::Display for Identifier {
    /// Writes the identifier text.
    ///
    /// Uses `Formatter::pad` so that width, fill, alignment and precision
    /// requested by the caller (e.g. `{:>12}`) are applied to the text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(self.as_str())
    }
}
55
/// Returns `true` for characters that may start an identifier:
/// ASCII letters and the underscore.
#[inline]
fn is_alpha(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}
61
/// Returns `true` for characters that may continue an identifier:
/// ASCII letters, ASCII digits and the underscore.
#[inline]
fn is_alpha_numeric(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
67
/// Returns `true` for characters that may start a metric identifier:
/// ASCII letters, the underscore and the colon.
#[inline]
fn is_metric_start(c: char) -> bool {
    c.is_ascii_alphabetic() || matches!(c, '_' | ':')
}
73
/// Returns `true` for characters that may continue a metric identifier:
/// ASCII letters, ASCII digits, the underscore and the colon.
#[inline]
fn is_metric_char(c: char) -> bool {
    match c {
        ':' | '_' => true,
        other => other.is_ascii_alphanumeric(),
    }
}
79
80/// Parse a label name: `[a-zA-Z_][a-zA-Z0-9_]*`
81///
82/// Label names cannot contain colons. This is used for label names in
83/// label matchers like `{job="prometheus"}`.
84///
85/// # Examples
86///
87/// ```
88/// use rusty_promql_parser::lexer::identifier::label_name;
89///
90/// let (rest, name) = label_name("job").unwrap();
91/// assert_eq!(name, "job");
92/// assert!(rest.is_empty());
93///
94/// let (rest, name) = label_name("__name__").unwrap();
95/// assert_eq!(name, "__name__");
96/// ```
97pub fn label_name(input: &str) -> IResult<&str, &str> {
98    recognize(pair(
99        verify(take_while1(is_alpha), |s: &str| {
100            s.chars().next().is_some_and(is_alpha)
101        }),
102        take_while(is_alpha_numeric),
103    ))
104    .parse(input)
105}
106
107/// Parse a metric name: `[a-zA-Z_:][a-zA-Z0-9_:]*`
108///
109/// Metric names can contain colons (for recording rules).
110///
111/// # Examples
112///
113/// ```
114/// use rusty_promql_parser::lexer::identifier::metric_name;
115///
116/// let (rest, name) = metric_name("http_requests_total").unwrap();
117/// assert_eq!(name, "http_requests_total");
118///
119/// let (rest, name) = metric_name("job:request_rate:5m").unwrap();
120/// assert_eq!(name, "job:request_rate:5m");
121///
122/// // Can start with colon
123/// let (rest, name) = metric_name(":request_rate").unwrap();
124/// assert_eq!(name, ":request_rate");
125/// ```
126pub fn metric_name(input: &str) -> IResult<&str, &str> {
127    recognize(pair(
128        verify(take_while1(is_metric_start), |s: &str| {
129            s.chars().next().is_some_and(is_metric_start)
130        }),
131        take_while(is_metric_char),
132    ))
133    .parse(input)
134}
135
136/// Parse an identifier (either label name or metric identifier)
137///
138/// Returns `Identifier::Plain` for identifiers without colons,
139/// and `Identifier::Metric` for identifiers with colons.
140///
141/// # Examples
142///
143/// ```
144/// use rusty_promql_parser::lexer::identifier::{identifier, Identifier};
145///
146/// let (_, id) = identifier("foo").unwrap();
147/// assert_eq!(id, Identifier::Plain("foo".to_string()));
148///
149/// let (_, id) = identifier("foo:bar").unwrap();
150/// assert_eq!(id, Identifier::Metric("foo:bar".to_string()));
151/// ```
152pub fn identifier(input: &str) -> IResult<&str, Identifier> {
153    metric_name
154        .map(|name| {
155            if name.contains(':') {
156                Identifier::Metric(name.to_string())
157            } else {
158                Identifier::Plain(name.to_string())
159            }
160        })
161        .parse(input)
162}
163
/// PromQL keywords
///
/// These keywords have special meaning in certain contexts but can also
/// be used as identifiers (metric names, label names) in other contexts.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Keyword {
    // Aggregation operators
    Sum,
    Avg,
    Count,
    Min,
    Max,
    Group,
    Stddev,
    Stdvar,
    Topk,
    Bottomk,
    CountValues,
    Quantile,
    Limitk,
    LimitRatio,

    // Set operators
    And,
    Or,
    Unless,

    // Binary operator
    Atan2,

    // Modifiers
    Offset,
    By,
    Without,
    On,
    Ignoring,
    GroupLeft,
    GroupRight,
    Bool,

    // @ modifier preprocessors
    Start,
    End,
    Step,
}

impl Keyword {
    /// Get the canonical (lowercase) spelling of the keyword.
    pub fn as_str(&self) -> &'static str {
        match self {
            Keyword::Sum => "sum",
            Keyword::Avg => "avg",
            Keyword::Count => "count",
            Keyword::Min => "min",
            Keyword::Max => "max",
            Keyword::Group => "group",
            Keyword::Stddev => "stddev",
            Keyword::Stdvar => "stdvar",
            Keyword::Topk => "topk",
            Keyword::Bottomk => "bottomk",
            Keyword::CountValues => "count_values",
            Keyword::Quantile => "quantile",
            Keyword::Limitk => "limitk",
            Keyword::LimitRatio => "limit_ratio",
            Keyword::And => "and",
            Keyword::Or => "or",
            Keyword::Unless => "unless",
            Keyword::Atan2 => "atan2",
            Keyword::Offset => "offset",
            Keyword::By => "by",
            Keyword::Without => "without",
            Keyword::On => "on",
            Keyword::Ignoring => "ignoring",
            Keyword::GroupLeft => "group_left",
            Keyword::GroupRight => "group_right",
            Keyword::Bool => "bool",
            Keyword::Start => "start",
            Keyword::End => "end",
            Keyword::Step => "step",
        }
    }

    /// Check if this keyword is an aggregation operator
    pub fn is_aggregation(&self) -> bool {
        matches!(
            self,
            Keyword::Sum
                | Keyword::Avg
                | Keyword::Count
                | Keyword::Min
                | Keyword::Max
                | Keyword::Group
                | Keyword::Stddev
                | Keyword::Stdvar
                | Keyword::Topk
                | Keyword::Bottomk
                | Keyword::CountValues
                | Keyword::Quantile
                | Keyword::Limitk
                | Keyword::LimitRatio
        )
    }

    /// Check if this keyword is an aggregation that takes a parameter
    pub fn is_aggregation_with_param(&self) -> bool {
        matches!(
            self,
            Keyword::Topk
                | Keyword::Bottomk
                | Keyword::CountValues
                | Keyword::Quantile
                | Keyword::Limitk
                | Keyword::LimitRatio
        )
    }

    /// Check if this keyword is a set operator (`and`, `or`, `unless`)
    pub fn is_set_operator(&self) -> bool {
        matches!(self, Keyword::And | Keyword::Or | Keyword::Unless)
    }
}

impl std::fmt::Display for Keyword {
    /// Writes the lowercase spelling returned by [`Keyword::as_str`].
    ///
    /// Uses `Formatter::pad` so that width, fill, alignment and precision
    /// requested by the caller are applied to the text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(self.as_str())
    }
}

/// Try to look up a keyword from a string (case-insensitive)
///
/// The comparison is ASCII case-insensitive and must match the whole string.
/// Spellings come from [`Keyword::as_str`], so there is a single source of
/// truth for keyword text, and no temporary lowercase `String` is allocated.
fn lookup_keyword(s: &str) -> Option<Keyword> {
    // Every variant of `Keyword`; a new variant must be added here to be
    // recognised by the lexer.
    const ALL: &[Keyword] = &[
        // Aggregation operators
        Keyword::Sum,
        Keyword::Avg,
        Keyword::Count,
        Keyword::Min,
        Keyword::Max,
        Keyword::Group,
        Keyword::Stddev,
        Keyword::Stdvar,
        Keyword::Topk,
        Keyword::Bottomk,
        Keyword::CountValues,
        Keyword::Quantile,
        Keyword::Limitk,
        Keyword::LimitRatio,
        // Set operators
        Keyword::And,
        Keyword::Or,
        Keyword::Unless,
        // Binary operator
        Keyword::Atan2,
        // Modifiers
        Keyword::Offset,
        Keyword::By,
        Keyword::Without,
        Keyword::On,
        Keyword::Ignoring,
        Keyword::GroupLeft,
        Keyword::GroupRight,
        Keyword::Bool,
        // @ modifier preprocessors
        Keyword::Start,
        Keyword::End,
        Keyword::Step,
    ];

    ALL.iter()
        .copied()
        .find(|kw| s.eq_ignore_ascii_case(kw.as_str()))
}
332
333/// Parse a keyword (case-insensitive)
334///
335/// Keywords are recognized only when they are complete words (not followed
336/// by alphanumeric characters).
337///
338/// # Examples
339///
340/// ```
341/// use rusty_promql_parser::lexer::identifier::{keyword, Keyword};
342///
343/// let (_, kw) = keyword("SUM").unwrap();
344/// assert_eq!(kw, Keyword::Sum);
345///
346/// let (_, kw) = keyword("count_values").unwrap();
347/// assert_eq!(kw, Keyword::CountValues);
348/// ```
349pub fn keyword(input: &str) -> IResult<&str, Keyword> {
350    // Parse as identifier first (no colons for keywords)
351    let (rest, word) = recognize(pair(
352        verify(take_while1(is_alpha), |s: &str| {
353            s.chars().next().is_some_and(is_alpha)
354        }),
355        take_while(is_alpha_numeric),
356    ))
357    .parse(input)?;
358
359    // Check if it's a keyword
360    if let Some(kw) = lookup_keyword(word) {
361        Ok((rest, kw))
362    } else {
363        Err(nom::Err::Error(nom::error::Error::new(
364            input,
365            nom::error::ErrorKind::Tag,
366        )))
367    }
368}
369
370/// Parse a keyword or identifier
371///
372/// This is the main entry point for lexing identifiers in PromQL.
373/// It first tries to match a keyword, and if that fails, parses as identifier.
374///
375/// # Examples
376///
377/// ```
378/// use rusty_promql_parser::lexer::identifier::{keyword_or_identifier, KeywordOrIdentifier, Keyword, Identifier};
379///
380/// // Keywords are recognized
381/// let (_, result) = keyword_or_identifier("sum").unwrap();
382/// assert_eq!(result, KeywordOrIdentifier::Keyword(Keyword::Sum));
383///
384/// // Regular identifiers
385/// let (_, result) = keyword_or_identifier("http_requests").unwrap();
386/// assert_eq!(result, KeywordOrIdentifier::Identifier(Identifier::Plain("http_requests".to_string())));
387///
388/// // Metric identifiers (with colon)
389/// let (_, result) = keyword_or_identifier("job:rate:5m").unwrap();
390/// assert_eq!(result, KeywordOrIdentifier::Identifier(Identifier::Metric("job:rate:5m".to_string())));
391/// ```
392#[derive(Debug, Clone, PartialEq, Eq)]
393pub enum KeywordOrIdentifier {
394    Keyword(Keyword),
395    Identifier(Identifier),
396}
397
398pub fn keyword_or_identifier(input: &str) -> IResult<&str, KeywordOrIdentifier> {
399    // First try to parse as keyword
400    if let Ok((rest, kw)) = keyword(input) {
401        return Ok((rest, KeywordOrIdentifier::Keyword(kw)));
402    }
403    // Otherwise parse as identifier
404    let (rest, id) = identifier(input)?;
405    Ok((rest, KeywordOrIdentifier::Identifier(id)))
406}
407
408/// Try to parse a specific aggregation operator (case-insensitive)
409pub fn aggregation_op(input: &str) -> IResult<&str, Keyword> {
410    let (rest, kw) = keyword(input)?;
411    if kw.is_aggregation() {
412        Ok((rest, kw))
413    } else {
414        Err(nom::Err::Error(nom::error::Error::new(
415            input,
416            nom::error::ErrorKind::Tag,
417        )))
418    }
419}
420
421/// Try to parse a set operator (and, or, unless) - case-insensitive
422pub fn set_operator(input: &str) -> IResult<&str, Keyword> {
423    let (rest, kw) = keyword(input)?;
424    if kw.is_set_operator() {
425        Ok((rest, kw))
426    } else {
427        Err(nom::Err::Error(nom::error::Error::new(
428            input,
429            nom::error::ErrorKind::Tag,
430        )))
431    }
432}
433
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `keyword` and returns only the keyword, ignoring leftover input.
    fn kw_of(input: &str) -> Keyword {
        keyword(input).unwrap().1
    }

    /// Runs `keyword_or_identifier` and returns only the token.
    fn token_of(input: &str) -> KeywordOrIdentifier {
        keyword_or_identifier(input).unwrap().1
    }

    /// Shorthand for an owned `Identifier::Plain`.
    fn plain(text: &str) -> Identifier {
        Identifier::Plain(text.to_string())
    }

    /// Shorthand for an owned `Identifier::Metric`.
    fn metric(text: &str) -> Identifier {
        Identifier::Metric(text.to_string())
    }

    // Label name tests
    #[test]
    fn test_label_name_simple() {
        assert_eq!(label_name("foo"), Ok(("", "foo")));
    }

    #[test]
    fn test_label_name_with_underscore() {
        assert_eq!(label_name("some_label"), Ok(("", "some_label")));
    }

    #[test]
    fn test_label_name_starting_with_underscore() {
        assert_eq!(label_name("_label"), Ok(("", "_label")));
    }

    #[test]
    fn test_label_name_reserved() {
        assert_eq!(label_name("__name__"), Ok(("", "__name__")));
    }

    #[test]
    fn test_label_name_stops_at_colon() {
        // The colon and everything after it is left unconsumed
        assert_eq!(label_name("foo:bar"), Ok((":bar", "foo")));
    }

    #[test]
    fn test_label_name_fails_on_number_start() {
        label_name("0foo").unwrap_err();
    }

    // Metric name tests
    #[test]
    fn test_metric_name_simple() {
        assert_eq!(metric_name("http_requests"), Ok(("", "http_requests")));
    }

    #[test]
    fn test_metric_name_with_colon() {
        assert_eq!(
            metric_name("job:request_rate:5m"),
            Ok(("", "job:request_rate:5m"))
        );
    }

    #[test]
    fn test_metric_name_starting_with_colon() {
        assert_eq!(metric_name(":request_rate"), Ok(("", ":request_rate")));
    }

    #[test]
    fn test_metric_name_multiple_colons() {
        assert_eq!(metric_name("a:b:c:d"), Ok(("", "a:b:c:d")));
    }

    #[test]
    fn test_metric_name_fails_on_number_start() {
        metric_name("0metric").unwrap_err();
    }

    // Identifier tests
    #[test]
    fn test_identifier_plain() {
        assert_eq!(identifier("foo"), Ok(("", plain("foo"))));
    }

    #[test]
    fn test_identifier_metric() {
        assert_eq!(identifier("foo:bar"), Ok(("", metric("foo:bar"))));
    }

    #[test]
    fn test_identifier_has_colon() {
        assert!(!plain("foo").has_colon());
        assert!(metric("foo:bar").has_colon());
    }

    // Keyword tests
    #[test]
    fn test_keyword_sum() {
        assert_eq!(keyword("sum"), Ok(("", Keyword::Sum)));
    }

    #[test]
    fn test_keyword_case_insensitive() {
        for spelling in ["SUM", "Sum", "sum"] {
            assert_eq!(kw_of(spelling), Keyword::Sum);
        }
    }

    #[test]
    fn test_keyword_count_values() {
        assert_eq!(keyword("count_values"), Ok(("", Keyword::CountValues)));
    }

    #[test]
    fn test_keyword_not_partial_match() {
        // "summary" merely starts with "sum"; it is not the keyword
        keyword("summary").unwrap_err();
    }

    #[test]
    fn test_keyword_with_following_paren() {
        // The opening parenthesis ends the word and stays in the input
        assert_eq!(keyword("sum("), Ok(("(", Keyword::Sum)));
    }

    #[test]
    fn test_all_aggregation_keywords() {
        let cases = [
            ("sum", Keyword::Sum),
            ("avg", Keyword::Avg),
            ("count", Keyword::Count),
            ("min", Keyword::Min),
            ("max", Keyword::Max),
            ("group", Keyword::Group),
            ("stddev", Keyword::Stddev),
            ("stdvar", Keyword::Stdvar),
            ("topk", Keyword::Topk),
            ("bottomk", Keyword::Bottomk),
            ("count_values", Keyword::CountValues),
            ("quantile", Keyword::Quantile),
            ("limitk", Keyword::Limitk),
            ("limit_ratio", Keyword::LimitRatio),
        ];
        for (text, want) in cases {
            let got = kw_of(text);
            assert_eq!(got, want);
            assert!(got.is_aggregation());
        }
    }

    #[test]
    fn test_set_operators() {
        let cases = [
            ("and", Keyword::And),
            ("or", Keyword::Or),
            ("unless", Keyword::Unless),
        ];
        for (text, want) in cases {
            let got = kw_of(text);
            assert_eq!(got, want);
            assert!(got.is_set_operator());
        }
    }

    #[test]
    fn test_modifier_keywords() {
        let cases = [
            ("offset", Keyword::Offset),
            ("by", Keyword::By),
            ("without", Keyword::Without),
            ("on", Keyword::On),
            ("ignoring", Keyword::Ignoring),
            ("group_left", Keyword::GroupLeft),
            ("group_right", Keyword::GroupRight),
            ("bool", Keyword::Bool),
        ];
        for (text, want) in cases {
            assert_eq!(kw_of(text), want);
        }
    }

    // keyword_or_identifier tests
    #[test]
    fn test_keyword_or_identifier_keyword() {
        assert_eq!(
            token_of("sum"),
            KeywordOrIdentifier::Keyword(Keyword::Sum)
        );
    }

    #[test]
    fn test_keyword_or_identifier_plain() {
        assert_eq!(
            token_of("http_requests"),
            KeywordOrIdentifier::Identifier(plain("http_requests"))
        );
    }

    #[test]
    fn test_keyword_or_identifier_metric() {
        assert_eq!(
            token_of("job:rate:5m"),
            KeywordOrIdentifier::Identifier(metric("job:rate:5m"))
        );
    }

    // aggregation_op tests
    #[test]
    fn test_aggregation_op() {
        assert_eq!(
            aggregation_op("sum").map(|(_, kw)| kw),
            Ok(Keyword::Sum)
        );
    }

    #[test]
    fn test_aggregation_op_rejects_non_aggregation() {
        aggregation_op("offset").unwrap_err();
    }

    // set_operator tests
    #[test]
    fn test_set_operator_fn() {
        assert_eq!(set_operator("and").map(|(_, kw)| kw), Ok(Keyword::And));
    }

    #[test]
    fn test_set_operator_rejects_non_set_op() {
        set_operator("sum").unwrap_err();
    }

    // Edge cases
    #[test]
    fn test_nan_as_identifier() {
        // "NaN" on its own is a float literal and belongs to the number parser,
        // but a longer word that merely starts with it ("NaN123") is an identifier
        assert_eq!(identifier("NaN123"), Ok(("", plain("NaN123"))));
    }

    #[test]
    fn test_inf_as_identifier() {
        // "Infoo" only starts with "Inf"; it is an identifier
        assert_eq!(identifier("Infoo"), Ok(("", plain("Infoo"))));
    }

    #[test]
    fn test_keyword_as_part_of_identifier() {
        // "summary" starts with "sum" but must come back as one identifier
        assert_eq!(
            token_of("summary"),
            KeywordOrIdentifier::Identifier(plain("summary"))
        );
    }

    #[test]
    fn test_aggregation_with_param() {
        let parameterised = [
            Keyword::Topk,
            Keyword::Bottomk,
            Keyword::CountValues,
            Keyword::Quantile,
        ];
        for kw in parameterised {
            assert!(kw.is_aggregation_with_param());
        }
        assert!(!Keyword::Sum.is_aggregation_with_param());
    }
}