Skip to main content

logdive_core/
query.rs

1//! Query language: tokenizer, AST, and recursive descent parser.
2//!
3//! Implements the grammar from the project doc's "Notes → Query language
4//! grammar" section, extended in v0.2.0 to support OR and in v0.3.0 to
5//! support explicit parenthesised grouping.
6//!
7//! This module owns *only* the parse step: `&str → QueryNode`. Translating
8//! a `QueryNode` into SQL and binding parameters is the executor's job.
9//! Resolving relative time ranges like `last 2h` against wall-clock time
10//! is also the executor's job; the AST just carries the raw spec.
11//!
12//! # Grammar (v0.3.0)
13//!
14//! ```text
15//! query     := or_expr
16//! or_expr   := and_expr (OR and_expr)*
17//! and_expr  := clause (AND clause)*
18//! clause    := field OP value
19//!            | field CONTAINS string
20//!            | TIME_RANGE
21//!            | "(" or_expr ")"
22//! field     := [a-zA-Z_][a-zA-Z0-9_.]*
23//! OP        := "=" | "!=" | ">" | "<"
24//! value     := string | number | bool
25//! string    := '"' .* '"' | bare_word
26//! TIME_RANGE := "last" duration | "since" datetime
27//! duration  := number ("m" | "h" | "d")
28//! ```
29//!
30//! AND binds tighter than OR. With parentheses users can override precedence:
31//! `(level=error OR level=warn) AND service=payments`.
32//!
33//! Without parens: `level=error AND service=payments OR level=warn` parses as
34//! `(level=error AND service=payments) OR level=warn`.
35
36use std::fmt;
37
38// ---------------------------------------------------------------------------
39// AST
40// ---------------------------------------------------------------------------
41
42/// The top-level query: a disjunction of one or more AND-groups.
43///
44/// A query with no OR (e.g. `level=error AND tag=api`) parses as a single-
45/// element vector containing one `AndGroup`, so the executor has exactly
46/// one code path. This mirrors the v0.1 design choice of always wrapping
47/// a single clause in `And(vec![clause])` — the same idea, lifted up one
48/// level of grammar.
49#[derive(Debug, Clone, PartialEq)]
50pub enum QueryNode {
51    Or(Vec<AndGroup>),
52}
53
54/// A conjunction of one or more clauses, joined by AND.
55///
56/// `clauses` is guaranteed by the parser to be non-empty.
57#[derive(Debug, Clone, PartialEq)]
58pub struct AndGroup {
59    pub clauses: Vec<Clause>,
60}
61
62/// A single clause — the atomic unit a query is built from.
63#[derive(Debug, Clone, PartialEq)]
64pub enum Clause {
65    /// `field OP value` — e.g. `level = error`, `req_id > 100`.
66    Compare {
67        field: String,
68        op: CompareOp,
69        value: QueryValue,
70    },
71    /// `field CONTAINS string` — substring match on a string column.
72    Contains { field: String, value: String },
73    /// `last <N><unit>` — relative time range ending at query time.
74    LastDuration(Duration),
75    /// `since <datetime>` — absolute time range starting at the given moment.
76    /// The string is opaque at the parse layer; the executor uses chrono to
77    /// resolve it (which allows us to accept multiple formats without
78    /// teaching the grammar about any particular one).
79    SinceDatetime(String),
80    /// `( or_expr )` — explicit grouping to override AND/OR precedence.
81    /// New in v0.3.0.
82    Group(Box<QueryNode>),
83}
84
/// The operator in a `field OP value` clause.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompareOp {
    Eq,
    NotEq,
    Gt,
    Lt,
}

impl fmt::Display for CompareOp {
    /// Writes the operator exactly as it is spelled in the query language.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let symbol = match *self {
            CompareOp::Eq => "=",
            CompareOp::NotEq => "!=",
            CompareOp::Gt => ">",
            CompareOp::Lt => "<",
        };
        f.write_str(symbol)
    }
}
104
/// A literal on the right-hand side of a comparison.
///
/// Keeping the variants typed lets the executor bind numbers and booleans
/// with their native SQLite types, so that a comparison such as
/// `req_id > 100` orders numerically instead of lexically.
#[derive(Debug, Clone, PartialEq)]
pub enum QueryValue {
    /// A quoted string or a bare word.
    String(String),
    /// A number written without a fractional part.
    Integer(i64),
    /// A number written with a fractional part.
    Float(f64),
    /// The bare words `true` / `false` (case-insensitive).
    Bool(bool),
}
117
118/// A relative duration parsed from `last <N><unit>`.
119#[derive(Debug, Clone, Copy, PartialEq, Eq)]
120pub struct Duration {
121    pub amount: u64,
122    pub unit: DurationUnit,
123}
124
/// Unit suffix accepted after the number in `last <N><unit>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DurationUnit {
    Minutes,
    Hours,
    Days,
}

impl DurationUnit {
    /// Length of a single unit, in seconds.
    ///
    /// Callers multiply this by the duration's `amount` to get the full
    /// window length.
    pub fn seconds(self) -> i64 {
        const MINUTE: i64 = 60;
        const HOUR: i64 = 60 * MINUTE;
        const DAY: i64 = 24 * HOUR;

        match self {
            DurationUnit::Minutes => MINUTE,
            DurationUnit::Hours => HOUR,
            DurationUnit::Days => DAY,
        }
    }
}
143
144// ---------------------------------------------------------------------------
145// Errors
146// ---------------------------------------------------------------------------
147
/// A parse failure, located by byte offset into the original query text.
///
/// Queries are single-line, so a byte offset is enough; a front end can
/// slice the input around `position` to draw a caret under the problem.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct QueryParseError {
    /// Byte offset into the input that the error is attributed to.
    pub position: usize,
    /// Human-readable description of what went wrong.
    pub message: String,
}

impl fmt::Display for QueryParseError {
    /// Renders as `query parse error at position <offset>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { position, message } = self;
        write!(f, "query parse error at position {position}: {message}")
    }
}

impl std::error::Error for QueryParseError {}
170
171// ---------------------------------------------------------------------------
172// Tokens
173// ---------------------------------------------------------------------------
174
/// Lexical tokens produced by the tokenizer.
#[derive(Debug, Clone, PartialEq)]
enum Token {
    /// A bare word: a field name, a bare-word value, or a keyword.
    ///
    /// Keywords (`and`, `or`, `last`, `since`, `contains`) are recognised
    /// by the parser from a token's position, not by the tokenizer. That
    /// keeps such words usable as bare-word values, e.g.
    /// `msg contains last`. A word like `last` at the *start* of a clause
    /// is always treated as the keyword.
    Ident(String),
    /// The contents of a double-quoted string, without the quotes.
    QuotedString(String),
    /// A numeric literal kept as text; the parser decides later whether
    /// it is an integer or a float.
    Number(String),
    Eq,
    NotEq,
    Gt,
    Lt,
    LParen,
    RParen,
}
195
196#[derive(Debug, Clone)]
197struct SpannedToken {
198    token: Token,
199    position: usize,
200}
201
202// ---------------------------------------------------------------------------
203// Tokenizer
204// ---------------------------------------------------------------------------
205
/// Whether `b` may appear *after* the first byte of an identifier.
///
/// This is the grammar's field-name alphabet (ASCII letters, digits, `_`
/// and `.`) widened with `-` and `:` so that bare-word values such as
/// `x-request-id` and datetime literals stay a single token. `.` also
/// covers dotted version strings in values.
fn is_ident_continuation(b: u8) -> bool {
    matches!(b, b'_' | b'.' | b'-' | b':') || b.is_ascii_alphanumeric()
}
215
216/// Split the input into a stream of tokens with byte-offset positions.
217///
218/// Whitespace is skipped. Unrecognized bytes produce a `QueryParseError`
219/// pointing at the offending character.
220fn tokenize(input: &str) -> Result<Vec<SpannedToken>, QueryParseError> {
221    let bytes = input.as_bytes();
222    let mut i = 0;
223    let mut out = Vec::new();
224
225    while i < bytes.len() {
226        let c = bytes[i];
227
228        // Whitespace.
229        if c.is_ascii_whitespace() {
230            i += 1;
231            continue;
232        }
233
234        // Operators — order matters: check `!=` before `!` would-be, and
235        // both before single `<`/`>`/`=`.
236        if c == b'!' {
237            if i + 1 < bytes.len() && bytes[i + 1] == b'=' {
238                out.push(SpannedToken {
239                    token: Token::NotEq,
240                    position: i,
241                });
242                i += 2;
243                continue;
244            }
245            return Err(QueryParseError {
246                position: i,
247                message: "unexpected '!' — did you mean '!='?".to_string(),
248            });
249        }
250        if c == b'=' {
251            out.push(SpannedToken {
252                token: Token::Eq,
253                position: i,
254            });
255            i += 1;
256            continue;
257        }
258        if c == b'>' {
259            out.push(SpannedToken {
260                token: Token::Gt,
261                position: i,
262            });
263            i += 1;
264            continue;
265        }
266        if c == b'<' {
267            out.push(SpannedToken {
268                token: Token::Lt,
269                position: i,
270            });
271            i += 1;
272            continue;
273        }
274
275        // Parentheses (v0.3.0).
276        if c == b'(' {
277            out.push(SpannedToken {
278                token: Token::LParen,
279                position: i,
280            });
281            i += 1;
282            continue;
283        }
284        if c == b')' {
285            out.push(SpannedToken {
286                token: Token::RParen,
287                position: i,
288            });
289            i += 1;
290            continue;
291        }
292
293        // Quoted string.
294        if c == b'"' {
295            let start = i;
296            i += 1; // consume opening quote
297            let content_start = i;
298            while i < bytes.len() && bytes[i] != b'"' {
299                // No escape handling in v1 — the grammar is `'"' .* '"'`
300                // and real log-query users don't embed quotes in values.
301                // If this becomes a pain we add escape handling later.
302                i += 1;
303            }
304            if i >= bytes.len() {
305                return Err(QueryParseError {
306                    position: start,
307                    message: "unterminated quoted string".to_string(),
308                });
309            }
310            let s = std::str::from_utf8(&bytes[content_start..i])
311                .expect("input is &str, slice is UTF-8")
312                .to_string();
313            i += 1; // consume closing quote
314            out.push(SpannedToken {
315                token: Token::QuotedString(s),
316                position: start,
317            });
318            continue;
319        }
320
321        // Digit-led token.
322        //
323        // Two possibilities:
324        //  - Pure-digit run (with optional fractional part) → Token::Number.
325        //    Example: `100`, `1.5`.
326        //  - Digit-led run that contains `-` or `:` → Token::Ident. This
327        //    supports bare datetime literals like `2024-01-01T10:00:00Z`
328        //    after `since`. Colon is included for completeness so time-
329        //    of-day literals don't need quoting either.
330        //
331        // The disambiguation happens at the first non-digit, non-dot byte:
332        // if that byte is `-` or `:`, we promote the whole run (and keep
333        // consuming continuation bytes) to an Ident. Otherwise we stop at
334        // the end of the numeric run and emit a Number.
335        if c.is_ascii_digit() {
336            let start = i;
337            let mut saw_dot = false;
338
339            // First phase: consume digits and at most one dot (only when
340            // the dot is followed by a digit, preserving the existing
341            // `1.5` behaviour). We peek at the next byte after each dot
342            // to decide.
343            while i < bytes.len() && (bytes[i].is_ascii_digit() || (bytes[i] == b'.' && !saw_dot)) {
344                if bytes[i] == b'.' {
345                    if i + 1 >= bytes.len() || !bytes[i + 1].is_ascii_digit() {
346                        break;
347                    }
348                    saw_dot = true;
349                }
350                i += 1;
351            }
352
353            // Second phase: if the next byte indicates this digit-led run
354            // is actually an ident (datetime, multi-dot version string),
355            // keep consuming all ident-continuation bytes and emit Ident.
356            //
357            // Promotion triggers:
358            //   `-` or `:` — datetime literals (`2024-01-01`, `10:30`)
359            //   `.`        — dotted strings beyond one fractional part
360            //                (`1.2.3`); the first phase stops at the
361            //                second dot due to its `!saw_dot` guard,
362            //                leaving `bytes[i]` on that second dot.
363            //
364            // Letters are intentionally NOT a promotion trigger: `30m`
365            // must tokenize as Number("30") + Ident("m") so the parser's
366            // `last <N><unit>` rule works. Users who want digit-led
367            // values with letter suffixes (`3beta`) must quote them.
368            if i < bytes.len() && (bytes[i] == b'-' || bytes[i] == b':' || bytes[i] == b'.') {
369                while i < bytes.len() && is_ident_continuation(bytes[i]) {
370                    i += 1;
371                }
372                let s = std::str::from_utf8(&bytes[start..i])
373                    .expect("input is &str, slice is UTF-8")
374                    .to_string();
375                out.push(SpannedToken {
376                    token: Token::Ident(s),
377                    position: start,
378                });
379                continue;
380            }
381
382            let s = std::str::from_utf8(&bytes[start..i])
383                .expect("ascii digits are UTF-8")
384                .to_string();
385            out.push(SpannedToken {
386                token: Token::Number(s),
387                position: start,
388            });
389            continue;
390        }
391
392        // Identifier / bare word: starts with letter or underscore,
393        // continues per `is_ident_continuation`. Hyphen and colon are
394        // allowed inside so bare-word values like `x-request-id` and
395        // colon-separated fragments work; `validate_field_name` later
396        // enforces the stricter field-name subset.
397        if c == b'_' || c.is_ascii_alphabetic() {
398            let start = i;
399            while i < bytes.len() && is_ident_continuation(bytes[i]) {
400                i += 1;
401            }
402            let s = std::str::from_utf8(&bytes[start..i])
403                .expect("input is &str, slice is UTF-8")
404                .to_string();
405            out.push(SpannedToken {
406                token: Token::Ident(s),
407                position: start,
408            });
409            continue;
410        }
411
412        return Err(QueryParseError {
413            position: i,
414            message: format!("unexpected character {:?}", c as char),
415        });
416    }
417
418    Ok(out)
419}
420
421// ---------------------------------------------------------------------------
422// Parser
423// ---------------------------------------------------------------------------
424
425/// Parse a query string into a `QueryNode`.
426///
427/// This is the only public entry point. Implements the v0.3.0 grammar
428/// top-down via recursive descent: `or_expr` at the outermost level,
429/// `and_expr` one level in, `clause` at the leaves (with optional paren
430/// recursion back to `or_expr`).
431pub fn parse(input: &str) -> Result<QueryNode, QueryParseError> {
432    let tokens = tokenize(input)?;
433    if tokens.is_empty() {
434        return Err(QueryParseError {
435            position: 0,
436            message: "empty query".to_string(),
437        });
438    }
439
440    let mut p = Parser {
441        tokens: &tokens,
442        cursor: 0,
443    };
444    let node = p.parse_or_expr()?;
445
446    // After a complete or_expr, the only acceptable state is end-of-input.
447    // If a token remains, the user wrote something the grammar doesn't
448    // accept (commonly a missing AND/OR between clauses, or an unmatched `)`).
449    if let Some(extra) = p.peek() {
450        let message = if matches!(extra.token, Token::RParen) {
451            "unexpected ')' — no matching '('".to_string()
452        } else {
453            "expected 'AND' or 'OR' between clauses".to_string()
454        };
455        return Err(QueryParseError {
456            position: extra.position,
457            message,
458        });
459    }
460
461    Ok(node)
462}
463
464struct Parser<'a> {
465    tokens: &'a [SpannedToken],
466    cursor: usize,
467}
468
469impl<'a> Parser<'a> {
470    fn peek(&self) -> Option<&'a SpannedToken> {
471        self.tokens.get(self.cursor)
472    }
473
474    fn advance(&mut self) -> Option<&'a SpannedToken> {
475        let t = self.tokens.get(self.cursor);
476        if t.is_some() {
477            self.cursor += 1;
478        }
479        t
480    }
481
482    /// Position to attribute to an error when the tokens are exhausted.
483    fn end_position(&self) -> usize {
484        self.tokens
485            .last()
486            .map(|t| t.position + token_len(&t.token))
487            .unwrap_or(0)
488    }
489
490    /// Top-level: one or more AND-groups separated by `OR`.
491    ///
492    /// AND binds tighter than OR. We always wrap the result in
493    /// `QueryNode::Or`, even for a query with no OR present — uniformity
494    /// keeps the executor simple.
495    fn parse_or_expr(&mut self) -> Result<QueryNode, QueryParseError> {
496        let mut groups = Vec::new();
497        groups.push(self.parse_and_expr()?);
498
499        while let Some(tok) = self.peek() {
500            match &tok.token {
501                Token::Ident(s) if s.eq_ignore_ascii_case("or") => {
502                    let or_pos = tok.position;
503                    self.advance();
504                    // Detect trailing-OR or doubled-OR before we recurse.
505                    // Without this the inner parse_and_expr would error
506                    // with a less helpful message.
507                    match self.peek() {
508                        None => {
509                            return Err(QueryParseError {
510                                position: or_pos,
511                                message: "expected a clause after 'OR'".to_string(),
512                            });
513                        }
514                        Some(next) => {
515                            if let Token::Ident(s2) = &next.token {
516                                if s2.eq_ignore_ascii_case("or") || s2.eq_ignore_ascii_case("and") {
517                                    return Err(QueryParseError {
518                                        position: next.position,
519                                        message: format!(
520                                            "expected a clause after 'OR', got '{}'",
521                                            s2.to_uppercase()
522                                        ),
523                                    });
524                                }
525                            }
526                        }
527                    }
528                    groups.push(self.parse_and_expr()?);
529                }
530                _ => break,
531            }
532        }
533
534        Ok(QueryNode::Or(groups))
535    }
536
537    /// One AND-group: one or more clauses joined by `AND`.
538    fn parse_and_expr(&mut self) -> Result<AndGroup, QueryParseError> {
539        let mut clauses = Vec::new();
540        clauses.push(self.parse_clause()?);
541
542        while let Some(tok) = self.peek() {
543            match &tok.token {
544                Token::Ident(s) if s.eq_ignore_ascii_case("and") => {
545                    let and_pos = tok.position;
546                    self.advance();
547                    // Same trailing/double check as for OR.
548                    match self.peek() {
549                        None => {
550                            return Err(QueryParseError {
551                                position: and_pos,
552                                message: "expected a clause after 'AND'".to_string(),
553                            });
554                        }
555                        Some(next) => {
556                            if let Token::Ident(s2) = &next.token {
557                                if s2.eq_ignore_ascii_case("and") || s2.eq_ignore_ascii_case("or") {
558                                    return Err(QueryParseError {
559                                        position: next.position,
560                                        message: format!(
561                                            "expected a clause after 'AND', got '{}'",
562                                            s2.to_uppercase()
563                                        ),
564                                    });
565                                }
566                            }
567                        }
568                    }
569                    clauses.push(self.parse_clause()?);
570                }
571                // OR ends this AND-group; the outer or_expr loop picks it up.
572                Token::Ident(s) if s.eq_ignore_ascii_case("or") => break,
573                // Anything else also ends this AND-group; the outer parser
574                // is responsible for deciding whether that's an error
575                // (extra unconsumed token) or success (end of input).
576                _ => break,
577            }
578        }
579
580        Ok(AndGroup { clauses })
581    }
582
583    fn parse_clause(&mut self) -> Result<Clause, QueryParseError> {
584        let tok = self.peek().ok_or_else(|| QueryParseError {
585            position: self.end_position(),
586            message: "expected a clause, got end of input".to_string(),
587        })?;
588
589        // Parenthesized subexpression: `(` or_expr `)`.
590        if matches!(&tok.token, Token::LParen) {
591            let open_pos = tok.position;
592            self.advance(); // consume `(`
593            let inner = self.parse_or_expr()?;
594            match self.peek() {
595                Some(close) if matches!(close.token, Token::RParen) => {
596                    self.advance(); // consume `)`
597                }
598                Some(close) => {
599                    return Err(QueryParseError {
600                        position: close.position,
601                        message: "expected ')' to close '('".to_string(),
602                    });
603                }
604                None => {
605                    return Err(QueryParseError {
606                        position: open_pos,
607                        message: "unclosed '(' — expected ')'".to_string(),
608                    });
609                }
610            }
611            return Ok(Clause::Group(Box::new(inner)));
612        }
613
614        // A `)` here without a matching `(` — error early with a targeted message.
615        if matches!(&tok.token, Token::RParen) {
616            return Err(QueryParseError {
617                position: tok.position,
618                message: "unexpected ')' — no matching '('".to_string(),
619            });
620        }
621
622        // Time-range clauses are keyword-led.
623        if let Token::Ident(s) = &tok.token {
624            if s.eq_ignore_ascii_case("last") {
625                self.advance();
626                return self.parse_last_duration();
627            }
628            if s.eq_ignore_ascii_case("since") {
629                self.advance();
630                return self.parse_since_datetime();
631            }
632            // A leading bare AND/OR here means the grammar was violated
633            // (e.g. the input started with "OR level=error").
634            if s.eq_ignore_ascii_case("and") || s.eq_ignore_ascii_case("or") {
635                return Err(QueryParseError {
636                    position: tok.position,
637                    message: format!("unexpected '{}' at start of clause", s.to_uppercase()),
638                });
639            }
640        }
641
642        // Otherwise: field-led clause (compare or contains).
643        self.parse_field_led_clause()
644    }
645
646    fn parse_last_duration(&mut self) -> Result<Clause, QueryParseError> {
647        let num_tok = self.advance().ok_or_else(|| QueryParseError {
648            position: self.end_position(),
649            message: "expected a number after 'last'".to_string(),
650        })?;
651        let num_str = match &num_tok.token {
652            Token::Number(s) => s,
653            _ => {
654                return Err(QueryParseError {
655                    position: num_tok.position,
656                    message: "expected a number after 'last'".to_string(),
657                });
658            }
659        };
660        if num_str.contains('.') {
661            return Err(QueryParseError {
662                position: num_tok.position,
663                message: "duration amount must be a whole number".to_string(),
664            });
665        }
666        let amount: u64 = num_str.parse().map_err(|_| QueryParseError {
667            position: num_tok.position,
668            message: format!("invalid duration amount {num_str:?}"),
669        })?;
670
671        let unit_tok = self.advance().ok_or_else(|| QueryParseError {
672            position: self.end_position(),
673            message: "expected a duration unit ('m', 'h', or 'd') after the number".to_string(),
674        })?;
675        let unit_str = match &unit_tok.token {
676            Token::Ident(s) => s,
677            _ => {
678                return Err(QueryParseError {
679                    position: unit_tok.position,
680                    message: "expected a duration unit ('m', 'h', or 'd')".to_string(),
681                });
682            }
683        };
684        let unit = match unit_str.as_str() {
685            "m" => DurationUnit::Minutes,
686            "h" => DurationUnit::Hours,
687            "d" => DurationUnit::Days,
688            other => {
689                return Err(QueryParseError {
690                    position: unit_tok.position,
691                    message: format!("unknown duration unit {other:?}, expected 'm', 'h', or 'd'"),
692                });
693            }
694        };
695
696        Ok(Clause::LastDuration(Duration { amount, unit }))
697    }
698
699    fn parse_since_datetime(&mut self) -> Result<Clause, QueryParseError> {
700        let tok = self.advance().ok_or_else(|| QueryParseError {
701            position: self.end_position(),
702            message: "expected a datetime after 'since'".to_string(),
703        })?;
704        let dt = match &tok.token {
705            Token::QuotedString(s) => s.clone(),
706            Token::Ident(s) => s.clone(),
707            Token::Number(s) => s.clone(),
708            _ => {
709                return Err(QueryParseError {
710                    position: tok.position,
711                    message: "expected a datetime after 'since'".to_string(),
712                });
713            }
714        };
715        Ok(Clause::SinceDatetime(dt))
716    }
717
718    fn parse_field_led_clause(&mut self) -> Result<Clause, QueryParseError> {
719        let field_tok = self.advance().expect("caller peeked a token");
720        let field = match &field_tok.token {
721            Token::Ident(s) => s.clone(),
722            _ => {
723                return Err(QueryParseError {
724                    position: field_tok.position,
725                    message: "expected a field name".to_string(),
726                });
727            }
728        };
729        validate_field_name(&field, field_tok.position)?;
730
731        let op_tok = self.advance().ok_or_else(|| QueryParseError {
732            position: self.end_position(),
733            message: "expected an operator after the field name".to_string(),
734        })?;
735
736        // CONTAINS is a keyword stored as Ident.
737        if let Token::Ident(s) = &op_tok.token {
738            if s.eq_ignore_ascii_case("contains") {
739                let val_tok = self.advance().ok_or_else(|| QueryParseError {
740                    position: self.end_position(),
741                    message: "expected a string after 'contains'".to_string(),
742                })?;
743                let s = match &val_tok.token {
744                    Token::QuotedString(s) => s.clone(),
745                    Token::Ident(s) => s.clone(),
746                    _ => {
747                        return Err(QueryParseError {
748                            position: val_tok.position,
749                            message: "'contains' requires a string value".to_string(),
750                        });
751                    }
752                };
753                return Ok(Clause::Contains { field, value: s });
754            }
755        }
756
757        let op = match &op_tok.token {
758            Token::Eq => CompareOp::Eq,
759            Token::NotEq => CompareOp::NotEq,
760            Token::Gt => CompareOp::Gt,
761            Token::Lt => CompareOp::Lt,
762            _ => {
763                return Err(QueryParseError {
764                    position: op_tok.position,
765                    message: "expected one of =, !=, >, <, or 'contains'".to_string(),
766                });
767            }
768        };
769
770        let val_tok = self.advance().ok_or_else(|| QueryParseError {
771            position: self.end_position(),
772            message: "expected a value after the operator".to_string(),
773        })?;
774        let value = token_to_query_value(val_tok)?;
775
776        Ok(Clause::Compare { field, op, value })
777    }
778}
779
780/// Enforce the grammar's field regex: `[a-zA-Z_][a-zA-Z0-9_.]*`.
781///
782/// The tokenizer is more permissive (it allows `-` and `:` inside idents
783/// so that bare-word *values* like `x-request-id` and datetime literals
784/// tokenize cleanly). We re-validate here because a field name is a
785/// stricter subset.
786fn validate_field_name(s: &str, position: usize) -> Result<(), QueryParseError> {
787    let mut chars = s.chars();
788    let first = chars.next().ok_or_else(|| QueryParseError {
789        position,
790        message: "empty field name".to_string(),
791    })?;
792    if !(first.is_ascii_alphabetic() || first == '_') {
793        return Err(QueryParseError {
794            position,
795            message: format!("invalid field name {s:?}: must start with a letter or underscore"),
796        });
797    }
798    for c in chars {
799        if !(c.is_ascii_alphanumeric() || c == '_' || c == '.') {
800            return Err(QueryParseError {
801                position,
802                message: format!(
803                    "invalid field name {s:?}: only letters, digits, underscores, and dots are allowed"
804                ),
805            });
806        }
807    }
808    Ok(())
809}
810
811fn token_to_query_value(tok: &SpannedToken) -> Result<QueryValue, QueryParseError> {
812    match &tok.token {
813        Token::QuotedString(s) => Ok(QueryValue::String(s.clone())),
814        Token::Number(s) => {
815            if s.contains('.') {
816                let f: f64 = s.parse().map_err(|_| QueryParseError {
817                    position: tok.position,
818                    message: format!("invalid number {s:?}"),
819                })?;
820                Ok(QueryValue::Float(f))
821            } else {
822                let n: i64 = s.parse().map_err(|_| QueryParseError {
823                    position: tok.position,
824                    message: format!("invalid integer {s:?}"),
825                })?;
826                Ok(QueryValue::Integer(n))
827            }
828        }
829        Token::Ident(s) => {
830            // Booleans as bare words.
831            if s.eq_ignore_ascii_case("true") {
832                Ok(QueryValue::Bool(true))
833            } else if s.eq_ignore_ascii_case("false") {
834                Ok(QueryValue::Bool(false))
835            } else {
836                Ok(QueryValue::String(s.clone()))
837            }
838        }
839        _ => Err(QueryParseError {
840            position: tok.position,
841            message: "expected a value (string, number, or boolean)".to_string(),
842        }),
843    }
844}
845
846fn token_len(t: &Token) -> usize {
847    match t {
848        Token::Ident(s) | Token::Number(s) => s.len(),
849        Token::QuotedString(s) => s.len() + 2, // approximate, for error positioning only
850        Token::Eq | Token::Gt | Token::Lt | Token::LParen | Token::RParen => 1,
851        Token::NotEq => 2,
852    }
853}
854
855// ---------------------------------------------------------------------------
856// Tests
857// ---------------------------------------------------------------------------
858
#[cfg(test)]
mod tests {
    use super::*;

    // -----------------------------------------------------------------
    // Fixture builders and assertion helpers
    // -----------------------------------------------------------------

    /// Build a `QueryNode` from several clause lists, one AND-group per list.
    fn or_of(groups: Vec<Vec<Clause>>) -> QueryNode {
        let groups: Vec<AndGroup> = groups
            .into_iter()
            .map(|clauses| AndGroup { clauses })
            .collect();
        QueryNode::Or(groups)
    }

    /// Build the OR-free shape: a single AND-group under the `Or` node.
    fn one_group(clauses: Vec<Clause>) -> QueryNode {
        or_of(vec![clauses])
    }

    /// A `field OP value` clause.
    fn cmp(field: &str, op: CompareOp, value: QueryValue) -> Clause {
        let field = field.to_string();
        Clause::Compare { field, op, value }
    }

    /// A string-typed query value.
    fn text(value: &str) -> QueryValue {
        QueryValue::String(value.into())
    }

    /// `field = "value"` with a string value.
    fn str_eq(field: &str, value: &str) -> Clause {
        cmp(field, CompareOp::Eq, text(value))
    }

    /// `field = value` with an integer value.
    fn int_eq(field: &str, value: i64) -> Clause {
        cmp(field, CompareOp::Eq, QueryValue::Integer(value))
    }

    /// A `field contains value` clause.
    fn contains_clause(field: &str, value: &str) -> Clause {
        Clause::Contains {
            field: field.into(),
            value: value.into(),
        }
    }

    /// A `since <datetime>` clause carrying the raw datetime text.
    fn since(stamp: &str) -> Clause {
        Clause::SinceDatetime(stamp.into())
    }

    /// Assert that `query` parses successfully into exactly `expected`.
    #[track_caller]
    fn assert_parses(query: &str, expected: QueryNode) {
        assert_eq!(parse(query).unwrap(), expected);
    }

    /// Parse `query` and return the AND-groups under the top-level `Or`.
    #[track_caller]
    fn top_level_groups(query: &str) -> Vec<AndGroup> {
        let QueryNode::Or(groups) = parse(query).unwrap();
        groups
    }

    /// Parse `query`, expecting failure, and return the error.
    #[track_caller]
    fn rejection(query: &str) -> QueryParseError {
        parse(query).unwrap_err()
    }

    /// Assert that `query` is rejected and that the error message contains
    /// every one of `fragments`.
    #[track_caller]
    fn assert_rejected_mentioning(query: &str, fragments: &[&str]) {
        let err = rejection(query);
        for fragment in fragments {
            assert!(err.message.contains(*fragment));
        }
    }

    // -----------------------------------------------------------------
    // Each operator parses correctly (carried forward from v0.1)
    // -----------------------------------------------------------------

    #[test]
    fn eq_operator() {
        assert_parses("level=error", one_group(vec![str_eq("level", "error")]));
    }

    #[test]
    fn not_eq_operator() {
        let expected = cmp("level", CompareOp::NotEq, text("info"));
        assert_parses("level!=info", one_group(vec![expected]));
    }

    #[test]
    fn gt_operator_with_integer() {
        let expected = cmp("req_id", CompareOp::Gt, QueryValue::Integer(100));
        assert_parses("req_id > 100", one_group(vec![expected]));
    }

    #[test]
    fn lt_operator_with_float() {
        let expected = cmp("duration", CompareOp::Lt, QueryValue::Float(1.5));
        assert_parses("duration < 1.5", one_group(vec![expected]));
    }

    #[test]
    fn contains_operator_with_quoted_string() {
        assert_parses(
            r#"message contains "database timeout""#,
            one_group(vec![contains_clause("message", "database timeout")]),
        );
    }

    #[test]
    fn contains_operator_with_bare_word() {
        assert_parses(
            "message contains timeout",
            one_group(vec![contains_clause("message", "timeout")]),
        );
    }

    #[test]
    fn contains_is_case_insensitive() {
        assert_parses(
            "message CONTAINS boom",
            one_group(vec![contains_clause("message", "boom")]),
        );
    }

    #[test]
    fn boolean_value() {
        let ok_is = |flag: bool| one_group(vec![cmp("ok", CompareOp::Eq, QueryValue::Bool(flag))]);
        assert_parses("ok=true", ok_is(true));
        assert_parses("ok=FALSE", ok_is(false));
    }

    #[test]
    fn quoted_string_value_preserves_spaces() {
        assert_parses(
            r#"service="payments gateway""#,
            one_group(vec![str_eq("service", "payments gateway")]),
        );
    }

    #[test]
    fn dotted_field_name_for_nested_json() {
        assert_parses("user.id=42", one_group(vec![int_eq("user.id", 42)]));
    }

    // -----------------------------------------------------------------
    // Time ranges (carried forward from v0.1)
    // -----------------------------------------------------------------

    #[test]
    fn last_minutes() {
        let window = Duration {
            amount: 30,
            unit: DurationUnit::Minutes,
        };
        assert_parses("last 30m", one_group(vec![Clause::LastDuration(window)]));
    }

    #[test]
    fn last_hours() {
        let window = Duration {
            amount: 2,
            unit: DurationUnit::Hours,
        };
        assert_parses("last 2h", one_group(vec![Clause::LastDuration(window)]));
    }

    #[test]
    fn last_days() {
        let window = Duration {
            amount: 7,
            unit: DurationUnit::Days,
        };
        assert_parses("last 7d", one_group(vec![Clause::LastDuration(window)]));
    }

    #[test]
    fn since_datetime_is_opaque_string() {
        assert_parses("since 2024-01-01", one_group(vec![since("2024-01-01")]));
    }

    #[test]
    fn since_datetime_can_be_quoted() {
        assert_parses(
            r#"since "2024-01-01T10:00:00Z""#,
            one_group(vec![since("2024-01-01T10:00:00Z")]),
        );
    }

    #[test]
    fn since_datetime_bare_with_time_component_parses() {
        assert_parses(
            "since 2024-01-01T10:00:00Z",
            one_group(vec![since("2024-01-01T10:00:00Z")]),
        );
    }

    #[test]
    fn since_datetime_bare_followed_by_and_clause() {
        assert_parses(
            "since 2024-01-01 AND level=error",
            one_group(vec![since("2024-01-01"), str_eq("level", "error")]),
        );
    }

    // -----------------------------------------------------------------
    // AND chaining (carried forward from v0.1, AST shape updated)
    // -----------------------------------------------------------------

    #[test]
    fn two_clauses_with_and() {
        assert_parses(
            "level=error AND service=payments",
            one_group(vec![
                str_eq("level", "error"),
                str_eq("service", "payments"),
            ]),
        );
    }

    #[test]
    fn and_is_case_insensitive() {
        assert_parses(
            "level=error and service=payments",
            one_group(vec![
                str_eq("level", "error"),
                str_eq("service", "payments"),
            ]),
        );
    }

    #[test]
    fn three_clauses_with_time_range() {
        let window = Duration {
            amount: 30,
            unit: DurationUnit::Minutes,
        };
        assert_parses(
            "tag=api AND level=error AND last 30m",
            one_group(vec![
                str_eq("tag", "api"),
                str_eq("level", "error"),
                Clause::LastDuration(window),
            ]),
        );
    }

    // -----------------------------------------------------------------
    // OR — new in v0.2.0
    // -----------------------------------------------------------------

    #[test]
    fn single_or_two_groups() {
        // The most common shape: one field compared against two values.
        assert_parses(
            "level=error OR level=warn",
            or_of(vec![
                vec![str_eq("level", "error")],
                vec![str_eq("level", "warn")],
            ]),
        );
    }

    #[test]
    fn or_is_case_insensitive() {
        let lowered = parse("level=error or level=warn").unwrap();
        for other in ["level=error OR level=warn", "level=error Or level=warn"] {
            assert_eq!(lowered, parse(other).unwrap());
        }
    }

    #[test]
    fn three_or_groups() {
        assert_parses(
            "level=error OR level=warn OR level=fatal",
            or_of(vec![
                vec![str_eq("level", "error")],
                vec![str_eq("level", "warn")],
                vec![str_eq("level", "fatal")],
            ]),
        );
    }

    #[test]
    fn or_with_mixed_clause_types() {
        // Compare, contains, and time-range clauses may each sit on their
        // own side of an OR.
        let window = Duration {
            amount: 30,
            unit: DurationUnit::Minutes,
        };
        assert_parses(
            r#"level=error OR message contains "timeout" OR last 30m"#,
            or_of(vec![
                vec![str_eq("level", "error")],
                vec![contains_clause("message", "timeout")],
                vec![Clause::LastDuration(window)],
            ]),
        );
    }

    #[test]
    fn and_binds_tighter_than_or() {
        // `a=1 AND b=2 OR c=3` parses as `(a=1 AND b=2) OR (c=3)`.
        assert_parses(
            "a=1 AND b=2 OR c=3",
            or_of(vec![
                vec![int_eq("a", 1), int_eq("b", 2)],
                vec![int_eq("c", 3)],
            ]),
        );
    }

    #[test]
    fn or_then_and_groups_correctly() {
        // `a=1 OR b=2 AND c=3` parses as `(a=1) OR (b=2 AND c=3)`.
        assert_parses(
            "a=1 OR b=2 AND c=3",
            or_of(vec![
                vec![int_eq("a", 1)],
                vec![int_eq("b", 2), int_eq("c", 3)],
            ]),
        );
    }

    #[test]
    fn or_with_and_on_both_sides() {
        // `a=1 AND b=2 OR c=3 AND d=4` parses as
        // `(a=1 AND b=2) OR (c=3 AND d=4)`.
        assert_parses(
            "a=1 AND b=2 OR c=3 AND d=4",
            or_of(vec![
                vec![int_eq("a", 1), int_eq("b", 2)],
                vec![int_eq("c", 3), int_eq("d", 4)],
            ]),
        );
    }

    #[test]
    fn or_combines_with_time_ranges() {
        // Common in practice: "errors in the last hour OR fatal ever".
        let window = Duration {
            amount: 1,
            unit: DurationUnit::Hours,
        };
        assert_parses(
            "level=error AND last 1h OR level=fatal",
            or_of(vec![
                vec![str_eq("level", "error"), Clause::LastDuration(window)],
                vec![str_eq("level", "fatal")],
            ]),
        );
    }

    #[test]
    fn no_or_present_still_wraps_in_or_node() {
        // The "always wrap" invariant: a lone AND-group is still
        // structurally `Or(vec![one_group])`.
        let groups = top_level_groups("level=error");
        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0].clauses.len(), 1);
    }

    // -----------------------------------------------------------------
    // OR error cases
    // -----------------------------------------------------------------

    #[test]
    fn trailing_or_is_an_error() {
        assert_rejected_mentioning("level=error OR", &["OR", "clause"]);
    }

    #[test]
    fn leading_or_is_an_error() {
        assert_rejected_mentioning("OR level=error", &["OR"]);
    }

    #[test]
    fn double_or_is_an_error() {
        assert_rejected_mentioning("level=error OR OR level=warn", &["clause"]);
    }

    #[test]
    fn or_followed_by_and_is_an_error() {
        assert_rejected_mentioning("level=error OR AND level=warn", &["clause"]);
    }

    #[test]
    fn trailing_and_is_an_error() {
        assert_rejected_mentioning("level=error AND", &["AND", "clause"]);
    }

    #[test]
    fn leading_and_is_an_error() {
        assert_rejected_mentioning("AND level=error", &["AND"]);
    }

    #[test]
    fn double_and_is_an_error() {
        assert_rejected_mentioning("level=error AND AND service=api", &["clause"]);
    }

    #[test]
    fn and_followed_by_or_is_an_error() {
        assert_rejected_mentioning("level=error AND OR level=warn", &["clause"]);
    }

    // -----------------------------------------------------------------
    // Error cases carried forward from v0.1
    // -----------------------------------------------------------------

    #[test]
    fn empty_query_is_an_error() {
        let err = rejection("");
        assert_eq!(err.position, 0);
        assert!(err.message.contains("empty"));
    }

    #[test]
    fn whitespace_only_query_is_an_error() {
        assert_rejected_mentioning("   ", &["empty"]);
    }

    #[test]
    fn missing_value_after_operator() {
        assert_rejected_mentioning("level=", &["value"]);
    }

    #[test]
    fn missing_operator_after_field() {
        assert_rejected_mentioning("level", &["operator"]);
    }

    #[test]
    fn unknown_duration_unit_names_the_unit() {
        assert_rejected_mentioning("last 5y", &["unit", "\"y\""]);
    }

    #[test]
    fn fractional_duration_rejected() {
        assert_rejected_mentioning("last 1.5h", &["whole number"]);
    }

    #[test]
    fn bang_without_equals_is_actionable() {
        assert_rejected_mentioning("level!error", &["!="]);
    }

    #[test]
    fn unterminated_quoted_string_points_at_opening_quote() {
        let input = r#"service="oops"#;
        let err = rejection(input);
        assert_eq!(err.position, input.find('"').unwrap());
        assert!(err.message.contains("unterminated"));
    }

    #[test]
    fn contains_with_number_is_rejected() {
        assert_rejected_mentioning("message contains 42", &["string"]);
    }

    #[test]
    fn invalid_field_name_starting_with_digit() {
        assert_rejected_mentioning("3foo=x", &["field"]);
    }

    #[test]
    fn missing_and_or_or_between_clauses_is_actionable() {
        // Either keyword in the message counts as an actionable hint.
        let message = rejection("level=error service=payments").message;
        assert!(message.contains("AND") || message.contains("OR"));
    }

    #[test]
    fn last_without_number() {
        assert_rejected_mentioning("last h", &["number"]);
    }

    #[test]
    fn last_without_unit() {
        assert_rejected_mentioning("last 30", &["unit"]);
    }

    // -----------------------------------------------------------------
    // validate_field_name direct tests
    // -----------------------------------------------------------------

    #[test]
    fn validate_field_name_rejects_hyphen() {
        // `-` is accepted inside identifiers by the tokenizer (so values
        // like `x-request-id` work) but is not part of the field-name set.
        assert!(validate_field_name("service-v2", 0).is_err());
    }

    #[test]
    fn validate_field_name_rejects_colon() {
        // `:` is accepted by the tokenizer for datetime literals only.
        assert!(validate_field_name("a:b", 0).is_err());
    }

    #[test]
    fn validate_field_name_rejects_bang() {
        assert!(validate_field_name("field!", 0).is_err());
    }

    #[test]
    fn validate_field_name_allows_dotted_field() {
        assert!(validate_field_name("user.id", 0).is_ok());
    }

    #[test]
    fn validate_field_name_allows_leading_underscore() {
        assert!(validate_field_name("_private", 0).is_ok());
    }

    // -----------------------------------------------------------------
    // Tokenizer edge cases (carried forward from v0.1)
    // -----------------------------------------------------------------

    #[test]
    fn tokens_survive_around_operators_with_no_spaces() {
        let pairs = [
            ("level=error", "level = error"),
            ("req_id!=5", "req_id != 5"),
        ];
        for (tight, spaced) in pairs {
            assert_eq!(parse(tight).unwrap(), parse(spaced).unwrap());
        }
    }

    #[test]
    fn hyphenated_bare_word_value_parses() {
        assert_parses(
            "request_id=x-request-1",
            one_group(vec![str_eq("request_id", "x-request-1")]),
        );
    }

    #[test]
    fn digit_led_value_with_hyphen_is_string_not_number() {
        assert_parses(
            "version=1.2.3-beta",
            one_group(vec![str_eq("version", "1.2.3-beta")]),
        );
    }

    #[test]
    fn dotted_version_string_is_not_a_number() {
        assert_parses(
            "version=1.2.3",
            one_group(vec![str_eq("version", "1.2.3")]),
        );
    }

    #[test]
    fn pure_digit_run_is_still_a_number() {
        let groups = top_level_groups("req_id=100");
        assert_eq!(groups.len(), 1);
        match &groups[0].clauses[0] {
            Clause::Compare {
                value: QueryValue::Integer(n),
                ..
            } => assert_eq!(*n, 100),
            other => panic!("expected Integer value, got {other:?}"),
        }
    }

    // -----------------------------------------------------------------
    // Parenthesized expressions — new in v0.3.0
    // -----------------------------------------------------------------

    #[test]
    fn paren_single_clause_wraps_in_group() {
        // `(level=error)`: a single clause inside parens.
        let groups = top_level_groups("(level=error)");
        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0].clauses.len(), 1);
        assert!(matches!(&groups[0].clauses[0], Clause::Group(_)));
    }

    #[test]
    fn paren_or_inside_and_produces_single_and_group_with_group_clause() {
        // `(level=error OR level=warn) AND service=payments`
        // The parenthesised part becomes one `Clause::Group` inside a
        // single AND-group.
        let groups = top_level_groups("(level=error OR level=warn) AND service=payments");
        assert_eq!(groups.len(), 1, "outer OR has one AND-group");

        let clauses = &groups[0].clauses;
        assert_eq!(clauses.len(), 2, "AND-group: Group + Compare");
        assert!(matches!(&clauses[0], Clause::Group(_)));
        assert!(matches!(&clauses[1], Clause::Compare { .. }));

        // The inner group holds the two OR branches.
        if let Clause::Group(inner) = &clauses[0] {
            let QueryNode::Or(inner_groups) = inner.as_ref();
            assert_eq!(inner_groups.len(), 2);
        }
    }

    #[test]
    fn paren_on_right_side_of_or_wraps_and_group() {
        // `level=error OR (level=warn AND service=payments)`
        let groups = top_level_groups("level=error OR (level=warn AND service=payments)");
        assert_eq!(groups.len(), 2, "two OR branches");

        // Left branch: a plain comparison.
        assert_eq!(groups[0].clauses.len(), 1);
        assert!(matches!(&groups[0].clauses[0], Clause::Compare { .. }));

        // Right branch: one Group clause wrapping the AND.
        assert_eq!(groups[1].clauses.len(), 1);
        assert!(matches!(&groups[1].clauses[0], Clause::Group(_)));
        if let Clause::Group(inner) = &groups[1].clauses[0] {
            let QueryNode::Or(inner_groups) = inner.as_ref();
            assert_eq!(inner_groups.len(), 1);
            assert_eq!(inner_groups[0].clauses.len(), 2);
        }
    }

    #[test]
    fn nested_parens_parse_correctly() {
        // `((level=error))`: doubly nested, still legal.
        assert!(parse("((level=error))").is_ok());
    }

    #[test]
    fn paren_with_time_range_inside() {
        // `(level=error AND last 1h) OR level=warn`
        assert!(parse("(level=error AND last 1h) OR level=warn").is_ok());
    }

    #[test]
    fn paren_keywords_inside_are_case_insensitive() {
        let spellings = [
            "(level=error OR level=warn)",
            "(level=error or level=warn)",
            "(level=error Or level=warn)",
        ];
        for query in spellings {
            assert!(parse(query).is_ok());
        }
    }

    #[test]
    fn unmatched_close_paren_is_error() {
        let err = rejection("level=error)");
        let names_the_problem = err.message.contains(')') || err.message.contains("matching");
        assert!(names_the_problem, "message was: {}", err.message);
    }

    #[test]
    fn unclosed_open_paren_is_error() {
        let err = rejection("(level=error");
        let names_the_problem = err.message.contains(')')
            || err.message.contains("close")
            || err.message.contains("unclosed");
        assert!(names_the_problem, "message was: {}", err.message);
    }

    #[test]
    fn empty_parens_is_error() {
        let err = rejection("()");
        assert!(!err.message.is_empty());
    }

    #[test]
    fn paren_after_and_parses() {
        // `level=error AND (service=payments OR service=api)`
        assert!(parse("level=error AND (service=payments OR service=api)").is_ok());
    }

    #[test]
    fn multiple_paren_groups_joined_by_and() {
        // `(a=1 OR b=2) AND (c=3 OR d=4)`
        let groups = top_level_groups("(a=1 OR b=2) AND (c=3 OR d=4)");
        assert_eq!(groups.len(), 1);

        let clauses = &groups[0].clauses;
        assert_eq!(clauses.len(), 2);
        assert!(clauses
            .iter()
            .all(|clause| matches!(clause, Clause::Group(_))));
    }
}