Skip to main content

logdive_core/
query.rs

1//! Query language: tokenizer, AST, and recursive descent parser.
2//!
3//! Implements the grammar from the project doc's "Notes → Query language
4//! grammar (v1)" section verbatim. AND-only per the 2026-04-19 decisions
5//! log entry — OR is deferred to v2.
6//!
7//! This module owns *only* the parse step: `&str → QueryNode`. Translating
8//! a `QueryNode` into SQL and binding parameters is milestone 4's executor.
9//! Resolving relative time ranges like `last 2h` against wall-clock time
10//! is also the executor's job; the AST just carries the raw spec.
11//!
12//! # Grammar (from the doc, reproduced for reference)
13//!
14//! ```text
15//! query     := clause (AND clause)*
16//! clause    := field OP value
17//!            | field CONTAINS string
18//!            | TIME_RANGE
19//! field     := [a-zA-Z_][a-zA-Z0-9_.]*
20//! OP        := "=" | "!=" | ">" | "<"
21//! value     := string | number | bool
22//! string    := '"' .* '"' | bare_word
23//! TIME_RANGE := "last" duration | "since" datetime
24//! duration  := number ("m" | "h" | "d")
25//! ```
26
27use std::fmt;
28
29// ---------------------------------------------------------------------------
30// AST
31// ---------------------------------------------------------------------------
32
33/// The top-level query: one or more clauses joined by AND.
34///
35/// A query with a single clause parses as `And(vec![clause])` so the
36/// executor has exactly one code path.
37#[derive(Debug, Clone, PartialEq)]
38pub enum QueryNode {
39    And(Vec<Clause>),
40}
41
42/// A single clause — the atomic unit a query is built from.
43#[derive(Debug, Clone, PartialEq)]
44pub enum Clause {
45    /// `field OP value` — e.g. `level = error`, `req_id > 100`.
46    Compare {
47        field: String,
48        op: CompareOp,
49        value: QueryValue,
50    },
51    /// `field CONTAINS string` — substring match on a string column.
52    Contains { field: String, value: String },
53    /// `last <N><unit>` — relative time range ending at query time.
54    LastDuration(Duration),
55    /// `since <datetime>` — absolute time range starting at the given moment.
56    /// The string is opaque at the parse layer; the executor uses chrono to
57    /// resolve it (which allows us to accept multiple formats without
58    /// teaching the grammar about any particular one).
59    SinceDatetime(String),
60}
61
/// Operator used in a `field OP value` clause.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CompareOp {
    Eq,
    NotEq,
    Gt,
    Lt,
}

impl CompareOp {
    /// The operator exactly as it is spelled in query text.
    fn symbol(self) -> &'static str {
        match self {
            Self::Eq => "=",
            Self::NotEq => "!=",
            Self::Gt => ">",
            Self::Lt => "<",
        }
    }
}

impl fmt::Display for CompareOp {
    /// Writes the query-language spelling of the operator.
    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
        out.write_str(self.symbol())
    }
}
81
/// Literal on the right-hand side of a comparison, tagged with its type.
///
/// Integers, floats and booleans are kept apart from strings so that a
/// comparison such as `req_id > 100` can later be evaluated numerically
/// instead of as text.
#[derive(Clone, Debug, PartialEq)]
pub enum QueryValue {
    String(String),
    Integer(i64),
    Float(f64),
    Bool(bool),
}
94
/// Relative time span taken from a `last <N><unit>` clause: a whole-number
/// `amount` of the given `unit`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Duration {
    pub amount: u64,
    pub unit: DurationUnit,
}

/// Unit suffix accepted after the number in a `last` clause.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DurationUnit {
    Minutes,
    Hours,
    Days,
}

impl DurationUnit {
    /// Number of seconds in ONE of this unit (not scaled by any amount);
    /// callers multiply by `Duration::amount` to get the full span.
    pub fn seconds(self) -> i64 {
        const MINUTE: i64 = 60;
        const HOUR: i64 = 60 * MINUTE;
        const DAY: i64 = 24 * HOUR;

        match self {
            Self::Minutes => MINUTE,
            Self::Hours => HOUR,
            Self::Days => DAY,
        }
    }
}
120
121// ---------------------------------------------------------------------------
122// Errors
123// ---------------------------------------------------------------------------
124
/// Failure to tokenize or parse a query.
///
/// `position` is a byte offset into the original query string and `message`
/// is the human-readable explanation. A byte offset is enough (no
/// line/column) because queries are a single line; a caller can slice the
/// input around `position` to point at the problem.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct QueryParseError {
    pub position: usize,
    pub message: String,
}

impl fmt::Display for QueryParseError {
    /// Renders as `query parse error at position <offset>: <message>`.
    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { position, message } = self;
        write!(out, "query parse error at position {position}: {message}")
    }
}

impl std::error::Error for QueryParseError {}
147
148// ---------------------------------------------------------------------------
149// Tokens
150// ---------------------------------------------------------------------------
151
/// Lexical unit produced by the tokenizer.
#[derive(Clone, Debug, PartialEq)]
enum Token {
    /// Unquoted word. Depending on where it appears it may be a field name,
    /// a bare-word value, or a keyword (`and`, `contains`, `last`, ...).
    /// Keywords are deliberately not distinguished here: the parser decides
    /// by position, so the same spelling can still serve as an ordinary
    /// word where no keyword is expected.
    Ident(String),
    /// Contents of a double-quoted string, without the surrounding quotes.
    QuotedString(String),
    /// Numeric literal kept as its source text; deciding between integer
    /// and float is left to the parser.
    Number(String),
    Eq,
    NotEq,
    Gt,
    Lt,
}

/// A token together with the byte offset at which it starts in the input.
#[derive(Clone, Debug)]
struct SpannedToken {
    token: Token,
    position: usize,
}
176
177// ---------------------------------------------------------------------------
178// Tokenizer
179// ---------------------------------------------------------------------------
180
/// Whether byte `b` may appear after the first byte of an identifier token.
///
/// Accepts ASCII letters and digits plus `_`, `.`, `-` and `:`. That is the
/// grammar's field-name alphabet widened with `-` and `:` so that bare-word
/// values such as `x-request-id` and colon-separated fragments (time
/// components) stay a single token; `.` serves both dotted field names and
/// version-like values.
fn is_ident_continuation(b: u8) -> bool {
    matches!(
        b,
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'.' | b'-' | b':'
    )
}
190
191/// Split the input into a stream of tokens with byte-offset positions.
192///
193/// Whitespace is skipped. Unrecognized bytes produce a `QueryParseError`
194/// pointing at the offending character.
195fn tokenize(input: &str) -> Result<Vec<SpannedToken>, QueryParseError> {
196    let bytes = input.as_bytes();
197    let mut i = 0;
198    let mut out = Vec::new();
199
200    while i < bytes.len() {
201        let c = bytes[i];
202
203        // Whitespace.
204        if c.is_ascii_whitespace() {
205            i += 1;
206            continue;
207        }
208
209        // Operators — order matters: check `!=` before `!` would-be, and
210        // both before single `<`/`>`/`=`.
211        if c == b'!' {
212            if i + 1 < bytes.len() && bytes[i + 1] == b'=' {
213                out.push(SpannedToken {
214                    token: Token::NotEq,
215                    position: i,
216                });
217                i += 2;
218                continue;
219            }
220            return Err(QueryParseError {
221                position: i,
222                message: "unexpected '!' — did you mean '!='?".to_string(),
223            });
224        }
225        if c == b'=' {
226            out.push(SpannedToken {
227                token: Token::Eq,
228                position: i,
229            });
230            i += 1;
231            continue;
232        }
233        if c == b'>' {
234            out.push(SpannedToken {
235                token: Token::Gt,
236                position: i,
237            });
238            i += 1;
239            continue;
240        }
241        if c == b'<' {
242            out.push(SpannedToken {
243                token: Token::Lt,
244                position: i,
245            });
246            i += 1;
247            continue;
248        }
249
250        // Quoted string.
251        if c == b'"' {
252            let start = i;
253            i += 1; // consume opening quote
254            let content_start = i;
255            while i < bytes.len() && bytes[i] != b'"' {
256                // No escape handling in v1 — the grammar is `'"' .* '"'`
257                // and real log-query users don't embed quotes in values.
258                // If this becomes a pain we add escape handling in v2.
259                i += 1;
260            }
261            if i >= bytes.len() {
262                return Err(QueryParseError {
263                    position: start,
264                    message: "unterminated quoted string".to_string(),
265                });
266            }
267            let s = std::str::from_utf8(&bytes[content_start..i])
268                .expect("input is &str, slice is UTF-8")
269                .to_string();
270            i += 1; // consume closing quote
271            out.push(SpannedToken {
272                token: Token::QuotedString(s),
273                position: start,
274            });
275            continue;
276        }
277
278        // Digit-led token.
279        //
280        // Two possibilities:
281        //  - Pure-digit run (with optional fractional part) → Token::Number.
282        //    Example: `100`, `1.5`.
283        //  - Digit-led run that contains `-` or `:` → Token::Ident. This
284        //    supports bare datetime literals like `2024-01-01T10:00:00Z`
285        //    after `since`, per the 2026-04-22 decision to let bare dates
286        //    tokenize as identifiers. Colon is included for completeness
287        //    so time-of-day literals don't need quoting either.
288        //
289        // The disambiguation happens at the first non-digit, non-dot byte:
290        // if that byte is `-` or `:`, we promote the whole run (and keep
291        // consuming continuation bytes) to an Ident. Otherwise we stop at
292        // the end of the numeric run and emit a Number.
293        if c.is_ascii_digit() {
294            let start = i;
295            let mut saw_dot = false;
296
297            // First phase: consume digits and at most one dot (only when
298            // the dot is followed by a digit, preserving the existing
299            // `1.5` behaviour). We peek at the next byte after each dot
300            // to decide.
301            while i < bytes.len() && (bytes[i].is_ascii_digit() || (bytes[i] == b'.' && !saw_dot)) {
302                if bytes[i] == b'.' {
303                    if i + 1 >= bytes.len() || !bytes[i + 1].is_ascii_digit() {
304                        break;
305                    }
306                    saw_dot = true;
307                }
308                i += 1;
309            }
310
311            // Second phase: if the next byte indicates this digit-led run
312            // is actually an ident (datetime, dotted version string,
313            // alphanumeric suffix, etc.), keep consuming all ident-
314            // continuation bytes and emit an Ident.
315            //
316            // Promotion triggers:
317            //   `-` or `:` — datetime literals (`2024-01-01`, `10:30`)
318            //   `.`        — dotted strings beyond one fractional part
319            //                (`1.2.3`, which can't be a valid Number)
320            //   letter     — alphanumeric suffixes (`3beta`, `v1rc2`)
321            //
322            // Note: the first phase stops at a *second* dot because the
323            // `!saw_dot` guard fires, leaving `bytes[i]` on that second
324            // dot — hence `.` being a valid trigger here.
325            // Second phase: if the next byte indicates this digit-led run
326            // is actually an ident (datetime, multi-dot version string),
327            // keep consuming all ident-continuation bytes and emit Ident.
328            //
329            // Promotion triggers:
330            //   `-` or `:` — datetime literals (`2024-01-01`, `10:30`)
331            //   `.`        — dotted strings beyond one fractional part
332            //                (`1.2.3`); the first phase stops at the
333            //                second dot due to its `!saw_dot` guard,
334            //                leaving `bytes[i]` on that second dot.
335            //
336            // Letters are intentionally NOT a promotion trigger: `30m`
337            // must tokenize as Number("30") + Ident("m") so the parser's
338            // `last <N><unit>` rule works. Users who want digit-led
339            // values with letter suffixes (`3beta`) must quote them.
340            if i < bytes.len() && (bytes[i] == b'-' || bytes[i] == b':' || bytes[i] == b'.') {
341                while i < bytes.len() && is_ident_continuation(bytes[i]) {
342                    i += 1;
343                }
344                let s = std::str::from_utf8(&bytes[start..i])
345                    .expect("input is &str, slice is UTF-8")
346                    .to_string();
347                out.push(SpannedToken {
348                    token: Token::Ident(s),
349                    position: start,
350                });
351                continue;
352            }
353
354            let s = std::str::from_utf8(&bytes[start..i])
355                .expect("ascii digits are UTF-8")
356                .to_string();
357            out.push(SpannedToken {
358                token: Token::Number(s),
359                position: start,
360            });
361            continue;
362        }
363
364        // Identifier / bare word: starts with letter or underscore,
365        // continues per `is_ident_continuation`. Hyphen and colon are
366        // allowed inside so bare-word values like `x-request-id` and
367        // colon-separated fragments work; `validate_field_name` later
368        // enforces the stricter field-name subset.
369        if c == b'_' || c.is_ascii_alphabetic() {
370            let start = i;
371            while i < bytes.len() && is_ident_continuation(bytes[i]) {
372                i += 1;
373            }
374            let s = std::str::from_utf8(&bytes[start..i])
375                .expect("input is &str, slice is UTF-8")
376                .to_string();
377            out.push(SpannedToken {
378                token: Token::Ident(s),
379                position: start,
380            });
381            continue;
382        }
383
384        return Err(QueryParseError {
385            position: i,
386            message: format!("unexpected character {:?}", c as char),
387        });
388    }
389
390    Ok(out)
391}
392
393// ---------------------------------------------------------------------------
394// Parser
395// ---------------------------------------------------------------------------
396
397/// Parse a query string into a `QueryNode`.
398///
399/// This is the only public entry point. Implements the grammar from the
400/// project doc top-down via recursive descent, with AND chaining at the
401/// outermost level.
402pub fn parse(input: &str) -> Result<QueryNode, QueryParseError> {
403    let tokens = tokenize(input)?;
404    if tokens.is_empty() {
405        return Err(QueryParseError {
406            position: 0,
407            message: "empty query".to_string(),
408        });
409    }
410
411    let mut p = Parser {
412        tokens: &tokens,
413        cursor: 0,
414    };
415    let mut clauses = Vec::new();
416    clauses.push(p.parse_clause()?);
417
418    while let Some(tok) = p.peek() {
419        // AND is a keyword stored as an Ident. Case-insensitive.
420        match &tok.token {
421            Token::Ident(s) if s.eq_ignore_ascii_case("and") => {
422                p.advance();
423                clauses.push(p.parse_clause()?);
424            }
425            Token::Ident(s) if s.eq_ignore_ascii_case("or") => {
426                // Specific, actionable error per the doc's emphasis on good messages.
427                return Err(QueryParseError {
428                    position: tok.position,
429                    message: "OR is not supported in v1; only AND. See project doc decisions log."
430                        .to_string(),
431                });
432            }
433            _ => {
434                return Err(QueryParseError {
435                    position: tok.position,
436                    message: "expected 'AND' between clauses".to_string(),
437                });
438            }
439        }
440    }
441
442    Ok(QueryNode::And(clauses))
443}
444
445struct Parser<'a> {
446    tokens: &'a [SpannedToken],
447    cursor: usize,
448}
449
450impl<'a> Parser<'a> {
451    fn peek(&self) -> Option<&'a SpannedToken> {
452        self.tokens.get(self.cursor)
453    }
454
455    fn advance(&mut self) -> Option<&'a SpannedToken> {
456        let t = self.tokens.get(self.cursor);
457        if t.is_some() {
458            self.cursor += 1;
459        }
460        t
461    }
462
463    /// Position to attribute to an error when the tokens are exhausted.
464    fn end_position(&self) -> usize {
465        self.tokens
466            .last()
467            .map(|t| t.position + token_len(&t.token))
468            .unwrap_or(0)
469    }
470
471    fn parse_clause(&mut self) -> Result<Clause, QueryParseError> {
472        let tok = self.peek().ok_or_else(|| QueryParseError {
473            position: self.end_position(),
474            message: "expected a clause, got end of input".to_string(),
475        })?;
476
477        // Time-range clauses are keyword-led.
478        if let Token::Ident(s) = &tok.token {
479            if s.eq_ignore_ascii_case("last") {
480                self.advance();
481                return self.parse_last_duration();
482            }
483            if s.eq_ignore_ascii_case("since") {
484                self.advance();
485                return self.parse_since_datetime();
486            }
487        }
488
489        // Otherwise: field-led clause (compare or contains).
490        self.parse_field_led_clause()
491    }
492
493    fn parse_last_duration(&mut self) -> Result<Clause, QueryParseError> {
494        let num_tok = self.advance().ok_or_else(|| QueryParseError {
495            position: self.end_position(),
496            message: "expected a number after 'last'".to_string(),
497        })?;
498        let num_str = match &num_tok.token {
499            Token::Number(s) => s,
500            _ => {
501                return Err(QueryParseError {
502                    position: num_tok.position,
503                    message: "expected a number after 'last'".to_string(),
504                });
505            }
506        };
507        if num_str.contains('.') {
508            return Err(QueryParseError {
509                position: num_tok.position,
510                message: "duration amount must be a whole number".to_string(),
511            });
512        }
513        let amount: u64 = num_str.parse().map_err(|_| QueryParseError {
514            position: num_tok.position,
515            message: format!("invalid duration amount {num_str:?}"),
516        })?;
517
518        let unit_tok = self.advance().ok_or_else(|| QueryParseError {
519            position: self.end_position(),
520            message: "expected a duration unit ('m', 'h', or 'd') after the number".to_string(),
521        })?;
522        let unit_str = match &unit_tok.token {
523            Token::Ident(s) => s,
524            _ => {
525                return Err(QueryParseError {
526                    position: unit_tok.position,
527                    message: "expected a duration unit ('m', 'h', or 'd')".to_string(),
528                });
529            }
530        };
531        let unit = match unit_str.as_str() {
532            "m" => DurationUnit::Minutes,
533            "h" => DurationUnit::Hours,
534            "d" => DurationUnit::Days,
535            other => {
536                return Err(QueryParseError {
537                    position: unit_tok.position,
538                    message: format!("unknown duration unit {other:?}, expected 'm', 'h', or 'd'"),
539                });
540            }
541        };
542
543        Ok(Clause::LastDuration(Duration { amount, unit }))
544    }
545
546    fn parse_since_datetime(&mut self) -> Result<Clause, QueryParseError> {
547        let tok = self.advance().ok_or_else(|| QueryParseError {
548            position: self.end_position(),
549            message: "expected a datetime after 'since'".to_string(),
550        })?;
551        let dt = match &tok.token {
552            Token::QuotedString(s) => s.clone(),
553            Token::Ident(s) => s.clone(),
554            Token::Number(s) => s.clone(),
555            _ => {
556                return Err(QueryParseError {
557                    position: tok.position,
558                    message: "expected a datetime after 'since'".to_string(),
559                });
560            }
561        };
562        Ok(Clause::SinceDatetime(dt))
563    }
564
565    fn parse_field_led_clause(&mut self) -> Result<Clause, QueryParseError> {
566        let field_tok = self.advance().expect("caller peeked a token");
567        let field = match &field_tok.token {
568            Token::Ident(s) => s.clone(),
569            _ => {
570                return Err(QueryParseError {
571                    position: field_tok.position,
572                    message: "expected a field name".to_string(),
573                });
574            }
575        };
576        validate_field_name(&field, field_tok.position)?;
577
578        let op_tok = self.advance().ok_or_else(|| QueryParseError {
579            position: self.end_position(),
580            message: "expected an operator after the field name".to_string(),
581        })?;
582
583        // CONTAINS is a keyword stored as Ident.
584        if let Token::Ident(s) = &op_tok.token {
585            if s.eq_ignore_ascii_case("contains") {
586                let val_tok = self.advance().ok_or_else(|| QueryParseError {
587                    position: self.end_position(),
588                    message: "expected a string after 'contains'".to_string(),
589                })?;
590                let s = match &val_tok.token {
591                    Token::QuotedString(s) => s.clone(),
592                    Token::Ident(s) => s.clone(),
593                    _ => {
594                        return Err(QueryParseError {
595                            position: val_tok.position,
596                            message: "'contains' requires a string value".to_string(),
597                        });
598                    }
599                };
600                return Ok(Clause::Contains { field, value: s });
601            }
602        }
603
604        let op = match &op_tok.token {
605            Token::Eq => CompareOp::Eq,
606            Token::NotEq => CompareOp::NotEq,
607            Token::Gt => CompareOp::Gt,
608            Token::Lt => CompareOp::Lt,
609            _ => {
610                return Err(QueryParseError {
611                    position: op_tok.position,
612                    message: "expected one of =, !=, >, <, or 'contains'".to_string(),
613                });
614            }
615        };
616
617        let val_tok = self.advance().ok_or_else(|| QueryParseError {
618            position: self.end_position(),
619            message: "expected a value after the operator".to_string(),
620        })?;
621        let value = token_to_query_value(val_tok)?;
622
623        Ok(Clause::Compare { field, op, value })
624    }
625}
626
627/// Enforce the grammar's field regex: `[a-zA-Z_][a-zA-Z0-9_.]*`.
628///
629/// The tokenizer is more permissive (it allows `-` and `:` inside idents
630/// so that bare-word *values* like `x-request-id` and datetime literals
631/// tokenize cleanly). We re-validate here because a field name is a
632/// stricter subset.
633fn validate_field_name(s: &str, position: usize) -> Result<(), QueryParseError> {
634    let mut chars = s.chars();
635    let first = chars.next().ok_or_else(|| QueryParseError {
636        position,
637        message: "empty field name".to_string(),
638    })?;
639    if !(first.is_ascii_alphabetic() || first == '_') {
640        return Err(QueryParseError {
641            position,
642            message: format!("invalid field name {s:?}: must start with a letter or underscore"),
643        });
644    }
645    for c in chars {
646        if !(c.is_ascii_alphanumeric() || c == '_' || c == '.') {
647            return Err(QueryParseError {
648                position,
649                message: format!(
650                    "invalid field name {s:?}: only letters, digits, underscores, and dots are allowed"
651                ),
652            });
653        }
654    }
655    Ok(())
656}
657
658fn token_to_query_value(tok: &SpannedToken) -> Result<QueryValue, QueryParseError> {
659    match &tok.token {
660        Token::QuotedString(s) => Ok(QueryValue::String(s.clone())),
661        Token::Number(s) => {
662            if s.contains('.') {
663                let f: f64 = s.parse().map_err(|_| QueryParseError {
664                    position: tok.position,
665                    message: format!("invalid number {s:?}"),
666                })?;
667                Ok(QueryValue::Float(f))
668            } else {
669                let n: i64 = s.parse().map_err(|_| QueryParseError {
670                    position: tok.position,
671                    message: format!("invalid integer {s:?}"),
672                })?;
673                Ok(QueryValue::Integer(n))
674            }
675        }
676        Token::Ident(s) => {
677            // Booleans as bare words.
678            if s.eq_ignore_ascii_case("true") {
679                Ok(QueryValue::Bool(true))
680            } else if s.eq_ignore_ascii_case("false") {
681                Ok(QueryValue::Bool(false))
682            } else {
683                Ok(QueryValue::String(s.clone()))
684            }
685        }
686        _ => Err(QueryParseError {
687            position: tok.position,
688            message: "expected a value (string, number, or boolean)".to_string(),
689        }),
690    }
691}
692
693fn token_len(t: &Token) -> usize {
694    match t {
695        Token::Ident(s) | Token::Number(s) => s.len(),
696        Token::QuotedString(s) => s.len() + 2, // approximate, for error positioning only
697        Token::Eq | Token::Gt | Token::Lt => 1,
698        Token::NotEq => 2,
699    }
700}
701
702// ---------------------------------------------------------------------------
703// Tests
704// ---------------------------------------------------------------------------
705
706#[cfg(test)]
707mod tests {
708    use super::*;
709
710    fn and_of(clauses: Vec<Clause>) -> QueryNode {
711        QueryNode::And(clauses)
712    }
713
714    fn cmp(field: &str, op: CompareOp, value: QueryValue) -> Clause {
715        Clause::Compare {
716            field: field.to_string(),
717            op,
718            value,
719        }
720    }
721
722    // --- Each operator parses correctly ---
723
724    #[test]
725    fn eq_operator() {
726        assert_eq!(
727            parse("level=error").unwrap(),
728            and_of(vec![cmp(
729                "level",
730                CompareOp::Eq,
731                QueryValue::String("error".into())
732            )])
733        );
734    }
735
736    #[test]
737    fn not_eq_operator() {
738        assert_eq!(
739            parse("level!=info").unwrap(),
740            and_of(vec![cmp(
741                "level",
742                CompareOp::NotEq,
743                QueryValue::String("info".into())
744            )])
745        );
746    }
747
748    #[test]
749    fn gt_operator_with_integer() {
750        assert_eq!(
751            parse("req_id > 100").unwrap(),
752            and_of(vec![cmp("req_id", CompareOp::Gt, QueryValue::Integer(100))])
753        );
754    }
755
756    #[test]
757    fn lt_operator_with_float() {
758        assert_eq!(
759            parse("duration < 1.5").unwrap(),
760            and_of(vec![cmp("duration", CompareOp::Lt, QueryValue::Float(1.5))])
761        );
762    }
763
764    #[test]
765    fn contains_operator_with_quoted_string() {
766        assert_eq!(
767            parse(r#"message contains "database timeout""#).unwrap(),
768            and_of(vec![Clause::Contains {
769                field: "message".into(),
770                value: "database timeout".into(),
771            }])
772        );
773    }
774
775    #[test]
776    fn contains_operator_with_bare_word() {
777        assert_eq!(
778            parse("message contains timeout").unwrap(),
779            and_of(vec![Clause::Contains {
780                field: "message".into(),
781                value: "timeout".into(),
782            }])
783        );
784    }
785
786    #[test]
787    fn contains_is_case_insensitive() {
788        assert_eq!(
789            parse("message CONTAINS boom").unwrap(),
790            and_of(vec![Clause::Contains {
791                field: "message".into(),
792                value: "boom".into(),
793            }])
794        );
795    }
796
797    #[test]
798    fn boolean_value() {
799        assert_eq!(
800            parse("ok=true").unwrap(),
801            and_of(vec![cmp("ok", CompareOp::Eq, QueryValue::Bool(true))])
802        );
803        assert_eq!(
804            parse("ok=FALSE").unwrap(),
805            and_of(vec![cmp("ok", CompareOp::Eq, QueryValue::Bool(false))])
806        );
807    }
808
809    #[test]
810    fn quoted_string_value_preserves_spaces() {
811        assert_eq!(
812            parse(r#"service="payments gateway""#).unwrap(),
813            and_of(vec![cmp(
814                "service",
815                CompareOp::Eq,
816                QueryValue::String("payments gateway".into())
817            )])
818        );
819    }
820
821    #[test]
822    fn dotted_field_name_for_nested_json() {
823        assert_eq!(
824            parse("user.id=42").unwrap(),
825            and_of(vec![cmp("user.id", CompareOp::Eq, QueryValue::Integer(42))])
826        );
827    }
828
829    // --- Time ranges ---
830
831    #[test]
832    fn last_minutes() {
833        assert_eq!(
834            parse("last 30m").unwrap(),
835            and_of(vec![Clause::LastDuration(Duration {
836                amount: 30,
837                unit: DurationUnit::Minutes
838            })])
839        );
840    }
841
842    #[test]
843    fn last_hours() {
844        assert_eq!(
845            parse("last 2h").unwrap(),
846            and_of(vec![Clause::LastDuration(Duration {
847                amount: 2,
848                unit: DurationUnit::Hours
849            })])
850        );
851    }
852
853    #[test]
854    fn last_days() {
855        assert_eq!(
856            parse("last 7d").unwrap(),
857            and_of(vec![Clause::LastDuration(Duration {
858                amount: 7,
859                unit: DurationUnit::Days
860            })])
861        );
862    }
863
864    #[test]
865    fn since_datetime_is_opaque_string() {
866        assert_eq!(
867            parse("since 2024-01-01").unwrap(),
868            and_of(vec![Clause::SinceDatetime("2024-01-01".into())])
869        );
870    }
871
872    #[test]
873    fn since_datetime_can_be_quoted() {
874        assert_eq!(
875            parse(r#"since "2024-01-01T10:00:00Z""#).unwrap(),
876            and_of(vec![Clause::SinceDatetime("2024-01-01T10:00:00Z".into())])
877        );
878    }
879
880    #[test]
881    fn since_datetime_bare_with_time_component_parses() {
882        // Regression: digit-led tokens containing `-` or `:` must tokenize
883        // as Ident, not blow up mid-number.
884        assert_eq!(
885            parse("since 2024-01-01T10:00:00Z").unwrap(),
886            and_of(vec![Clause::SinceDatetime("2024-01-01T10:00:00Z".into())])
887        );
888    }
889
890    #[test]
891    fn since_datetime_bare_followed_by_and_clause() {
892        // The datetime must terminate at whitespace so the AND chain still works.
893        assert_eq!(
894            parse("since 2024-01-01 AND level=error").unwrap(),
895            and_of(vec![
896                Clause::SinceDatetime("2024-01-01".into()),
897                cmp("level", CompareOp::Eq, QueryValue::String("error".into())),
898            ])
899        );
900    }
901
902    // --- AND chaining ---
903
904    #[test]
905    fn two_clauses_with_and() {
906        assert_eq!(
907            parse("level=error AND service=payments").unwrap(),
908            and_of(vec![
909                cmp("level", CompareOp::Eq, QueryValue::String("error".into())),
910                cmp(
911                    "service",
912                    CompareOp::Eq,
913                    QueryValue::String("payments".into())
914                ),
915            ])
916        );
917    }
918
919    #[test]
920    fn and_is_case_insensitive() {
921        assert_eq!(
922            parse("level=error and service=payments").unwrap(),
923            and_of(vec![
924                cmp("level", CompareOp::Eq, QueryValue::String("error".into())),
925                cmp(
926                    "service",
927                    CompareOp::Eq,
928                    QueryValue::String("payments".into())
929                ),
930            ])
931        );
932    }
933
934    #[test]
935    fn three_clauses_with_time_range() {
936        assert_eq!(
937            parse("tag=api AND level=error AND last 30m").unwrap(),
938            and_of(vec![
939                cmp("tag", CompareOp::Eq, QueryValue::String("api".into())),
940                cmp("level", CompareOp::Eq, QueryValue::String("error".into())),
941                Clause::LastDuration(Duration {
942                    amount: 30,
943                    unit: DurationUnit::Minutes
944                }),
945            ])
946        );
947    }
948
949    // --- Error cases: invalid input produces descriptive messages ---
950
951    #[test]
952    fn empty_query_is_an_error() {
953        let err = parse("").unwrap_err();
954        assert_eq!(err.position, 0);
955        assert!(err.message.contains("empty"));
956    }
957
958    #[test]
959    fn whitespace_only_query_is_an_error() {
960        let err = parse("   ").unwrap_err();
961        assert!(err.message.contains("empty"));
962    }
963
964    #[test]
965    fn missing_value_after_operator() {
966        let err = parse("level=").unwrap_err();
967        assert!(err.message.contains("value"));
968    }
969
970    #[test]
971    fn missing_operator_after_field() {
972        let err = parse("level").unwrap_err();
973        assert!(err.message.contains("operator"));
974    }
975
976    #[test]
977    fn unknown_duration_unit_names_the_unit() {
978        let err = parse("last 5y").unwrap_err();
979        assert!(err.message.contains("unit"));
980        assert!(err.message.contains("\"y\""));
981    }
982
983    #[test]
984    fn fractional_duration_rejected() {
985        let err = parse("last 1.5h").unwrap_err();
986        assert!(err.message.contains("whole number"));
987    }
988
989    #[test]
990    fn or_operator_suggests_v2_deferral() {
991        let err = parse("level=error OR level=warn").unwrap_err();
992        assert!(err.message.contains("OR"));
993        assert!(err.message.contains("AND"));
994    }
995
996    #[test]
997    fn bang_without_equals_is_actionable() {
998        let err = parse("level!error").unwrap_err();
999        assert!(err.message.contains("!="));
1000    }
1001
1002    #[test]
1003    fn unterminated_quoted_string_points_at_opening_quote() {
1004        let input = r#"service="oops"#;
1005        let err = parse(input).unwrap_err();
1006        assert_eq!(err.position, input.find('"').unwrap());
1007        assert!(err.message.contains("unterminated"));
1008    }
1009
1010    #[test]
1011    fn contains_with_number_is_rejected() {
1012        // The grammar says `field CONTAINS string` — a bare number is not a string.
1013        let err = parse("message contains 42").unwrap_err();
1014        assert!(err.message.contains("string"));
1015    }
1016
1017    #[test]
1018    fn invalid_field_name_starting_with_digit() {
1019        // The tokenizer turns `3foo` into a Number followed by an Ident,
1020        // so the parser sees a number in field position and complains.
1021        let err = parse("3foo=x").unwrap_err();
1022        assert!(err.message.contains("field"));
1023    }
1024
1025    #[test]
1026    fn missing_and_between_clauses_is_actionable() {
1027        let err = parse("level=error service=payments").unwrap_err();
1028        assert!(err.message.contains("AND"));
1029    }
1030
1031    #[test]
1032    fn last_without_number() {
1033        let err = parse("last h").unwrap_err();
1034        assert!(err.message.contains("number"));
1035    }
1036
1037    #[test]
1038    fn last_without_unit() {
1039        let err = parse("last 30").unwrap_err();
1040        assert!(err.message.contains("unit"));
1041    }
1042
1043    // --- Sanity checks on tokenizer edge cases ---
1044
1045    #[test]
1046    fn tokens_survive_around_operators_with_no_spaces() {
1047        assert_eq!(
1048            parse("level=error").unwrap(),
1049            parse("level = error").unwrap()
1050        );
1051        assert_eq!(parse("req_id!=5").unwrap(), parse("req_id != 5").unwrap());
1052    }
1053
1054    #[test]
1055    fn hyphenated_bare_word_value_parses() {
1056        assert_eq!(
1057            parse("request_id=x-request-1").unwrap(),
1058            and_of(vec![cmp(
1059                "request_id",
1060                CompareOp::Eq,
1061                QueryValue::String("x-request-1".into())
1062            )])
1063        );
1064    }
1065
1066    #[test]
1067    fn digit_led_value_with_hyphen_is_string_not_number() {
1068        // `version=1.2.3-beta` — regression guard: this should be a string
1069        // value, not a parse error from trying to be a number.
1070        assert_eq!(
1071            parse("version=1.2.3-beta").unwrap(),
1072            and_of(vec![cmp(
1073                "version",
1074                CompareOp::Eq,
1075                QueryValue::String("1.2.3-beta".into())
1076            )])
1077        );
1078    }
1079
1080    #[test]
1081    fn dotted_version_string_is_not_a_number() {
1082        // `version=1.2.3` — more than one dot means it can't be a float.
1083        // Must tokenize as a single Ident/String, not a Number followed
1084        // by an unexpected `.`.
1085        assert_eq!(
1086            parse("version=1.2.3").unwrap(),
1087            and_of(vec![cmp(
1088                "version",
1089                CompareOp::Eq,
1090                QueryValue::String("1.2.3".into())
1091            )])
1092        );
1093    }
1094
1095    #[test]
1096    fn pure_digit_run_is_still_a_number() {
1097        // Belt-and-braces: the digit-promotion logic must not accidentally
1098        // turn `100` into an Ident.
1099        match &parse("req_id=100").unwrap() {
1100            QueryNode::And(clauses) => match &clauses[0] {
1101                Clause::Compare {
1102                    value: QueryValue::Integer(n),
1103                    ..
1104                } => assert_eq!(*n, 100),
1105                other => panic!("expected Integer value, got {other:?}"),
1106            },
1107        }
1108    }
1109}