Skip to main content

cqlite_core/query/
select_parser.rs

1//! Advanced CQL SELECT Parser
2//!
3//! This module implements the FIRST EVER CQL SELECT parser for direct SSTable access.
4//! It provides comprehensive parsing for complex SELECT statements including:
5//! - Advanced WHERE clauses with all operators
6//! - Aggregation functions and GROUP BY
7//! - ORDER BY and LIMIT clauses
8//! - Collection operations
9//! - Subqueries and JOINs (future)
10
11// CQL (Cassandra Query Language) Reference:
12// https://cassandra.apache.org/doc/latest/cassandra/developing/cql/cql_singlefile.html
13//
14// This implements CQL v3.4.3+ for Apache Cassandra 5.0+
15// CQL is NOT SQL - it's a query language specifically designed for Cassandra's distributed architecture.
16
17use super::select_ast::*;
18use crate::{Error, Result, TableId, Value};
19
/// Advanced CQL SELECT parser.
///
/// Owns a [`Tokenizer`] over the input text together with a one-token
/// lookahead (`current_token`) that the parsing methods inspect and consume.
#[derive(Debug)]
pub struct SelectParser {
    /// Current token being parsed (always `Some` after construction; `Token::Eof` marks end)
    current_token: Option<Token>,
    /// Tokenizer for the input
    tokenizer: Tokenizer,
}
28
/// Token types for CQL parsing.
///
/// Literal and identifier variants carry their parsed payload; every other
/// variant is a plain marker. Keyword spellings are matched without regard to
/// ASCII case by the tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // Keywords
    Select,
    Distinct,
    From,
    Where,
    GroupBy, // two-word keyword `GROUP BY`, delivered as a single token
    Having,
    OrderBy, // two-word keyword `ORDER BY`, delivered as a single token
    Limit,
    Offset,
    And,
    Or,
    Not,
    Like,
    In,
    Between,
    As,
    Asc,
    Desc,
    Allow,
    Filtering,
    Count,
    Sum,
    Avg,
    Min,
    Max,
    // JOIN operations are NOT supported in Cassandra CQL
    Is,
    Null,
    Contains,
    Key,

    // Operators
    Equal,            // =
    NotEqual,         // != or <>
    LessThan,         // <
    LessThanEqual,    // <=
    GreaterThan,      // >
    GreaterThanEqual, // >=
    Plus,             // +
    Minus,            // -
    Multiply,         // * (also used for the `SELECT *` / `COUNT(*)` wildcard)
    Divide,           // /
    Modulo,           // %

    // Literals
    Integer(i64),    // digits without a decimal point
    Float(f64),      // digits containing one decimal point
    String(String),  // contents of a quoted literal, escapes already resolved
    Boolean(bool),   // TRUE / FALSE

    // Identifiers
    Identifier(String), // any word that is not a recognised keyword

    // Punctuation
    LeftParen,    // (
    RightParen,   // )
    LeftBracket,  // [
    RightBracket, // ]
    LeftBrace,    // {
    RightBrace,   // }
    Comma,        // ,
    Semicolon,    // ;
    Dot,          // .
    Question,     // ? (for parameters)

    // Special
    Eof, // end of input
    // NOTE(review): the tokenizer in this file skips whitespace and never
    // emits the two variants below — confirm whether anything else uses them.
    Newline,
    Whitespace,
}
103
104/// Map an already-read identifier to its keyword token, or `None` for a plain identifier.
105///
106/// Uses ASCII-case-insensitive comparison so we never have to allocate an
107/// uppercase copy of the source text.
108fn keyword_for(ident: &str) -> Option<Token> {
109    // Sorted roughly by expected frequency / first-letter group for readability.
110    const KEYWORDS: &[(&str, Token)] = &[
111        ("SELECT", Token::Select),
112        ("DISTINCT", Token::Distinct),
113        ("FROM", Token::From),
114        ("WHERE", Token::Where),
115        ("HAVING", Token::Having),
116        ("LIMIT", Token::Limit),
117        ("OFFSET", Token::Offset),
118        ("AND", Token::And),
119        ("OR", Token::Or),
120        ("NOT", Token::Not),
121        ("LIKE", Token::Like),
122        ("IN", Token::In),
123        ("BETWEEN", Token::Between),
124        ("AS", Token::As),
125        ("ASC", Token::Asc),
126        ("DESC", Token::Desc),
127        ("ALLOW", Token::Allow),
128        ("FILTERING", Token::Filtering),
129        ("COUNT", Token::Count),
130        ("SUM", Token::Sum),
131        ("AVG", Token::Avg),
132        ("MIN", Token::Min),
133        ("MAX", Token::Max),
134        ("IS", Token::Is),
135        ("NULL", Token::Null),
136        ("CONTAINS", Token::Contains),
137        ("KEY", Token::Key),
138        ("TRUE", Token::Boolean(true)),
139        ("FALSE", Token::Boolean(false)),
140    ];
141
142    KEYWORDS
143        .iter()
144        .find(|(kw, _)| ident.eq_ignore_ascii_case(kw))
145        .map(|(_, tok)| tok.clone())
146}
147
148/// Map an aggregate keyword token to its AST type, if any.
149fn aggregate_for(token: &Token) -> Option<AggregateType> {
150    match token {
151        Token::Count => Some(AggregateType::Count),
152        Token::Sum => Some(AggregateType::Sum),
153        Token::Avg => Some(AggregateType::Avg),
154        Token::Min => Some(AggregateType::Min),
155        Token::Max => Some(AggregateType::Max),
156        _ => None,
157    }
158}
159
/// Simple tokenizer for CQL.
///
/// The input is held as a vector of `char`s so the cursor can move by
/// character index; `current_char` caches the character at `position`.
#[derive(Debug)]
pub struct Tokenizer {
    /// Input text, one entry per `char`.
    input: Vec<char>,
    /// Index into `input` of the character under the cursor.
    position: usize,
    /// Character at `position`, or `None` once the cursor is past the end.
    current_char: Option<char>,
}
167
168impl Tokenizer {
169    pub fn new(input: &str) -> Self {
170        let chars: Vec<char> = input.chars().collect();
171        let current_char = chars.first().copied();
172
173        Self {
174            input: chars,
175            position: 0,
176            current_char,
177        }
178    }
179
180    fn advance(&mut self) {
181        self.position += 1;
182        self.current_char = self.input.get(self.position).copied();
183    }
184
185    fn peek(&self) -> Option<char> {
186        self.input.get(self.position + 1).copied()
187    }
188
189    fn skip_whitespace(&mut self) {
190        while let Some(ch) = self.current_char {
191            if ch.is_whitespace() {
192                self.advance();
193            } else {
194                break;
195            }
196        }
197    }
198
199    fn read_string(&mut self, quote_char: char) -> Result<String> {
200        let mut value = String::new();
201        self.advance(); // Skip opening quote
202
203        while let Some(ch) = self.current_char {
204            if ch == quote_char {
205                self.advance(); // Skip closing quote
206                return Ok(value);
207            } else if ch == '\\' {
208                self.advance();
209                if let Some(escaped) = self.current_char {
210                    let mapped = match escaped {
211                        'n' => Some('\n'),
212                        't' => Some('\t'),
213                        'r' => Some('\r'),
214                        '\\' => Some('\\'),
215                        '\'' => Some('\''),
216                        '"' => Some('"'),
217                        _ => None,
218                    };
219                    match mapped {
220                        Some(c) => value.push(c),
221                        None => {
222                            value.push('\\');
223                            value.push(escaped);
224                        }
225                    }
226                    self.advance();
227                }
228            } else {
229                value.push(ch);
230                self.advance();
231            }
232        }
233
234        Err(Error::cql_parse("Unterminated string literal"))
235    }
236
237    fn read_number(&mut self) -> Result<Token> {
238        let mut value = String::new();
239        let mut has_dot = false;
240
241        while let Some(ch) = self.current_char {
242            if ch.is_ascii_digit() {
243                value.push(ch);
244                self.advance();
245            } else if ch == '.' && !has_dot {
246                has_dot = true;
247                value.push(ch);
248                self.advance();
249            } else {
250                break;
251            }
252        }
253
254        if has_dot {
255            value
256                .parse::<f64>()
257                .map(Token::Float)
258                .map_err(|_| Error::cql_parse(format!("Invalid float: {}", value)))
259        } else {
260            value
261                .parse::<i64>()
262                .map(Token::Integer)
263                .map_err(|_| Error::cql_parse(format!("Invalid integer: {}", value)))
264        }
265    }
266
267    fn read_identifier(&mut self) -> String {
268        let mut value = String::new();
269
270        while let Some(ch) = self.current_char {
271            if ch.is_alphanumeric() || ch == '_' {
272                value.push(ch);
273                self.advance();
274            } else {
275                break;
276            }
277        }
278
279        value
280    }
281
282    /// Consume the literal keyword `BY` (case-insensitive) following GROUP/ORDER.
283    fn expect_by_keyword(&mut self, after: &str) -> Result<()> {
284        self.skip_whitespace();
285        let next = self.read_identifier();
286        if next.eq_ignore_ascii_case("BY") {
287            Ok(())
288        } else {
289            Err(Error::cql_parse(format!("Expected BY after {}", after)))
290        }
291    }
292
293    pub fn next_token(&mut self) -> Result<Token> {
294        loop {
295            let ch = match self.current_char {
296                None => return Ok(Token::Eof),
297                Some(c) => c,
298            };
299
300            // Single-character punctuation / operators that don't need lookahead.
301            let single = match ch {
302                '(' => Some(Token::LeftParen),
303                ')' => Some(Token::RightParen),
304                '[' => Some(Token::LeftBracket),
305                ']' => Some(Token::RightBracket),
306                '{' => Some(Token::LeftBrace),
307                '}' => Some(Token::RightBrace),
308                ',' => Some(Token::Comma),
309                ';' => Some(Token::Semicolon),
310                '.' => Some(Token::Dot),
311                '?' => Some(Token::Question),
312                '+' => Some(Token::Plus),
313                '-' => Some(Token::Minus),
314                '*' => Some(Token::Multiply),
315                '/' => Some(Token::Divide),
316                '%' => Some(Token::Modulo),
317                '=' => Some(Token::Equal),
318                _ => None,
319            };
320            if let Some(tok) = single {
321                self.advance();
322                return Ok(tok);
323            }
324
325            match ch {
326                c if c.is_whitespace() => self.skip_whitespace(),
327                '!' => {
328                    if self.peek() == Some('=') {
329                        self.advance();
330                        self.advance();
331                        return Ok(Token::NotEqual);
332                    }
333                    return Err(Error::cql_parse("Unexpected character: !"));
334                }
335                '<' => {
336                    return Ok(match self.peek() {
337                        Some('=') => {
338                            self.advance();
339                            self.advance();
340                            Token::LessThanEqual
341                        }
342                        Some('>') => {
343                            self.advance();
344                            self.advance();
345                            Token::NotEqual
346                        }
347                        _ => {
348                            self.advance();
349                            Token::LessThan
350                        }
351                    });
352                }
353                '>' => {
354                    return Ok(if self.peek() == Some('=') {
355                        self.advance();
356                        self.advance();
357                        Token::GreaterThanEqual
358                    } else {
359                        self.advance();
360                        Token::GreaterThan
361                    });
362                }
363                '\'' | '"' => return self.read_string(ch).map(Token::String),
364                c if c.is_ascii_digit() => return self.read_number(),
365                c if c.is_alphabetic() || c == '_' => {
366                    let identifier = self.read_identifier();
367                    // GROUP BY / ORDER BY are two-word keywords; resolve here so
368                    // the parser only ever sees a single GroupBy / OrderBy token.
369                    if identifier.eq_ignore_ascii_case("GROUP") {
370                        self.expect_by_keyword("GROUP")?;
371                        return Ok(Token::GroupBy);
372                    }
373                    if identifier.eq_ignore_ascii_case("ORDER") {
374                        self.expect_by_keyword("ORDER")?;
375                        return Ok(Token::OrderBy);
376                    }
377                    return Ok(keyword_for(&identifier).unwrap_or(Token::Identifier(identifier)));
378                }
379                other => return Err(Error::cql_parse(format!("Unexpected character: {}", other))),
380            }
381        }
382    }
383}
384
385impl SelectParser {
386    /// Create a new SELECT parser
387    pub fn new(cql: &str) -> Result<Self> {
388        let mut tokenizer = Tokenizer::new(cql);
389        let current_token = Some(tokenizer.next_token()?);
390        Ok(Self {
391            current_token,
392            tokenizer,
393        })
394    }
395
396    /// Advance to the next token
397    fn advance(&mut self) -> Result<()> {
398        self.current_token = Some(self.tokenizer.next_token()?);
399        Ok(())
400    }
401
402    /// Borrow the current token. Returns `&Token::Eof` if for some reason the
403    /// stream is exhausted (in practice `current_token` is always `Some`).
404    fn peek(&self) -> &Token {
405        self.current_token.as_ref().unwrap_or(&Token::Eof)
406    }
407
408    /// True if the current token equals `tok` (by discriminant, not payload).
409    fn at(&self, tok: &Token) -> bool {
410        self.current_token
411            .as_ref()
412            .is_some_and(|cur| std::mem::discriminant(cur) == std::mem::discriminant(tok))
413    }
414
415    /// Consume the current token if it matches `tok` (by discriminant); return whether it did.
416    fn eat(&mut self, tok: &Token) -> Result<bool> {
417        if self.at(tok) {
418            self.advance()?;
419            Ok(true)
420        } else {
421            Ok(false)
422        }
423    }
424
425    /// Check if current token matches expected token
426    fn expect(&mut self, expected: Token) -> Result<()> {
427        if let Some(ref current) = self.current_token {
428            if std::mem::discriminant(current) == std::mem::discriminant(&expected) {
429                self.advance()?;
430                Ok(())
431            } else {
432                Err(Error::cql_parse(format!(
433                    "Expected {:?}, found {:?}",
434                    expected, current
435                )))
436            }
437        } else {
438            Err(Error::cql_parse("Unexpected end of input"))
439        }
440    }
441
442    /// Consume an integer literal token (used by LIMIT and OFFSET parsers).
443    fn expect_integer(&mut self, context: &str) -> Result<i64> {
444        if let Some(Token::Integer(n)) = self.current_token {
445            self.advance()?;
446            Ok(n)
447        } else {
448            Err(Error::cql_parse(format!(
449                "Expected integer after {}",
450                context
451            )))
452        }
453    }
454
455    /// Parse `name` or `name . column` into a [`ColumnRef`], assuming the
456    /// current token is the leading identifier.
457    fn parse_column_ref(&mut self, table_or_column: String) -> Result<ColumnRef> {
458        // Caller has already consumed the leading identifier.
459        if !self.eat(&Token::Dot)? {
460            return Ok(ColumnRef::new(table_or_column));
461        }
462        if let Some(Token::Identifier(column)) = self.current_token.clone() {
463            self.advance()?;
464            Ok(ColumnRef::qualified(table_or_column, column))
465        } else {
466            Err(Error::cql_parse(
467                "Expected column name after table qualifier",
468            ))
469        }
470    }
471
472    /// Parse a complete SELECT statement
473    pub fn parse_select_statement(&mut self) -> Result<SelectStatement> {
474        self.expect(Token::Select)?;
475        let select_clause = self.parse_select_clause()?;
476
477        let from_clause = if self.eat(&Token::From)? {
478            Some(self.parse_from_clause()?)
479        } else {
480            None
481        };
482
483        let where_clause = if self.eat(&Token::Where)? {
484            Some(self.parse_where_expression()?)
485        } else {
486            None
487        };
488
489        let group_by = if self.eat(&Token::GroupBy)? {
490            Some(self.parse_group_by_clause()?)
491        } else {
492            None
493        };
494
495        let having_clause = if self.eat(&Token::Having)? {
496            Some(self.parse_where_expression()?)
497        } else {
498            None
499        };
500
501        let order_by = if self.eat(&Token::OrderBy)? {
502            Some(self.parse_order_by_clause()?)
503        } else {
504            None
505        };
506
507        let limit = if self.eat(&Token::Limit)? {
508            Some(self.parse_limit_clause()?)
509        } else {
510            None
511        };
512
513        let offset = if self.eat(&Token::Offset)? {
514            Some(self.expect_integer("OFFSET")? as u64)
515        } else {
516            None
517        };
518
519        let allow_filtering = if self.eat(&Token::Allow)? {
520            self.expect(Token::Filtering)?;
521            true
522        } else {
523            false
524        };
525
526        Ok(SelectStatement {
527            select_clause,
528            from_clause,
529            where_clause,
530            group_by,
531            having_clause,
532            order_by,
533            limit,
534            offset,
535            allow_filtering,
536        })
537    }
538
539    /// Parse SELECT clause
540    fn parse_select_clause(&mut self) -> Result<SelectClause> {
541        let distinct = self.eat(&Token::Distinct)?;
542
543        if self.eat(&Token::Multiply)? {
544            return Ok(SelectClause::All);
545        }
546
547        let mut expressions = Vec::new();
548        loop {
549            expressions.push(self.parse_select_expression()?);
550            if !self.eat(&Token::Comma)? {
551                break;
552            }
553        }
554
555        if distinct {
556            Ok(SelectClause::Distinct(expressions))
557        } else {
558            Ok(SelectClause::Columns(expressions))
559        }
560    }
561
562    /// Parse a single SELECT expression
563    fn parse_select_expression(&mut self) -> Result<SelectExpression> {
564        let expr = self.parse_primary_expression()?;
565
566        // Check for AS alias
567        if self.eat(&Token::As)? {
568            if let Some(Token::Identifier(alias)) = self.current_token.clone() {
569                self.advance()?;
570                return Ok(SelectExpression::Aliased(Box::new(expr), alias));
571            }
572            return Err(Error::cql_parse("Expected alias name after AS"));
573        }
574
575        Ok(expr)
576    }
577
578    /// Parse primary expression (column, function, literal, etc.)
579    fn parse_primary_expression(&mut self) -> Result<SelectExpression> {
580        if let Some(agg) = aggregate_for(self.peek()) {
581            self.advance()?;
582            return self.parse_aggregate_function(agg);
583        }
584
585        // Take ownership/copy of literal payloads up front so we can call &mut self.
586        match self.current_token.clone() {
587            Some(Token::Identifier(name)) => {
588                self.advance()?;
589
590                // Function call: identifier ( args )
591                if self.eat(&Token::LeftParen)? {
592                    let mut args = Vec::new();
593                    if !self.at(&Token::RightParen) {
594                        loop {
595                            args.push(self.parse_select_expression()?);
596                            if !self.eat(&Token::Comma)? {
597                                break;
598                            }
599                        }
600                    }
601                    self.expect(Token::RightParen)?;
602                    return Ok(SelectExpression::Function(FunctionCall { name, args }));
603                }
604
605                // Either bare column or qualified table.column.
606                let col = self.parse_column_ref(name)?;
607                Ok(SelectExpression::Column(col))
608            }
609            Some(Token::Integer(n)) => {
610                self.advance()?;
611                Ok(SelectExpression::Literal(Value::BigInt(n)))
612            }
613            Some(Token::Float(f)) => {
614                self.advance()?;
615                Ok(SelectExpression::Literal(Value::Float(f)))
616            }
617            Some(Token::String(s)) => {
618                self.advance()?;
619                Ok(SelectExpression::Literal(Value::Text(s)))
620            }
621            Some(Token::Boolean(b)) => {
622                self.advance()?;
623                Ok(SelectExpression::Literal(Value::Boolean(b)))
624            }
625            Some(Token::Null) => {
626                self.advance()?;
627                Ok(SelectExpression::Literal(Value::Null))
628            }
629            Some(Token::LeftParen) => {
630                self.advance()?;
631                let expr = self.parse_select_expression()?;
632                self.expect(Token::RightParen)?;
633                Ok(expr)
634            }
635            other => Err(Error::cql_parse(format!(
636                "Unexpected token in expression: {:?}",
637                other
638            ))),
639        }
640    }
641
642    /// Parse aggregate function
643    fn parse_aggregate_function(&mut self, agg_type: AggregateType) -> Result<SelectExpression> {
644        self.expect(Token::LeftParen)?;
645
646        let distinct = self.eat(&Token::Distinct)?;
647        let mut args = Vec::new();
648
649        if !self.at(&Token::RightParen) {
650            // COUNT(*) is the only place `*` is valid as an aggregate arg; treat it as a wildcard column.
651            if self.eat(&Token::Multiply)? {
652                args.push(SelectExpression::Column(ColumnRef::new("*".to_string())));
653            } else {
654                loop {
655                    args.push(self.parse_select_expression()?);
656                    if !self.eat(&Token::Comma)? {
657                        break;
658                    }
659                }
660            }
661        }
662
663        self.expect(Token::RightParen)?;
664
665        Ok(SelectExpression::Aggregate(AggregateFunction {
666            function: agg_type,
667            args,
668            distinct,
669        }))
670    }
671
672    /// Parse FROM clause
673    fn parse_from_clause(&mut self) -> Result<FromClause> {
674        // Cassandra CQL only supports single table queries - NO JOINS
675        let Some(Token::Identifier(first_identifier)) = self.current_token.clone() else {
676            return Err(Error::cql_parse("Expected table name in FROM clause"));
677        };
678        self.advance()?;
679
680        // Qualified name: keyspace.table
681        let table_name = if self.eat(&Token::Dot)? {
682            if let Some(Token::Identifier(actual_table)) = self.current_token.clone() {
683                self.advance()?;
684                format!("{}.{}", first_identifier, actual_table)
685            } else {
686                return Err(Error::cql_parse("Expected table name after keyspace"));
687            }
688        } else {
689            first_identifier
690        };
691
692        let table = TableId::new(table_name);
693
694        // Optional alias - but only if the next identifier isn't a clause keyword
695        // that the lookahead-free tokenizer would otherwise hand us as a plain identifier.
696        // (In practice clause keywords already tokenize as their own variants, but we
697        // keep this defensive check to preserve historical behavior.)
698        const CLAUSE_KEYWORDS: &[&str] = &["WHERE", "GROUP", "ORDER", "HAVING", "LIMIT"];
699        if let Some(Token::Identifier(alias)) = self.current_token.clone() {
700            let is_clause_kw = CLAUSE_KEYWORDS
701                .iter()
702                .any(|kw| alias.eq_ignore_ascii_case(kw));
703            if !is_clause_kw {
704                self.advance()?;
705                return Ok(FromClause::TableAlias(table, alias));
706            }
707        }
708
709        Ok(FromClause::Table(table))
710    }
711
712    /// Parse WHERE expression
713    fn parse_where_expression(&mut self) -> Result<WhereExpression> {
714        self.parse_or_expression()
715    }
716
717    /// Parse OR expression
718    fn parse_or_expression(&mut self) -> Result<WhereExpression> {
719        let first = self.parse_and_expression()?;
720        let mut or_exprs = vec![first];
721        while self.eat(&Token::Or)? {
722            or_exprs.push(self.parse_and_expression()?);
723        }
724        Ok(unwrap_singleton(or_exprs, WhereExpression::Or))
725    }
726
727    /// Parse AND expression
728    fn parse_and_expression(&mut self) -> Result<WhereExpression> {
729        let first = self.parse_not_expression()?;
730        let mut and_exprs = vec![first];
731        while self.eat(&Token::And)? {
732            and_exprs.push(self.parse_not_expression()?);
733        }
734        Ok(unwrap_singleton(and_exprs, WhereExpression::And))
735    }
736
737    /// Parse NOT expression
738    fn parse_not_expression(&mut self) -> Result<WhereExpression> {
739        if self.eat(&Token::Not)? {
740            let expr = self.parse_comparison_expression()?;
741            Ok(WhereExpression::Not(Box::new(expr)))
742        } else {
743            self.parse_comparison_expression()
744        }
745    }
746
747    /// Parse comparison expression
748    fn parse_comparison_expression(&mut self) -> Result<WhereExpression> {
749        if self.eat(&Token::LeftParen)? {
750            let expr = self.parse_where_expression()?;
751            self.expect(Token::RightParen)?;
752            return Ok(WhereExpression::Parentheses(Box::new(expr)));
753        }
754
755        let left = self.parse_select_expression()?;
756
757        // Map a "simple" binary comparison token to its operator. For operators
758        // with bespoke right-hand-side parsing (IN, BETWEEN, IS, CONTAINS) we
759        // handle them in the match below and return early.
760        let simple_op = match self.peek() {
761            Token::Equal => Some(ComparisonOperator::Equal),
762            Token::NotEqual => Some(ComparisonOperator::NotEqual),
763            Token::LessThan => Some(ComparisonOperator::LessThan),
764            Token::LessThanEqual => Some(ComparisonOperator::LessThanOrEqual),
765            Token::GreaterThan => Some(ComparisonOperator::GreaterThan),
766            Token::GreaterThanEqual => Some(ComparisonOperator::GreaterThanOrEqual),
767            Token::Like => Some(ComparisonOperator::Like),
768            _ => None,
769        };
770
771        if let Some(op) = simple_op {
772            self.advance()?;
773            let right = ComparisonRightSide::Value(self.parse_select_expression()?);
774            return Ok(WhereExpression::Comparison(ComparisonExpression {
775                left,
776                operator: op,
777                right,
778            }));
779        }
780
781        let operator = match self.peek() {
782            Token::In => {
783                self.advance()?;
784                let right = self.parse_in_expression()?;
785                return Ok(WhereExpression::Comparison(ComparisonExpression {
786                    left,
787                    operator: ComparisonOperator::In,
788                    right,
789                }));
790            }
791            Token::Between => {
792                self.advance()?;
793                let start = self.parse_select_expression()?;
794                self.expect(Token::And)?;
795                let end = self.parse_select_expression()?;
796                return Ok(WhereExpression::Comparison(ComparisonExpression {
797                    left,
798                    operator: ComparisonOperator::Between,
799                    right: ComparisonRightSide::Range(start, end),
800                }));
801            }
802            Token::Is => {
803                self.advance()?;
804                let op = if self.eat(&Token::Not)? {
805                    ComparisonOperator::IsNotNull
806                } else {
807                    ComparisonOperator::IsNull
808                };
809                self.expect(Token::Null)?;
810                op
811            }
812            Token::Contains => {
813                self.advance()?;
814                if self.eat(&Token::Key)? {
815                    ComparisonOperator::ContainsKey
816                } else {
817                    ComparisonOperator::Contains
818                }
819            }
820            other => {
821                return Err(Error::cql_parse(format!(
822                    "Expected comparison operator, found {:?}",
823                    other
824                )));
825            }
826        };
827
828        // Only IS NULL / IS NOT NULL / CONTAINS / CONTAINS KEY reach here.
829        let right = match operator {
830            ComparisonOperator::IsNull | ComparisonOperator::IsNotNull => {
831                ComparisonRightSide::Value(SelectExpression::Literal(Value::Null))
832            }
833            _ => ComparisonRightSide::Value(self.parse_select_expression()?),
834        };
835
836        Ok(WhereExpression::Comparison(ComparisonExpression {
837            left,
838            operator,
839            right,
840        }))
841    }
842
843    /// Parse IN expression value list
844    fn parse_in_expression(&mut self) -> Result<ComparisonRightSide> {
845        self.expect(Token::LeftParen)?;
846        let mut values = Vec::new();
847
848        if !self.at(&Token::RightParen) {
849            loop {
850                values.push(self.parse_select_expression()?);
851                if !self.eat(&Token::Comma)? {
852                    break;
853                }
854            }
855        }
856
857        self.expect(Token::RightParen)?;
858        Ok(ComparisonRightSide::ValueList(values))
859    }
860
861    /// Parse GROUP BY clause
862    fn parse_group_by_clause(&mut self) -> Result<GroupByClause> {
863        let mut columns = Vec::new();
864
865        loop {
866            let Some(Token::Identifier(name)) = self.current_token.clone() else {
867                return Err(Error::cql_parse("Expected column name in GROUP BY"));
868            };
869            self.advance()?;
870            columns.push(self.parse_column_ref(name)?);
871
872            if !self.eat(&Token::Comma)? {
873                break;
874            }
875        }
876
877        Ok(GroupByClause { columns })
878    }
879
880    /// Parse ORDER BY clause
881    fn parse_order_by_clause(&mut self) -> Result<OrderByClause> {
882        let mut items = Vec::new();
883
884        loop {
885            let expression = self.parse_select_expression()?;
886
887            let direction = if self.eat(&Token::Desc)? {
888                SortDirection::Descending
889            } else if self.eat(&Token::Asc)? {
890                SortDirection::Ascending
891            } else {
892                SortDirection::Ascending
893            };
894
895            items.push(OrderByItem {
896                expression,
897                direction,
898            });
899
900            if !self.eat(&Token::Comma)? {
901                break;
902            }
903        }
904
905        Ok(OrderByClause { items })
906    }
907
908    /// Parse LIMIT clause
909    fn parse_limit_clause(&mut self) -> Result<LimitClause> {
910        let count = self.expect_integer("LIMIT")? as u64;
911        Ok(LimitClause {
912            count,
913            per_partition: false, // TODO: Add PER PARTITION support
914        })
915    }
916}
917
918/// If `exprs` has a single element, return it; otherwise wrap with `wrap`
919/// (typically `WhereExpression::And` / `WhereExpression::Or`). The vector is
920/// guaranteed non-empty by callers that always push at least one element.
921fn unwrap_singleton<F>(mut exprs: Vec<WhereExpression>, wrap: F) -> WhereExpression
922where
923    F: FnOnce(Vec<WhereExpression>) -> WhereExpression,
924{
925    if exprs.len() == 1 {
926        exprs.pop().expect("checked len == 1")
927    } else {
928        wrap(exprs)
929    }
930}
931
932/// Main parsing function for SELECT statements
933pub fn parse_select(cql: &str) -> Result<SelectStatement> {
934    let mut parser = SelectParser::new(cql)?;
935    parser.parse_select_statement()
936}
937
#[cfg(all(test, feature = "state_machine"))]
mod tests {
    use super::*;

    /// `SELECT *` from a plain table gives the wildcard clause and a table source.
    #[test]
    fn test_simple_select() {
        let parsed = parse_select("SELECT * FROM users").unwrap();
        assert_eq!(parsed.select_clause, SelectClause::All);
        match parsed.from_clause {
            Some(FromClause::Table(table)) => assert_eq!(table.name(), "users"),
            _ => panic!("Expected Table in FROM clause"),
        }
    }

    /// An explicit column list is parsed into one expression per column.
    #[test]
    fn test_select_with_columns() {
        let parsed = parse_select("SELECT id, name, email FROM users").unwrap();
        let SelectClause::Columns(exprs) = parsed.select_clause else {
            panic!("Expected Columns in SELECT clause");
        };
        assert_eq!(exprs.len(), 3);
    }

    /// A FROM-less `SELECT 1` yields a single BigInt literal.
    #[test]
    fn test_select_constant() {
        let parsed = parse_select("SELECT 1").unwrap();
        assert!(parsed.from_clause.is_none());
        let SelectClause::Columns(exprs) = parsed.select_clause else {
            panic!("Expected Columns in SELECT clause");
        };
        assert_eq!(exprs.len(), 1);
        if !matches!(&exprs[0], SelectExpression::Literal(Value::BigInt(1))) {
            panic!("Expected literal BigInt 1, got: {:?}", &exprs[0]);
        }
    }

    /// A simple equality predicate produces a WHERE clause.
    #[test]
    fn test_select_with_where() {
        let parsed = parse_select("SELECT * FROM users WHERE id = 123").unwrap();
        assert!(parsed.where_clause.is_some());
    }

    /// Aggregate functions with GROUP BY are recognised as an aggregation query.
    #[test]
    fn test_select_with_aggregates() {
        let query = "SELECT COUNT(*), AVG(age) FROM users GROUP BY city";
        let parsed = parse_select(query).unwrap();
        assert!(parsed.requires_aggregation());
        assert!(parsed.group_by.is_some());
    }

    /// AND combined with a parenthesised OR group parses successfully.
    #[test]
    fn test_complex_where_clause() {
        let query = "SELECT * FROM users WHERE age > 21 AND (city = 'NYC' OR city = 'LA')";
        let parsed = parse_select(query).unwrap();
        assert!(parsed.where_clause.is_some());
    }

    /// ORDER BY with mixed directions followed by LIMIT keeps the limit count.
    #[test]
    fn test_order_by_and_limit() {
        let query = "SELECT * FROM users ORDER BY created_at DESC, name ASC LIMIT 10";
        let parsed = parse_select(query).unwrap();
        assert!(parsed.order_by.is_some());
        assert!(parsed.limit.is_some());

        if let Some(LimitClause { count, .. }) = parsed.limit {
            assert_eq!(count, 10);
        }
    }

    /// An IN list of string literals produces a WHERE clause.
    #[test]
    fn test_in_clause() {
        let query = "SELECT * FROM users WHERE status IN ('active', 'pending', 'suspended')";
        let parsed = parse_select(query).unwrap();
        assert!(parsed.where_clause.is_some());
    }

    /// A BETWEEN range over string literals produces a WHERE clause.
    #[test]
    fn test_between_clause() {
        let query = "SELECT * FROM events WHERE created_at BETWEEN '2024-01-01' AND '2024-12-31'";
        let parsed = parse_select(query).unwrap();
        assert!(parsed.where_clause.is_some());
    }
}