Skip to main content

sql_cli/sql/
recursive_parser.rs

1// Keep chrono imports for the parser implementation
2
3// Re-exports for backward compatibility - these serve as both imports and re-exports
4pub use super::parser::ast::{
5    CTEType, Comment, Condition, DataFormat, FileCTESpec, FrameBound, FrameUnit, HttpMethod,
6    IntoTable, JoinClause, JoinCondition, JoinOperator, JoinType, LogicalOp, OrderByColumn,
7    OrderByItem, PivotAggregate, SelectItem, SelectStatement, SetOperation, SingleJoinCondition,
8    SortDirection, SqlExpression, TableFunction, TableSource, WebCTESpec, WhenBranch, WhereClause,
9    WindowFrame, WindowSpec, CTE,
10};
11pub use super::parser::legacy::{ParseContext, ParseState, Schema, SqlParser, SqlToken, TableInfo};
12pub use super::parser::lexer::{Lexer, LexerMode, Token};
13pub use super::parser::ParserConfig;
14
15// Re-export formatting functions for backward compatibility
16pub use super::parser::formatter::{format_ast_tree, format_sql_pretty, format_sql_pretty_compact};
17
18// New AST-based formatter
19pub use super::parser::ast_formatter::{format_sql_ast, format_sql_ast_with_config, FormatConfig};
20
21// Import the new expression modules
22use super::parser::expressions::arithmetic::{
23    parse_additive as parse_additive_expr, parse_multiplicative as parse_multiplicative_expr,
24    ParseArithmetic,
25};
26use super::parser::expressions::case::{parse_case_expression as parse_case_expr, ParseCase};
27use super::parser::expressions::comparison::{
28    parse_comparison as parse_comparison_expr, parse_in_operator, ParseComparison,
29};
30use super::parser::expressions::logical::{
31    parse_logical_and as parse_logical_and_expr, parse_logical_or as parse_logical_or_expr,
32    ParseLogical,
33};
34use super::parser::expressions::primary::{
35    parse_primary as parse_primary_expr, ParsePrimary, PrimaryExpressionContext,
36};
37use super::parser::expressions::ExpressionParser;
38
39// Import function registry to check for function existence
40use crate::sql::functions::{FunctionCategory, FunctionRegistry};
41use crate::sql::generators::GeneratorRegistry;
42use std::sync::Arc;
43
44// Import Web CTE parser
45use super::parser::file_cte_parser::FileCteParser;
46use super::parser::web_cte_parser::WebCteParser;
47
48/// Parser mode - controls whether comments are preserved in AST
49#[derive(Debug, Clone, Copy, PartialEq)]
50pub enum ParserMode {
51    /// Standard parsing - skip comments (current behavior, backward compatible)
52    Standard,
53    /// Preserve comments in AST (opt-in for formatters)
54    PreserveComments,
55}
56
57impl Default for ParserMode {
58    fn default() -> Self {
59        ParserMode::Standard
60    }
61}
62
63pub struct Parser {
64    lexer: Lexer,
65    pub current_token: Token,    // Made public for web_cte_parser access
66    in_method_args: bool,        // Track if we're parsing method arguments
67    columns: Vec<String>,        // Known column names for context-aware parsing
68    paren_depth: i32,            // Track parentheses nesting depth
69    paren_depth_stack: Vec<i32>, // Stack to save/restore paren depth for nested contexts
70    _config: ParserConfig,       // Parser configuration including case sensitivity
71    debug_trace: bool,           // Enable detailed token-by-token trace
72    trace_depth: usize,          // Track recursion depth for indented trace
73    function_registry: Arc<FunctionRegistry>, // Function registry for validation
74    generator_registry: Arc<GeneratorRegistry>, // Generator registry for table functions
75    mode: ParserMode,            // Parser mode for comment preservation
76}
77
78impl Parser {
79    #[must_use]
80    pub fn new(input: &str) -> Self {
81        Self::with_mode(input, ParserMode::default())
82    }
83
84    /// Create a new parser with explicit mode for comment preservation
85    #[must_use]
86    pub fn with_mode(input: &str, mode: ParserMode) -> Self {
87        // Choose lexer mode based on parser mode
88        let lexer_mode = match mode {
89            ParserMode::Standard => LexerMode::SkipComments,
90            ParserMode::PreserveComments => LexerMode::PreserveComments,
91        };
92
93        let mut lexer = Lexer::with_mode(input, lexer_mode);
94        let current_token = lexer.next_token();
95        Self {
96            lexer,
97            current_token,
98            in_method_args: false,
99            columns: Vec::new(),
100            paren_depth: 0,
101            paren_depth_stack: Vec::new(),
102            _config: ParserConfig::default(),
103            debug_trace: false,
104            trace_depth: 0,
105            function_registry: Arc::new(FunctionRegistry::new()),
106            generator_registry: Arc::new(GeneratorRegistry::new()),
107            mode,
108        }
109    }
110
111    #[must_use]
112    pub fn with_config(input: &str, config: ParserConfig) -> Self {
113        let mut lexer = Lexer::new(input);
114        let current_token = lexer.next_token();
115        Self {
116            lexer,
117            current_token,
118            in_method_args: false,
119            columns: Vec::new(),
120            paren_depth: 0,
121            paren_depth_stack: Vec::new(),
122            _config: config,
123            debug_trace: false,
124            trace_depth: 0,
125            function_registry: Arc::new(FunctionRegistry::new()),
126            generator_registry: Arc::new(GeneratorRegistry::new()),
127            mode: ParserMode::default(),
128        }
129    }
130
131    #[must_use]
132    pub fn with_columns(mut self, columns: Vec<String>) -> Self {
133        self.columns = columns;
134        self
135    }
136
137    #[must_use]
138    pub fn with_debug_trace(mut self, enabled: bool) -> Self {
139        self.debug_trace = enabled;
140        self
141    }
142
143    #[must_use]
144    pub fn with_function_registry(mut self, registry: Arc<FunctionRegistry>) -> Self {
145        self.function_registry = registry;
146        self
147    }
148
149    #[must_use]
150    pub fn with_generator_registry(mut self, registry: Arc<GeneratorRegistry>) -> Self {
151        self.generator_registry = registry;
152        self
153    }
154
155    fn trace_enter(&mut self, context: &str) {
156        if self.debug_trace {
157            let indent = "  ".repeat(self.trace_depth);
158            eprintln!("{}→ {} | Token: {:?}", indent, context, self.current_token);
159            self.trace_depth += 1;
160        }
161    }
162
163    fn trace_exit(&mut self, context: &str, result: &Result<impl std::fmt::Debug, String>) {
164        if self.debug_trace {
165            self.trace_depth = self.trace_depth.saturating_sub(1);
166            let indent = "  ".repeat(self.trace_depth);
167            match result {
168                Ok(val) => eprintln!("{}← {} ✓ | Result: {:?}", indent, context, val),
169                Err(e) => eprintln!("{}← {} ✗ | Error: {}", indent, context, e),
170            }
171        }
172    }
173
174    fn trace_token(&self, action: &str) {
175        if self.debug_trace {
176            let indent = "  ".repeat(self.trace_depth);
177            eprintln!("{}  {} | Token: {:?}", indent, action, self.current_token);
178        }
179    }
180
181    #[allow(dead_code)]
182    fn peek_token(&self) -> Option<Token> {
183        // Alternative peek that returns owned token
184        let mut temp_lexer = self.lexer.clone();
185        let next_token = temp_lexer.next_token();
186        if matches!(next_token, Token::Eof) {
187            None
188        } else {
189            Some(next_token)
190        }
191    }
192
193    /// Check if current token is one of the reserved keywords that should stop parsing
194    /// Check if an identifier string is a reserved keyword (for backward compatibility)
195    /// This is used when the lexer hasn't properly tokenized keywords and they come through
196    /// as Token::Identifier instead of their proper token types
197    fn is_identifier_reserved(id: &str) -> bool {
198        let id_upper = id.to_uppercase();
199        matches!(
200            id_upper.as_str(),
201            "ORDER" | "HAVING" | "LIMIT" | "OFFSET" | "UNION" | "INTERSECT" | "EXCEPT"
202        )
203    }
204
205    /// Get comparison operator string representation (for autocomplete context)
206    const COMPARISON_OPERATORS: [&'static str; 6] = [" > ", " < ", " >= ", " <= ", " = ", " != "];
207
208    pub fn consume(&mut self, expected: Token) -> Result<(), String> {
209        self.trace_token(&format!("Consuming expected {:?}", expected));
210        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
211            // Track parentheses depth
212            self.update_paren_depth(&expected)?;
213
214            self.current_token = self.lexer.next_token();
215            Ok(())
216        } else {
217            // Provide better error messages for common cases
218            let error_msg = match (&expected, &self.current_token) {
219                (Token::RightParen, Token::Eof) if self.paren_depth > 0 => {
220                    format!(
221                        "Unclosed parenthesis - missing {} closing parenthes{}",
222                        self.paren_depth,
223                        if self.paren_depth == 1 { "is" } else { "es" }
224                    )
225                }
226                (Token::RightParen, _) if self.paren_depth > 0 => {
227                    format!(
228                        "Expected closing parenthesis but found {:?} (currently {} unclosed parenthes{})",
229                        self.current_token,
230                        self.paren_depth,
231                        if self.paren_depth == 1 { "is" } else { "es" }
232                    )
233                }
234                _ => format!("Expected {:?}, found {:?}", expected, self.current_token),
235            };
236            Err(error_msg)
237        }
238    }
239
240    pub fn advance(&mut self) {
241        // Track parentheses depth when advancing
242        match &self.current_token {
243            Token::LeftParen => self.paren_depth += 1,
244            Token::RightParen => {
245                self.paren_depth -= 1;
246                // Note: We don't check for < 0 here because advance() is used
247                // in contexts where we're not necessarily expecting a right paren
248            }
249            _ => {}
250        }
251        let old_token = self.current_token.clone();
252        self.current_token = self.lexer.next_token();
253        if self.debug_trace {
254            let indent = "  ".repeat(self.trace_depth);
255            eprintln!(
256                "{}  Advanced: {:?} → {:?}",
257                indent, old_token, self.current_token
258            );
259        }
260    }
261
262    /// Collect all leading comments before a SQL construct
263    /// This consumes comment tokens and returns them as a Vec<Comment>
264    fn collect_leading_comments(&mut self) -> Vec<Comment> {
265        let mut comments = Vec::new();
266        loop {
267            match &self.current_token {
268                Token::LineComment(text) => {
269                    comments.push(Comment::line(text.clone()));
270                    self.advance();
271                }
272                Token::BlockComment(text) => {
273                    comments.push(Comment::block(text.clone()));
274                    self.advance();
275                }
276                _ => break,
277            }
278        }
279        comments
280    }
281
282    /// Collect a trailing inline comment (on the same line)
283    /// This consumes a single comment token if present
284    fn collect_trailing_comment(&mut self) -> Option<Comment> {
285        match &self.current_token {
286            Token::LineComment(text) => {
287                let comment = Some(Comment::line(text.clone()));
288                self.advance();
289                comment
290            }
291            Token::BlockComment(text) => {
292                let comment = Some(Comment::block(text.clone()));
293                self.advance();
294                comment
295            }
296            _ => None,
297        }
298    }
299
300    fn push_paren_depth(&mut self) {
301        self.paren_depth_stack.push(self.paren_depth);
302        self.paren_depth = 0;
303    }
304
305    fn pop_paren_depth(&mut self) {
306        if let Some(depth) = self.paren_depth_stack.pop() {
307            // Ignore the internal depth - just restore the saved value
308            self.paren_depth = depth;
309        }
310    }
311
312    pub fn parse(&mut self) -> Result<SelectStatement, String> {
313        self.trace_enter("parse");
314
315        // Collect leading comments FIRST (before checking for WITH or SELECT)
316        // This allows comments before WITH clauses to be preserved
317        let leading_comments = if self.mode == ParserMode::PreserveComments {
318            self.collect_leading_comments()
319        } else {
320            vec![]
321        };
322
323        // Now check for WITH clause (after consuming comments)
324        let result = if matches!(self.current_token, Token::With) {
325            let mut stmt = self.parse_with_clause()?;
326            // Attach the leading comments we collected
327            stmt.leading_comments = leading_comments;
328            stmt
329        } else {
330            // For SELECT without WITH, pass comments to inner parser
331            let stmt = self.parse_select_statement_with_comments_public(leading_comments)?;
332            self.check_balanced_parentheses()?;
333            stmt
334        };
335
336        self.trace_exit("parse", &Ok(&result));
337        Ok(result)
338    }
339
340    /// Public wrapper that accepts pre-collected comments and checks parens
341    fn parse_select_statement_with_comments_public(
342        &mut self,
343        comments: Vec<Comment>,
344    ) -> Result<SelectStatement, String> {
345        self.parse_select_statement_with_comments(comments)
346    }
347
348    fn parse_with_clause(&mut self) -> Result<SelectStatement, String> {
349        self.consume(Token::With)?;
350        let ctes = self.parse_cte_list()?;
351
352        // Parse the main SELECT statement - use inner version since we're already tracking parens
353        let mut main_query = self.parse_select_statement_inner_no_comments()?;
354        main_query.ctes = ctes;
355
356        // Check for balanced parentheses at the end of parsing
357        self.check_balanced_parentheses()?;
358
359        Ok(main_query)
360    }
361
362    fn parse_with_clause_inner(&mut self) -> Result<SelectStatement, String> {
363        self.consume(Token::With)?;
364        let ctes = self.parse_cte_list()?;
365
366        // Parse the main SELECT statement (without parenthesis checking for subqueries)
367        let mut main_query = self.parse_select_statement_inner()?;
368        main_query.ctes = ctes;
369
370        Ok(main_query)
371    }
372
373    // Helper function to parse CTE list - eliminates duplication
374    fn parse_cte_list(&mut self) -> Result<Vec<CTE>, String> {
375        let mut ctes = Vec::new();
376
377        // Parse CTEs
378        loop {
379            // Check for WEB keyword before the CTE name (WEB uses an outer marker).
380            // FILE CTEs are detected *inside* the parens after AS, since the design
381            // doc uses `WITH name AS (FILE PATH '...')` rather than `WITH FILE name ...`.
382            let is_web = if matches!(&self.current_token, Token::Web) {
383                self.trace_token("Found WEB keyword for CTE");
384                self.advance();
385                true
386            } else {
387                false
388            };
389
390            // Parse CTE name - allow keywords as CTE names since they're valid identifiers in this context
391            let name = match &self.current_token {
392                Token::Identifier(name) => name.clone(),
393                token => {
394                    // Check if this is a keyword that can be used as an identifier
395                    if let Some(keyword) = token.as_keyword_str() {
396                        // Allow keywords as CTE names (they're valid in this context)
397                        keyword.to_lowercase()
398                    } else {
399                        return Err(format!(
400                            "Expected CTE name after {}",
401                            if is_web { "WEB" } else { "WITH or comma" }
402                        ));
403                    }
404                }
405            };
406            self.advance();
407
408            // Optional column list: WITH t(col1, col2) AS ...
409            let column_list = if matches!(self.current_token, Token::LeftParen) {
410                self.advance();
411                let cols = self.parse_identifier_list()?;
412                self.consume(Token::RightParen)?;
413                Some(cols)
414            } else {
415                None
416            };
417
418            // Expect AS
419            self.consume(Token::As)?;
420
421            let cte_type = if is_web {
422                // Expect opening parenthesis for WEB CTE
423                self.consume(Token::LeftParen)?;
424                // Parse WEB CTE specification using dedicated parser
425                let web_spec = WebCteParser::parse(self)?;
426                // Consume closing parenthesis for WEB CTE
427                self.consume(Token::RightParen)?;
428                CTEType::Web(web_spec)
429            } else {
430                // Push depth BEFORE consuming the opening paren, matching the
431                // original standard-CTE flow. This keeps the `(`...`)` pair
432                // balanced inside the inner context.
433                self.push_paren_depth();
434                self.consume(Token::LeftParen)?;
435
436                let result = if matches!(&self.current_token, Token::File) {
437                    self.trace_token("Found FILE keyword inside CTE parens");
438                    self.advance();
439                    let file_spec = FileCteParser::parse(self)?;
440                    CTEType::File(file_spec)
441                } else {
442                    let query = self.parse_select_statement_inner()?;
443                    CTEType::Standard(query)
444                };
445
446                // Expect closing parenthesis while still in CTE context
447                self.consume(Token::RightParen)?;
448                // Now pop to restore outer depth after consuming both parens
449                self.pop_paren_depth();
450                result
451            };
452
453            ctes.push(CTE {
454                name,
455                column_list,
456                cte_type,
457            });
458
459            // Check for more CTEs
460            if !matches!(self.current_token, Token::Comma) {
461                break;
462            }
463            self.advance();
464        }
465
466        Ok(ctes)
467    }
468
469    /// Helper function to parse an optional table alias (with or without AS keyword)
470    fn parse_optional_alias(&mut self) -> Result<Option<String>, String> {
471        if matches!(self.current_token, Token::As) {
472            self.advance();
473            match &self.current_token {
474                Token::Identifier(name) => {
475                    let alias = name.clone();
476                    self.advance();
477                    Ok(Some(alias))
478                }
479                token => {
480                    // Check if it's a reserved keyword - provide helpful error
481                    if let Some(keyword) = token.as_keyword_str() {
482                        Err(format!(
483                            "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
484                            keyword,
485                            keyword.to_lowercase()
486                        ))
487                    } else {
488                        Err("Expected alias name after AS".to_string())
489                    }
490                }
491            }
492        } else if let Token::Identifier(name) = &self.current_token {
493            // AS is optional for table aliases
494            let alias = name.clone();
495            self.advance();
496            Ok(Some(alias))
497        } else {
498            Ok(None)
499        }
500    }
501
502    /// Helper function to check if an identifier is valid (quoted or regular)
503    fn is_valid_identifier(name: &str) -> bool {
504        if name.starts_with('"') && name.ends_with('"') {
505            // Quoted identifier - always valid
506            true
507        } else {
508            // Regular identifier - check if it's alphanumeric or underscore
509            name.chars().all(|c| c.is_alphanumeric() || c == '_')
510        }
511    }
512
513    /// Helper function to update parentheses depth tracking
514    fn update_paren_depth(&mut self, token: &Token) -> Result<(), String> {
515        match token {
516            Token::LeftParen => self.paren_depth += 1,
517            Token::RightParen => {
518                self.paren_depth -= 1;
519                // Check for extra closing parenthesis
520                if self.paren_depth < 0 {
521                    return Err(
522                        "Unexpected closing parenthesis - no matching opening parenthesis"
523                            .to_string(),
524                    );
525                }
526            }
527            _ => {}
528        }
529        Ok(())
530    }
531
532    /// Helper function to parse comma-separated argument list
533    fn parse_argument_list(&mut self) -> Result<Vec<SqlExpression>, String> {
534        let mut args = Vec::new();
535
536        if !matches!(self.current_token, Token::RightParen) {
537            loop {
538                args.push(self.parse_expression()?);
539
540                if matches!(self.current_token, Token::Comma) {
541                    self.advance();
542                } else {
543                    break;
544                }
545            }
546        }
547
548        Ok(args)
549    }
550
551    /// Helper function to check for balanced parentheses at the end of parsing
552    fn check_balanced_parentheses(&self) -> Result<(), String> {
553        if self.paren_depth > 0 {
554            Err(format!(
555                "Unclosed parenthesis - missing {} closing parenthes{}",
556                self.paren_depth,
557                if self.paren_depth == 1 { "is" } else { "es" }
558            ))
559        } else if self.paren_depth < 0 {
560            Err("Extra closing parenthesis found - no matching opening parenthesis".to_string())
561        } else {
562            Ok(())
563        }
564    }
565
566    /// Check if an expression contains aggregate functions (COUNT, SUM, AVG, etc.)
567    /// This is used to detect unsupported patterns in HAVING clause
568    fn contains_aggregate_function(expr: &SqlExpression) -> bool {
569        match expr {
570            SqlExpression::FunctionCall { name, args, .. } => {
571                // Check if this is an aggregate function
572                let upper_name = name.to_uppercase();
573                let is_aggregate = matches!(
574                    upper_name.as_str(),
575                    "COUNT" | "SUM" | "AVG" | "MIN" | "MAX" | "GROUP_CONCAT" | "STRING_AGG"
576                );
577
578                // If this is an aggregate, return true
579                // Otherwise, recursively check arguments
580                is_aggregate || args.iter().any(Self::contains_aggregate_function)
581            }
582            // Recursively check nested expressions
583            SqlExpression::BinaryOp { left, right, .. } => {
584                Self::contains_aggregate_function(left) || Self::contains_aggregate_function(right)
585            }
586            SqlExpression::Not { expr } => Self::contains_aggregate_function(expr),
587            SqlExpression::MethodCall { args, .. } => {
588                args.iter().any(Self::contains_aggregate_function)
589            }
590            SqlExpression::ChainedMethodCall { base, args, .. } => {
591                Self::contains_aggregate_function(base)
592                    || args.iter().any(Self::contains_aggregate_function)
593            }
594            SqlExpression::CaseExpression {
595                when_branches,
596                else_branch,
597            } => {
598                when_branches.iter().any(|branch| {
599                    Self::contains_aggregate_function(&branch.condition)
600                        || Self::contains_aggregate_function(&branch.result)
601                }) || else_branch
602                    .as_ref()
603                    .map_or(false, |e| Self::contains_aggregate_function(e))
604            }
605            SqlExpression::SimpleCaseExpression {
606                expr,
607                when_branches,
608                else_branch,
609            } => {
610                Self::contains_aggregate_function(expr)
611                    || when_branches.iter().any(|branch| {
612                        Self::contains_aggregate_function(&branch.value)
613                            || Self::contains_aggregate_function(&branch.result)
614                    })
615                    || else_branch
616                        .as_ref()
617                        .map_or(false, |e| Self::contains_aggregate_function(e))
618            }
619            SqlExpression::ScalarSubquery { query } => {
620                // Subqueries can have their own aggregates, but that's fine
621                // We're only checking the outer HAVING clause
622                query
623                    .having
624                    .as_ref()
625                    .map_or(false, |h| Self::contains_aggregate_function(h))
626            }
627            // Leaf nodes - no aggregates
628            SqlExpression::Column(_)
629            | SqlExpression::StringLiteral(_)
630            | SqlExpression::NumberLiteral(_)
631            | SqlExpression::BooleanLiteral(_)
632            | SqlExpression::Null
633            | SqlExpression::DateTimeConstructor { .. }
634            | SqlExpression::DateTimeToday { .. } => false,
635
636            // Window functions contain aggregates by definition
637            SqlExpression::WindowFunction { .. } => true,
638
639            // Between has three parts to check
640            SqlExpression::Between { expr, lower, upper } => {
641                Self::contains_aggregate_function(expr)
642                    || Self::contains_aggregate_function(lower)
643                    || Self::contains_aggregate_function(upper)
644            }
645
646            // IN list - check expr and all values
647            SqlExpression::InList { expr, values } | SqlExpression::NotInList { expr, values } => {
648                Self::contains_aggregate_function(expr)
649                    || values.iter().any(Self::contains_aggregate_function)
650            }
651
652            // IN subquery - check expr and subquery
653            SqlExpression::InSubquery { expr, subquery }
654            | SqlExpression::NotInSubquery { expr, subquery } => {
655                Self::contains_aggregate_function(expr)
656                    || subquery
657                        .having
658                        .as_ref()
659                        .map_or(false, |h| Self::contains_aggregate_function(h))
660            }
661
662            // Tuple IN/NOT IN subquery - check each expr and subquery
663            SqlExpression::InSubqueryTuple { exprs, subquery }
664            | SqlExpression::NotInSubqueryTuple { exprs, subquery } => {
665                exprs.iter().any(Self::contains_aggregate_function)
666                    || subquery
667                        .having
668                        .as_ref()
669                        .map_or(false, |h| Self::contains_aggregate_function(h))
670            }
671
672            // UNNEST - check column expression
673            SqlExpression::Unnest { column, .. } => Self::contains_aggregate_function(column),
674        }
675    }
676
677    fn parse_select_statement(&mut self) -> Result<SelectStatement, String> {
678        self.trace_enter("parse_select_statement");
679        let result = self.parse_select_statement_inner()?;
680
681        // Check for balanced parentheses at the end of parsing
682        self.check_balanced_parentheses()?;
683
684        Ok(result)
685    }
686
687    fn parse_select_statement_inner(&mut self) -> Result<SelectStatement, String> {
688        // Collect leading comments ONLY in PreserveComments mode
689        let leading_comments = if self.mode == ParserMode::PreserveComments {
690            self.collect_leading_comments()
691        } else {
692            vec![]
693        };
694
695        self.parse_select_statement_with_comments(leading_comments)
696    }
697
698    /// Parse SELECT statement without collecting leading comments
699    /// Used when comments were already collected (e.g., before WITH clause)
700    fn parse_select_statement_inner_no_comments(&mut self) -> Result<SelectStatement, String> {
701        self.parse_select_statement_with_comments(vec![])
702    }
703
704    /// Core SELECT parsing logic - takes pre-collected comments
705    fn parse_select_statement_with_comments(
706        &mut self,
707        leading_comments: Vec<Comment>,
708    ) -> Result<SelectStatement, String> {
709        self.consume(Token::Select)?;
710
711        // Check for DISTINCT keyword
712        let distinct = if matches!(self.current_token, Token::Distinct) {
713            self.advance();
714            true
715        } else {
716            false
717        };
718
719        // Parse SELECT items (supports computed expressions)
720        let select_items = self.parse_select_items()?;
721
722        // Create legacy columns vector for backward compatibility
723        let columns = select_items
724            .iter()
725            .map(|item| match item {
726                SelectItem::Star { .. } => "*".to_string(),
727                SelectItem::StarExclude { .. } => "*".to_string(), // Treated as * in legacy columns
728                SelectItem::Column {
729                    column: col_ref, ..
730                } => col_ref.name.clone(),
731                SelectItem::Expression { alias, .. } => alias.clone(),
732            })
733            .collect();
734
735        // Parse INTO clause (for temporary tables) - comes immediately after SELECT items
736        let into_table = if matches!(self.current_token, Token::Into) {
737            self.advance();
738            Some(self.parse_into_clause()?)
739        } else {
740            None
741        };
742
743        // Parse FROM clause - can be a table name, subquery, or table function
744        let (from_table, from_subquery, from_function, from_alias) = if matches!(
745            self.current_token,
746            Token::From
747        ) {
748            self.advance();
749
750            // Check for table function like RANGE()
751            // Also handle keywords that could be table/CTE names
752            let table_or_function_name = match &self.current_token {
753                Token::Identifier(name) => Some(name.clone()),
754                token => {
755                    // Check if it's a keyword that can be used as table/CTE name
756                    token.as_keyword_str().map(|k| k.to_lowercase())
757                }
758            };
759
760            if let Some(name) = table_or_function_name {
761                // Check if this is a table function by consulting the registry
762                // We need to lookahead to see if there's a parenthesis to distinguish
763                // between a function call and a table with the same name
764                let has_paren = self.peek_token() == Some(Token::LeftParen);
765                if self.debug_trace {
766                    eprintln!(
767                        "  Checking {} for table function, has_paren={}",
768                        name, has_paren
769                    );
770                }
771
772                // Check if it's a known table function or generator
773                // In FROM clause context, prioritize generators over scalar functions
774                let is_table_function = if has_paren {
775                    // First check generator registry (for FROM clause context)
776                    if self.debug_trace {
777                        eprintln!("  Checking generator registry for {}", name.to_uppercase());
778                    }
779                    if let Some(_gen) = self.generator_registry.get(&name.to_uppercase()) {
780                        if self.debug_trace {
781                            eprintln!("  Found {} in generator registry", name);
782                        }
783                        self.trace_token(&format!("Found generator: {}", name));
784                        true
785                    } else {
786                        // Then check if it's a table function in the function registry
787                        if let Some(func) = self.function_registry.get(&name.to_uppercase()) {
788                            let sig = func.signature();
789                            let is_table_fn = sig.category == FunctionCategory::TableFunction;
790                            if self.debug_trace {
791                                eprintln!(
792                                    "  Found {} in function registry, is_table_function={}",
793                                    name, is_table_fn
794                                );
795                            }
796                            if is_table_fn {
797                                self.trace_token(&format!(
798                                    "Found table function in function registry: {}",
799                                    name
800                                ));
801                            }
802                            is_table_fn
803                        } else {
804                            if self.debug_trace {
805                                eprintln!("  {} not found in either registry", name);
806                                self.trace_token(&format!(
807                                    "Not found as generator or table function: {}",
808                                    name
809                                ));
810                            }
811                            // `name(` in a FROM clause is unambiguously a table
812                            // function call (table/CTE names are never followed
813                            // by parens). If we don't recognise it, erroring here
814                            // beats silently treating `name` as a missing table
815                            // and degrading to the DUAL placeholder row.
816                            return Err(format!(
817                                "Unknown table function '{}'. Run --list-functions to see available table functions (e.g. READ_CSV, READ_JSON, READ_JSONL, RANGE).",
818                                name
819                            ));
820                        }
821                    }
822                } else {
823                    if self.debug_trace {
824                        eprintln!("  No parenthesis after {}, treating as table", name);
825                    }
826                    false
827                };
828
829                if is_table_function {
830                    // Parse table function
831                    let function_name = name.clone();
832                    self.advance(); // Skip function name
833
834                    // Parse arguments
835                    self.consume(Token::LeftParen)?;
836                    let args = self.parse_argument_list()?;
837                    self.consume(Token::RightParen)?;
838
839                    // Optional alias
840                    let alias = if matches!(self.current_token, Token::As) {
841                        self.advance();
842                        match &self.current_token {
843                            Token::Identifier(name) => {
844                                let alias = name.clone();
845                                self.advance();
846                                Some(alias)
847                            }
848                            token => {
849                                if let Some(keyword) = token.as_keyword_str() {
850                                    return Err(format!(
851                                            "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
852                                            keyword,
853                                            keyword.to_lowercase()
854                                        ));
855                                } else {
856                                    return Err("Expected alias name after AS".to_string());
857                                }
858                            }
859                        }
860                    } else if let Token::Identifier(name) = &self.current_token {
861                        let alias = name.clone();
862                        self.advance();
863                        Some(alias)
864                    } else {
865                        None
866                    };
867
868                    (
869                        None,
870                        None,
871                        Some(TableFunction::Generator {
872                            name: function_name,
873                            args,
874                        }),
875                        alias,
876                    )
877                } else {
878                    // Not a RANGE, SPLIT, or generator function, so it's a regular table name
879                    let table_name = name.clone();
880                    self.advance();
881
882                    // Check for optional alias
883                    let alias = self.parse_optional_alias()?;
884
885                    (Some(table_name), None, None, alias)
886                }
887            } else if matches!(self.current_token, Token::LeftParen) {
888                // Check for subquery: FROM (SELECT ...) or FROM (WITH ... SELECT ...)
889                self.advance();
890
891                // Parse the subquery - it might start with WITH
892                let subquery = if matches!(self.current_token, Token::With) {
893                    self.parse_with_clause_inner()?
894                } else {
895                    self.parse_select_statement_inner()?
896                };
897
898                self.consume(Token::RightParen)?;
899
900                // Subqueries must have an alias
901                let alias = if matches!(self.current_token, Token::As) {
902                    self.advance();
903                    match &self.current_token {
904                        Token::Identifier(name) => {
905                            let alias = name.clone();
906                            self.advance();
907                            alias
908                        }
909                        token => {
910                            if let Some(keyword) = token.as_keyword_str() {
911                                return Err(format!(
912                                        "Reserved keyword '{}' cannot be used as subquery alias. Use a different name or quote it with double quotes: \"{}\"",
913                                        keyword,
914                                        keyword.to_lowercase()
915                                    ));
916                            } else {
917                                return Err("Expected alias name after AS".to_string());
918                            }
919                        }
920                    }
921                } else {
922                    // AS is optional, but alias is required
923                    match &self.current_token {
924                        Token::Identifier(name) => {
925                            let alias = name.clone();
926                            self.advance();
927                            alias
928                        }
929                        _ => {
930                            return Err(
931                                "Subquery in FROM must have an alias (e.g., AS t)".to_string()
932                            )
933                        }
934                    }
935                };
936
937                (None, Some(Box::new(subquery)), None, Some(alias))
938            } else {
939                // Regular table name - handle identifiers and keywords
940                let table_name = match &self.current_token {
941                    Token::Identifier(table) => table.clone(),
942                    Token::QuotedIdentifier(table) => table.clone(),
943                    token => {
944                        // Check if it's a keyword that can be used as table/CTE name
945                        if let Some(keyword) = token.as_keyword_str() {
946                            keyword.to_lowercase()
947                        } else {
948                            return Err("Expected table name or subquery after FROM".to_string());
949                        }
950                    }
951                };
952
953                self.advance();
954
955                // Check for optional alias
956                let alias = self.parse_optional_alias()?;
957
958                (Some(table_name), None, None, alias)
959            }
960        } else {
961            (None, None, None, None)
962        };
963
964        // Check for PIVOT after FROM table source
965        // PIVOT wraps the FROM table/subquery before JOINs are processed
966        // This creates a PIVOT TableSource that will be processed by PivotExpander transformer
967        let pivot_source = if matches!(self.current_token, Token::Pivot) {
968            // Build a TableSource from the current FROM clause
969            let source = if let Some(ref table_name) = from_table {
970                TableSource::Table(table_name.clone())
971            } else if let Some(ref subquery) = from_subquery {
972                TableSource::DerivedTable {
973                    query: subquery.clone(),
974                    alias: from_alias.clone().unwrap_or_default(),
975                }
976            } else {
977                return Err("PIVOT requires a table or subquery source".to_string());
978            };
979
980            // Parse the PIVOT clause - this wraps the source in a Pivot TableSource
981            let pivoted = self.parse_pivot_clause(source)?;
982            Some(pivoted)
983        } else {
984            None
985        };
986
987        // Parse JOIN clauses
988        let mut joins = Vec::new();
989        while self.is_join_token() {
990            joins.push(self.parse_join_clause()?);
991        }
992
993        let where_clause = if matches!(self.current_token, Token::Where) {
994            self.advance();
995            Some(self.parse_where_clause()?)
996        } else {
997            None
998        };
999
1000        let group_by = if matches!(self.current_token, Token::GroupBy) {
1001            self.advance();
1002            // Parse expressions instead of just identifiers for GROUP BY
1003            // This allows GROUP BY TIME_BUCKET(...), CASE ..., etc.
1004            Some(self.parse_expression_list()?)
1005        } else {
1006            None
1007        };
1008
1009        // Parse HAVING clause (must come after GROUP BY)
1010        let having = if matches!(self.current_token, Token::Having) {
1011            if group_by.is_none() {
1012                return Err("HAVING clause requires GROUP BY".to_string());
1013            }
1014            self.advance();
1015            let having_expr = self.parse_expression()?;
1016
1017            // Note: Aggregate functions in HAVING are now supported via the
1018            // HavingAliasTransformer preprocessing step, which automatically
1019            // adds aliases and rewrites the HAVING clause to use them.
1020
1021            Some(having_expr)
1022        } else {
1023            None
1024        };
1025
1026        // Parse QUALIFY clause (Snowflake-style window function filtering)
1027        // QUALIFY filters on window function results without needing a subquery
1028        // Example: SELECT *, ROW_NUMBER() OVER (...) AS rn FROM t QUALIFY rn <= 3
1029        let qualify = if matches!(self.current_token, Token::Qualify) {
1030            self.advance();
1031            let qualify_expr = self.parse_expression()?;
1032
1033            // Note: QUALIFY is handled by the QualifyToWhereTransformer preprocessing step
1034            // which converts it to WHERE after window functions are lifted to CTEs
1035
1036            Some(qualify_expr)
1037        } else {
1038            None
1039        };
1040
1041        // Parse ORDER BY clause (comes after GROUP BY, HAVING, and QUALIFY)
1042        let order_by = if matches!(self.current_token, Token::OrderBy) {
1043            self.trace_token("Found OrderBy token");
1044            self.advance();
1045            Some(self.parse_order_by_list()?)
1046        } else if let Token::Identifier(s) = &self.current_token {
1047            // This shouldn't happen if the lexer properly tokenizes ORDER BY
1048            // But keeping as fallback for compatibility
1049            if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
1050                self.trace_token("Warning: ORDER as identifier instead of OrderBy token");
1051                self.advance(); // consume ORDER
1052                if matches!(&self.current_token, Token::By) {
1053                    self.advance(); // consume BY
1054                    Some(self.parse_order_by_list()?)
1055                } else {
1056                    return Err("Expected BY after ORDER".to_string());
1057                }
1058            } else {
1059                None
1060            }
1061        } else {
1062            None
1063        };
1064
1065        // Parse LIMIT clause
1066        let limit = if matches!(self.current_token, Token::Limit) {
1067            self.advance();
1068            match &self.current_token {
1069                Token::NumberLiteral(num) => {
1070                    let limit_val = num
1071                        .parse::<usize>()
1072                        .map_err(|_| format!("Invalid LIMIT value: {num}"))?;
1073                    self.advance();
1074                    Some(limit_val)
1075                }
1076                _ => return Err("Expected number after LIMIT".to_string()),
1077            }
1078        } else {
1079            None
1080        };
1081
1082        // Parse OFFSET clause
1083        let offset = if matches!(self.current_token, Token::Offset) {
1084            self.advance();
1085            match &self.current_token {
1086                Token::NumberLiteral(num) => {
1087                    let offset_val = num
1088                        .parse::<usize>()
1089                        .map_err(|_| format!("Invalid OFFSET value: {num}"))?;
1090                    self.advance();
1091                    Some(offset_val)
1092                }
1093                _ => return Err("Expected number after OFFSET".to_string()),
1094            }
1095        } else {
1096            None
1097        };
1098
1099        // Parse INTO clause (alternative position - SQL Server also supports INTO after all clauses)
1100        // This handles: SELECT * FROM table WHERE x > 5 INTO #temp
1101        // If INTO was already parsed after SELECT, this will be None (can't have two INTOs)
1102        let into_table = if into_table.is_none() && matches!(self.current_token, Token::Into) {
1103            self.advance();
1104            Some(self.parse_into_clause()?)
1105        } else {
1106            into_table // Keep the one from after SELECT if it exists
1107        };
1108
1109        // Parse UNION/INTERSECT/EXCEPT operations
1110        let set_operations = self.parse_set_operations()?;
1111
1112        // Collect trailing comment ONLY in PreserveComments mode
1113        let trailing_comment = if self.mode == ParserMode::PreserveComments {
1114            self.collect_trailing_comment()
1115        } else {
1116            None
1117        };
1118
1119        // Build unified from_source from parsed components
1120        // PIVOT takes precedence if it exists (it already wraps the base source)
1121        let from_source = if let Some(pivot) = pivot_source {
1122            Some(pivot)
1123        } else if let Some(ref table_name) = from_table {
1124            Some(TableSource::Table(table_name.clone()))
1125        } else if let Some(ref subquery) = from_subquery {
1126            Some(TableSource::DerivedTable {
1127                query: subquery.clone(),
1128                alias: from_alias.clone().unwrap_or_default(),
1129            })
1130        } else if let Some(ref _func) = from_function {
1131            // Table functions don't use TableSource yet, keep as None for now
1132            // TODO: Add TableFunction variant to TableSource
1133            None
1134        } else {
1135            None
1136        };
1137
1138        Ok(SelectStatement {
1139            distinct,
1140            columns,
1141            select_items,
1142            from_source,
1143            #[allow(deprecated)]
1144            from_table,
1145            #[allow(deprecated)]
1146            from_subquery,
1147            #[allow(deprecated)]
1148            from_function,
1149            #[allow(deprecated)]
1150            from_alias,
1151            joins,
1152            where_clause,
1153            order_by,
1154            group_by,
1155            having,
1156            qualify,
1157            limit,
1158            offset,
1159            ctes: Vec::new(), // Will be populated by WITH clause parser
1160            into_table,
1161            set_operations,
1162            leading_comments,
1163            trailing_comment,
1164        })
1165    }
1166
1167    /// Parse UNION/INTERSECT/EXCEPT operations
1168    /// Returns a vector of (operation, select_statement) pairs
1169    fn parse_set_operations(
1170        &mut self,
1171    ) -> Result<Vec<(SetOperation, Box<SelectStatement>)>, String> {
1172        let mut operations = Vec::new();
1173
1174        while matches!(
1175            self.current_token,
1176            Token::Union | Token::Intersect | Token::Except
1177        ) {
1178            // Determine the operation type
1179            let operation = match &self.current_token {
1180                Token::Union => {
1181                    self.advance();
1182                    // Check for ALL keyword
1183                    if let Token::Identifier(id) = &self.current_token {
1184                        if id.to_uppercase() == "ALL" {
1185                            self.advance();
1186                            SetOperation::UnionAll
1187                        } else {
1188                            SetOperation::Union
1189                        }
1190                    } else {
1191                        SetOperation::Union
1192                    }
1193                }
1194                Token::Intersect => {
1195                    self.advance();
1196                    SetOperation::Intersect
1197                }
1198                Token::Except => {
1199                    self.advance();
1200                    SetOperation::Except
1201                }
1202                _ => unreachable!(),
1203            };
1204
1205            // Parse the next SELECT statement
1206            let next_select = self.parse_select_statement_inner()?;
1207
1208            operations.push((operation, Box::new(next_select)));
1209        }
1210
1211        Ok(operations)
1212    }
1213
1214    /// Parse SELECT items that support computed expressions with aliases
1215    fn parse_select_items(&mut self) -> Result<Vec<SelectItem>, String> {
1216        let mut items = Vec::new();
1217
1218        loop {
1219            // Check for qualified star (table.*) or unqualified star (*)
1220            // First check if we have identifier.* pattern
1221            if let Token::Identifier(name) = &self.current_token.clone() {
1222                // Peek ahead to check for .* pattern
1223                let saved_pos = self.lexer.clone();
1224                let saved_token = self.current_token.clone();
1225                let table_name = name.clone();
1226
1227                self.advance();
1228
1229                if matches!(self.current_token, Token::Dot) {
1230                    self.advance();
1231                    if matches!(self.current_token, Token::Star) {
1232                        // This is table.* pattern
1233                        items.push(SelectItem::Star {
1234                            table_prefix: Some(table_name),
1235                            leading_comments: vec![],
1236                            trailing_comment: None,
1237                        });
1238                        self.advance();
1239
1240                        // Continue to next item or end
1241                        if matches!(self.current_token, Token::Comma) {
1242                            self.advance();
1243                            continue;
1244                        } else {
1245                            break;
1246                        }
1247                    }
1248                }
1249
1250                // Not table.*, restore position and continue with normal parsing
1251                self.lexer = saved_pos;
1252                self.current_token = saved_token;
1253            }
1254
1255            // Check for unqualified *
1256            if matches!(self.current_token, Token::Star) {
1257                self.advance(); // consume *
1258
1259                // Check for EXCLUDE clause
1260                if matches!(self.current_token, Token::Exclude) {
1261                    self.advance(); // consume EXCLUDE
1262
1263                    // Expect opening paren
1264                    if !matches!(self.current_token, Token::LeftParen) {
1265                        return Err("Expected '(' after EXCLUDE".to_string());
1266                    }
1267                    self.advance(); // consume (
1268
1269                    // Parse column list
1270                    let mut excluded_columns = Vec::new();
1271                    loop {
1272                        match &self.current_token {
1273                            Token::Identifier(col_name) | Token::QuotedIdentifier(col_name) => {
1274                                excluded_columns.push(col_name.clone());
1275                                self.advance();
1276                            }
1277                            _ => return Err("Expected column name in EXCLUDE list".to_string()),
1278                        }
1279
1280                        // Check for comma or closing paren
1281                        if matches!(self.current_token, Token::Comma) {
1282                            self.advance();
1283                        } else if matches!(self.current_token, Token::RightParen) {
1284                            self.advance(); // consume )
1285                            break;
1286                        } else {
1287                            return Err("Expected ',' or ')' in EXCLUDE list".to_string());
1288                        }
1289                    }
1290
1291                    if excluded_columns.is_empty() {
1292                        return Err("EXCLUDE list cannot be empty".to_string());
1293                    }
1294
1295                    items.push(SelectItem::StarExclude {
1296                        table_prefix: None,
1297                        excluded_columns,
1298                        leading_comments: vec![],
1299                        trailing_comment: None,
1300                    });
1301                } else {
1302                    // Regular * without EXCLUDE
1303                    items.push(SelectItem::Star {
1304                        table_prefix: None,
1305                        leading_comments: vec![],
1306                        trailing_comment: None,
1307                    });
1308                }
1309            } else {
1310                // Parse expression or column
1311                let expr = self.parse_comparison()?; // Use comparison to support IS NULL and other comparisons
1312
1313                // Check for AS alias
1314                let alias = if matches!(self.current_token, Token::As) {
1315                    self.advance();
1316                    match &self.current_token {
1317                        Token::Identifier(alias_name) => {
1318                            let alias = alias_name.clone();
1319                            self.advance();
1320                            alias
1321                        }
1322                        Token::QuotedIdentifier(alias_name) => {
1323                            let alias = alias_name.clone();
1324                            self.advance();
1325                            alias
1326                        }
1327                        token => {
1328                            if let Some(keyword) = token.as_keyword_str() {
1329                                return Err(format!(
1330                                    "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
1331                                    keyword,
1332                                    keyword.to_lowercase()
1333                                ));
1334                            } else {
1335                                return Err("Expected alias name after AS".to_string());
1336                            }
1337                        }
1338                    }
1339                } else {
1340                    // Generate default alias based on expression
1341                    match &expr {
1342                        SqlExpression::Column(col_ref) => col_ref.name.clone(),
1343                        _ => format!("expr_{}", items.len() + 1), // Default alias for computed expressions
1344                    }
1345                };
1346
1347                // Create SelectItem based on expression type
1348                let item = match expr {
1349                    SqlExpression::Column(col_ref) if alias == col_ref.name => {
1350                        // Simple column reference without alias
1351                        SelectItem::Column {
1352                            column: col_ref,
1353                            leading_comments: vec![],
1354                            trailing_comment: None,
1355                        }
1356                    }
1357                    _ => {
1358                        // Computed expression or column with different alias
1359                        SelectItem::Expression {
1360                            expr,
1361                            alias,
1362                            leading_comments: vec![],
1363                            trailing_comment: None,
1364                        }
1365                    }
1366                };
1367
1368                items.push(item);
1369            }
1370
1371            // Check for comma to continue
1372            if matches!(self.current_token, Token::Comma) {
1373                self.advance();
1374            } else {
1375                break;
1376            }
1377        }
1378
1379        Ok(items)
1380    }
1381
1382    fn parse_identifier_list(&mut self) -> Result<Vec<String>, String> {
1383        let mut identifiers = Vec::new();
1384
1385        loop {
1386            match &self.current_token {
1387                Token::Identifier(id) => {
1388                    // Check if this is a reserved keyword that should stop identifier parsing
1389                    if Self::is_identifier_reserved(id) {
1390                        // Stop parsing identifiers if we hit a reserved keyword
1391                        break;
1392                    }
1393                    let mut name = id.clone();
1394                    self.advance();
1395
1396                    // Handle qualified names (table.column)
1397                    if matches!(self.current_token, Token::Dot) {
1398                        self.advance(); // consume dot
1399                        match &self.current_token {
1400                            Token::Identifier(col) => {
1401                                name = format!("{}.{}", name, col);
1402                                self.advance();
1403                            }
1404                            Token::QuotedIdentifier(col) => {
1405                                name = format!("{}.{}", name, col);
1406                                self.advance();
1407                            }
1408                            _ => {
1409                                return Err("Expected identifier after '.'".to_string());
1410                            }
1411                        }
1412                    }
1413
1414                    identifiers.push(name);
1415                }
1416                Token::QuotedIdentifier(id) => {
1417                    // Handle quoted identifiers like "Customer Id"
1418                    identifiers.push(id.clone());
1419                    self.advance();
1420                }
1421                _ => {
1422                    // Stop parsing if we hit any other token type
1423                    break;
1424                }
1425            }
1426
1427            if matches!(self.current_token, Token::Comma) {
1428                self.advance();
1429            } else {
1430                break;
1431            }
1432        }
1433
1434        if identifiers.is_empty() {
1435            return Err("Expected at least one identifier".to_string());
1436        }
1437
1438        Ok(identifiers)
1439    }
1440
1441    fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
1442        let mut partition_by = Vec::new();
1443        let mut order_by = Vec::new();
1444
1445        // Check for PARTITION BY
1446        if matches!(self.current_token, Token::Partition) {
1447            self.advance(); // consume PARTITION
1448            if !matches!(self.current_token, Token::By) {
1449                return Err("Expected BY after PARTITION".to_string());
1450            }
1451            self.advance(); // consume BY
1452
1453            // Parse partition columns
1454            partition_by = self.parse_identifier_list()?;
1455        }
1456
1457        // Check for ORDER BY
1458        if matches!(self.current_token, Token::OrderBy) {
1459            self.advance(); // consume ORDER BY (as single token)
1460            order_by = self.parse_order_by_list()?;
1461        } else if let Token::Identifier(s) = &self.current_token {
1462            if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
1463                // Handle ORDER BY as two tokens
1464                self.advance(); // consume ORDER
1465                if !matches!(self.current_token, Token::By) {
1466                    return Err("Expected BY after ORDER".to_string());
1467                }
1468                self.advance(); // consume BY
1469                order_by = self.parse_order_by_list()?;
1470            }
1471        }
1472
1473        // Parse optional window frame (ROWS/RANGE BETWEEN ... AND ...)
1474        let mut frame = self.parse_window_frame()?;
1475
1476        // SQL Standard: If ORDER BY is present but no frame is specified,
1477        // default to RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
1478        // This matches behavior of PostgreSQL, MySQL, SQL Server, etc.
1479        if !order_by.is_empty() && frame.is_none() {
1480            frame = Some(WindowFrame {
1481                unit: FrameUnit::Range,
1482                start: FrameBound::UnboundedPreceding,
1483                end: Some(FrameBound::CurrentRow),
1484            });
1485        }
1486
1487        Ok(WindowSpec {
1488            partition_by,
1489            order_by,
1490            frame,
1491        })
1492    }
1493
1494    fn parse_order_by_list(&mut self) -> Result<Vec<OrderByItem>, String> {
1495        let mut order_items = Vec::new();
1496
1497        loop {
1498            // Parse ANY expression (not just column names)
1499            // This supports:
1500            // - Simple columns: region
1501            // - Qualified columns: table.column
1502            // - Aggregate functions: SUM(sales_amount)
1503            // - Arithmetic: sales_amount * 1.1
1504            // - CASE expressions: CASE WHEN ... END
1505            let expr = self.parse_expression()?;
1506
1507            // Check for ASC/DESC
1508            let direction = match &self.current_token {
1509                Token::Asc => {
1510                    self.advance();
1511                    SortDirection::Asc
1512                }
1513                Token::Desc => {
1514                    self.advance();
1515                    SortDirection::Desc
1516                }
1517                _ => SortDirection::Asc, // Default to ASC if not specified
1518            };
1519
1520            order_items.push(OrderByItem { expr, direction });
1521
1522            if matches!(self.current_token, Token::Comma) {
1523                self.advance();
1524            } else {
1525                break;
1526            }
1527        }
1528
1529        Ok(order_items)
1530    }
1531
1532    /// Parse INTO clause for temporary tables
1533    /// Syntax: INTO #table_name
1534    fn parse_into_clause(&mut self) -> Result<IntoTable, String> {
1535        // Expect an identifier starting with #
1536        let name = match &self.current_token {
1537            Token::Identifier(id) if id.starts_with('#') => {
1538                let table_name = id.clone();
1539                self.advance();
1540                table_name
1541            }
1542            Token::Identifier(id) => {
1543                return Err(format!(
1544                    "Temporary table name must start with #, got: {}",
1545                    id
1546                ));
1547            }
1548            _ => {
1549                return Err(
1550                    "Expected temporary table name (starting with #) after INTO".to_string()
1551                );
1552            }
1553        };
1554
1555        Ok(IntoTable { name })
1556    }
1557
1558    fn parse_window_frame(&mut self) -> Result<Option<WindowFrame>, String> {
1559        // Check for ROWS or RANGE keyword
1560        let unit = match &self.current_token {
1561            Token::Rows => {
1562                self.advance();
1563                FrameUnit::Rows
1564            }
1565            Token::Identifier(id) if id.to_uppercase() == "RANGE" => {
1566                // RANGE as window frame unit
1567                self.advance();
1568                FrameUnit::Range
1569            }
1570            _ => return Ok(None), // No window frame specified
1571        };
1572
1573        // Check for BETWEEN or just a single bound
1574        let (start, end) = if let Token::Between = &self.current_token {
1575            self.advance(); // consume BETWEEN
1576                            // Parse start bound
1577            let start = self.parse_frame_bound()?;
1578
1579            // Expect AND
1580            if !matches!(&self.current_token, Token::And) {
1581                return Err("Expected AND after window frame start bound".to_string());
1582            }
1583            self.advance();
1584
1585            // Parse end bound
1586            let end = self.parse_frame_bound()?;
1587            (start, Some(end))
1588        } else {
1589            // Single bound (e.g., "ROWS 5 PRECEDING")
1590            let bound = self.parse_frame_bound()?;
1591            (bound, None)
1592        };
1593
1594        Ok(Some(WindowFrame { unit, start, end }))
1595    }
1596
1597    fn parse_frame_bound(&mut self) -> Result<FrameBound, String> {
1598        match &self.current_token {
1599            Token::Unbounded => {
1600                self.advance();
1601                match &self.current_token {
1602                    Token::Preceding => {
1603                        self.advance();
1604                        Ok(FrameBound::UnboundedPreceding)
1605                    }
1606                    Token::Following => {
1607                        self.advance();
1608                        Ok(FrameBound::UnboundedFollowing)
1609                    }
1610                    _ => Err("Expected PRECEDING or FOLLOWING after UNBOUNDED".to_string()),
1611                }
1612            }
1613            Token::Current => {
1614                self.advance();
1615                if matches!(&self.current_token, Token::Row) {
1616                    self.advance();
1617                    return Ok(FrameBound::CurrentRow);
1618                }
1619                Err("Expected ROW after CURRENT".to_string())
1620            }
1621            Token::NumberLiteral(num) => {
1622                let n: i64 = num
1623                    .parse()
1624                    .map_err(|_| "Invalid number in window frame".to_string())?;
1625                self.advance();
1626                match &self.current_token {
1627                    Token::Preceding => {
1628                        self.advance();
1629                        Ok(FrameBound::Preceding(n))
1630                    }
1631                    Token::Following => {
1632                        self.advance();
1633                        Ok(FrameBound::Following(n))
1634                    }
1635                    _ => Err("Expected PRECEDING or FOLLOWING after number".to_string()),
1636                }
1637            }
1638            _ => Err("Invalid window frame bound".to_string()),
1639        }
1640    }
1641
1642    fn parse_where_clause(&mut self) -> Result<WhereClause, String> {
1643        // Parse the entire WHERE clause as a single expression tree
1644        // The logical operators (AND/OR) are now handled within parse_expression
1645        let expr = self.parse_expression()?;
1646
1647        // Check for unexpected closing parenthesis
1648        if matches!(self.current_token, Token::RightParen) && self.paren_depth <= 0 {
1649            return Err(
1650                "Unexpected closing parenthesis - no matching opening parenthesis".to_string(),
1651            );
1652        }
1653
1654        // Create a single condition with the entire expression
1655        let conditions = vec![Condition {
1656            expr,
1657            connector: None,
1658        }];
1659
1660        Ok(WhereClause { conditions })
1661    }
1662
1663    fn parse_expression(&mut self) -> Result<SqlExpression, String> {
1664        self.trace_enter("parse_expression");
1665        // Start with logical OR as the lowest precedence operator
1666        // The hierarchy is: OR -> AND -> comparison -> additive -> multiplicative -> primary
1667        let mut left = self.parse_logical_or()?;
1668
1669        // Handle IN operator (not preceded by NOT)
1670        // This uses the modular comparison module
1671        left = parse_in_operator(self, left)?;
1672
1673        let result = Ok(left);
1674        self.trace_exit("parse_expression", &result);
1675        result
1676    }
1677
1678    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
1679        // Use the new modular comparison expression parser
1680        parse_comparison_expr(self)
1681    }
1682
1683    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
1684        // Use the new modular arithmetic expression parser
1685        parse_additive_expr(self)
1686    }
1687
1688    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
1689        // Use the new modular arithmetic expression parser
1690        parse_multiplicative_expr(self)
1691    }
1692
1693    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
1694        // Use the new modular logical expression parser
1695        parse_logical_or_expr(self)
1696    }
1697
1698    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
1699        // Use the new modular logical expression parser
1700        parse_logical_and_expr(self)
1701    }
1702
1703    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
1704        // Use the new modular CASE expression parser
1705        parse_case_expr(self)
1706    }
1707
1708    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
1709        // Use the new modular primary expression parser
1710        // Clone the necessary data to avoid borrowing issues
1711        let columns = self.columns.clone();
1712        let in_method_args = self.in_method_args;
1713        let ctx = PrimaryExpressionContext {
1714            columns: &columns,
1715            in_method_args,
1716        };
1717        parse_primary_expr(self, &ctx)
1718    }
1719
1720    // Keep the old implementation temporarily for reference (will be removed)
1721    fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
1722        // Set flag to indicate we're parsing method arguments
1723        self.in_method_args = true;
1724
1725        let args = self.parse_argument_list()?;
1726
1727        // Clear the flag
1728        self.in_method_args = false;
1729
1730        Ok(args)
1731    }
1732
1733    fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
1734        let mut args = Vec::new();
1735        let mut has_distinct = false;
1736
1737        if !matches!(self.current_token, Token::RightParen) {
1738            // Check if first argument starts with DISTINCT
1739            if matches!(self.current_token, Token::Distinct) {
1740                self.advance(); // consume DISTINCT
1741                has_distinct = true;
1742            }
1743
1744            // Parse full expressions as arguments — this allows comparisons and
1745            // boolean logic inside function calls, e.g. AVG(x > 5), SUM(a = 'b')
1746            args.push(self.parse_logical_or()?);
1747
1748            // Parse any remaining arguments (DISTINCT only applies to first arg for aggregates)
1749            while matches!(self.current_token, Token::Comma) {
1750                self.advance();
1751                args.push(self.parse_logical_or()?);
1752            }
1753        }
1754
1755        Ok((args, has_distinct))
1756    }
1757
1758    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
1759        let mut expressions = Vec::new();
1760
1761        loop {
1762            expressions.push(self.parse_expression()?);
1763
1764            if matches!(self.current_token, Token::Comma) {
1765                self.advance();
1766            } else {
1767                break;
1768            }
1769        }
1770
1771        Ok(expressions)
1772    }
1773
1774    #[must_use]
1775    pub fn get_position(&self) -> usize {
1776        self.lexer.get_position()
1777    }
1778
1779    // Check if current token is a JOIN-related token
1780    fn is_join_token(&self) -> bool {
1781        matches!(
1782            self.current_token,
1783            Token::Join | Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
1784        )
1785    }
1786
1787    // Parse a JOIN clause
1788    fn parse_join_clause(&mut self) -> Result<JoinClause, String> {
1789        // Determine join type
1790        let join_type = match &self.current_token {
1791            Token::Join => {
1792                self.advance();
1793                JoinType::Inner // Default JOIN is INNER JOIN
1794            }
1795            Token::Inner => {
1796                self.advance();
1797                if !matches!(self.current_token, Token::Join) {
1798                    return Err("Expected JOIN after INNER".to_string());
1799                }
1800                self.advance();
1801                JoinType::Inner
1802            }
1803            Token::Left => {
1804                self.advance();
1805                // Handle optional OUTER keyword
1806                if matches!(self.current_token, Token::Outer) {
1807                    self.advance();
1808                }
1809                if !matches!(self.current_token, Token::Join) {
1810                    return Err("Expected JOIN after LEFT".to_string());
1811                }
1812                self.advance();
1813                JoinType::Left
1814            }
1815            Token::Right => {
1816                self.advance();
1817                // Handle optional OUTER keyword
1818                if matches!(self.current_token, Token::Outer) {
1819                    self.advance();
1820                }
1821                if !matches!(self.current_token, Token::Join) {
1822                    return Err("Expected JOIN after RIGHT".to_string());
1823                }
1824                self.advance();
1825                JoinType::Right
1826            }
1827            Token::Full => {
1828                self.advance();
1829                // Handle optional OUTER keyword
1830                if matches!(self.current_token, Token::Outer) {
1831                    self.advance();
1832                }
1833                if !matches!(self.current_token, Token::Join) {
1834                    return Err("Expected JOIN after FULL".to_string());
1835                }
1836                self.advance();
1837                JoinType::Full
1838            }
1839            Token::Cross => {
1840                self.advance();
1841                if !matches!(self.current_token, Token::Join) {
1842                    return Err("Expected JOIN after CROSS".to_string());
1843                }
1844                self.advance();
1845                JoinType::Cross
1846            }
1847            _ => return Err("Expected JOIN keyword".to_string()),
1848        };
1849
1850        // Parse the table being joined
1851        let (table, alias) = self.parse_join_table_source()?;
1852
1853        // Parse ON condition (required for all joins except CROSS JOIN)
1854        let condition = if join_type == JoinType::Cross {
1855            // CROSS JOIN doesn't have ON condition - create empty condition
1856            JoinCondition { conditions: vec![] }
1857        } else {
1858            if !matches!(self.current_token, Token::On) {
1859                return Err("Expected ON keyword after JOIN table".to_string());
1860            }
1861            self.advance();
1862            self.parse_join_condition()?
1863        };
1864
1865        Ok(JoinClause {
1866            join_type,
1867            table,
1868            alias,
1869            condition,
1870        })
1871    }
1872
1873    fn parse_join_table_source(&mut self) -> Result<(TableSource, Option<String>), String> {
1874        let table = match &self.current_token {
1875            Token::Identifier(name) => {
1876                let table_name = name.clone();
1877                self.advance();
1878                TableSource::Table(table_name)
1879            }
1880            Token::LeftParen => {
1881                // Subquery as table source
1882                self.advance();
1883                let subquery = self.parse_select_statement_inner()?;
1884                if !matches!(self.current_token, Token::RightParen) {
1885                    return Err("Expected ')' after subquery".to_string());
1886                }
1887                self.advance();
1888
1889                // Subqueries must have an alias
1890                let alias = match &self.current_token {
1891                    Token::Identifier(alias_name) => {
1892                        let alias = alias_name.clone();
1893                        self.advance();
1894                        alias
1895                    }
1896                    Token::As => {
1897                        self.advance();
1898                        match &self.current_token {
1899                            Token::Identifier(alias_name) => {
1900                                let alias = alias_name.clone();
1901                                self.advance();
1902                                alias
1903                            }
1904                            _ => return Err("Expected alias after AS keyword".to_string()),
1905                        }
1906                    }
1907                    _ => return Err("Subqueries must have an alias".to_string()),
1908                };
1909
1910                return Ok((
1911                    TableSource::DerivedTable {
1912                        query: Box::new(subquery),
1913                        alias: alias.clone(),
1914                    },
1915                    Some(alias),
1916                ));
1917            }
1918            _ => return Err("Expected table name or subquery in JOIN clause".to_string()),
1919        };
1920
1921        // Check for optional alias
1922        let alias = match &self.current_token {
1923            Token::Identifier(alias_name) => {
1924                let alias = alias_name.clone();
1925                self.advance();
1926                Some(alias)
1927            }
1928            Token::As => {
1929                self.advance();
1930                match &self.current_token {
1931                    Token::Identifier(alias_name) => {
1932                        let alias = alias_name.clone();
1933                        self.advance();
1934                        Some(alias)
1935                    }
1936                    _ => return Err("Expected alias after AS keyword".to_string()),
1937                }
1938            }
1939            _ => None,
1940        };
1941
1942        Ok((table, alias))
1943    }
1944
1945    fn parse_join_condition(&mut self) -> Result<JoinCondition, String> {
1946        let mut conditions = Vec::new();
1947
1948        // Parse first condition
1949        conditions.push(self.parse_single_join_condition()?);
1950
1951        // Parse additional conditions connected by AND
1952        while matches!(self.current_token, Token::And) {
1953            self.advance(); // consume AND
1954            conditions.push(self.parse_single_join_condition()?);
1955        }
1956
1957        Ok(JoinCondition { conditions })
1958    }
1959
1960    fn parse_single_join_condition(&mut self) -> Result<SingleJoinCondition, String> {
1961        // Parse left side as additive expression (stops before comparison operators)
1962        // This allows the comparison operator to be explicitly parsed by this function
1963        let left_expr = self.parse_additive()?;
1964
1965        // Parse operator
1966        let operator = match &self.current_token {
1967            Token::Equal => JoinOperator::Equal,
1968            Token::NotEqual => JoinOperator::NotEqual,
1969            Token::LessThan => JoinOperator::LessThan,
1970            Token::LessThanOrEqual => JoinOperator::LessThanOrEqual,
1971            Token::GreaterThan => JoinOperator::GreaterThan,
1972            Token::GreaterThanOrEqual => JoinOperator::GreaterThanOrEqual,
1973            _ => return Err("Expected comparison operator in JOIN condition".to_string()),
1974        };
1975        self.advance();
1976
1977        // Parse right side as additive expression (stops before comparison operators)
1978        let right_expr = self.parse_additive()?;
1979
1980        Ok(SingleJoinCondition {
1981            left_expr,
1982            operator,
1983            right_expr,
1984        })
1985    }
1986
1987    fn parse_column_reference(&mut self) -> Result<String, String> {
1988        match &self.current_token {
1989            Token::Identifier(name) => {
1990                let mut column_ref = name.clone();
1991                self.advance();
1992
1993                // Check for table.column notation
1994                if matches!(self.current_token, Token::Dot) {
1995                    self.advance();
1996                    match &self.current_token {
1997                        Token::Identifier(col_name) => {
1998                            column_ref.push('.');
1999                            column_ref.push_str(col_name);
2000                            self.advance();
2001                        }
2002                        _ => return Err("Expected column name after '.'".to_string()),
2003                    }
2004                }
2005
2006                Ok(column_ref)
2007            }
2008            _ => Err("Expected column reference".to_string()),
2009        }
2010    }
2011
2012    // ===== PIVOT Parsing =====
2013
2014    /// Parse a PIVOT clause after a table source
2015    /// Syntax: PIVOT (aggregate_function FOR pivot_column IN (value1, value2, ...))
2016    fn parse_pivot_clause(&mut self, source: TableSource) -> Result<TableSource, String> {
2017        // Consume PIVOT keyword
2018        self.consume(Token::Pivot)?;
2019
2020        // Consume opening parenthesis
2021        self.consume(Token::LeftParen)?;
2022
2023        // Parse aggregate function (e.g., MAX(AmountEaten))
2024        let aggregate = self.parse_pivot_aggregate()?;
2025
2026        // Parse FOR keyword
2027        self.consume(Token::For)?;
2028
2029        // Parse pivot column name
2030        let pivot_column = match &self.current_token {
2031            Token::Identifier(col) => {
2032                let column = col.clone();
2033                self.advance();
2034                column
2035            }
2036            Token::QuotedIdentifier(col) => {
2037                let column = col.clone();
2038                self.advance();
2039                column
2040            }
2041            _ => return Err("Expected column name after FOR in PIVOT".to_string()),
2042        };
2043
2044        // Parse IN keyword
2045        if !matches!(self.current_token, Token::In) {
2046            return Err("Expected IN keyword in PIVOT clause".to_string());
2047        }
2048        self.advance();
2049
2050        // Parse pivot values
2051        let pivot_values = self.parse_pivot_in_clause()?;
2052
2053        // Consume closing parenthesis for PIVOT
2054        self.consume(Token::RightParen)?;
2055
2056        // Check for optional alias
2057        let alias = self.parse_optional_alias()?;
2058
2059        Ok(TableSource::Pivot {
2060            source: Box::new(source),
2061            aggregate,
2062            pivot_column,
2063            pivot_values,
2064            alias,
2065        })
2066    }
2067
2068    /// Parse the aggregate function specification in PIVOT
2069    /// Example: MAX(AmountEaten), SUM(sales), COUNT(*)
2070    fn parse_pivot_aggregate(&mut self) -> Result<PivotAggregate, String> {
2071        // Parse aggregate function name
2072        let function = match &self.current_token {
2073            Token::Identifier(name) => {
2074                let func_name = name.to_uppercase();
2075                // Validate it's an aggregate function
2076                match func_name.as_str() {
2077                    "MAX" | "MIN" | "SUM" | "AVG" | "COUNT" => {
2078                        self.advance();
2079                        func_name
2080                    }
2081                    _ => {
2082                        return Err(format!(
2083                            "Expected aggregate function (MAX, MIN, SUM, AVG, COUNT), got {}",
2084                            func_name
2085                        ))
2086                    }
2087                }
2088            }
2089            _ => return Err("Expected aggregate function in PIVOT".to_string()),
2090        };
2091
2092        // Consume opening parenthesis
2093        self.consume(Token::LeftParen)?;
2094
2095        // Parse column name (or * for COUNT)
2096        let column = match &self.current_token {
2097            Token::Identifier(col) => {
2098                let column = col.clone();
2099                self.advance();
2100                column
2101            }
2102            Token::QuotedIdentifier(col) => {
2103                let column = col.clone();
2104                self.advance();
2105                column
2106            }
2107            Token::Star => {
2108                // COUNT(*) is allowed
2109                if function == "COUNT" {
2110                    self.advance();
2111                    "*".to_string()
2112                } else {
2113                    return Err(format!("Only COUNT can use *, not {}", function));
2114                }
2115            }
2116            _ => return Err("Expected column name in aggregate function".to_string()),
2117        };
2118
2119        // Consume closing parenthesis
2120        self.consume(Token::RightParen)?;
2121
2122        Ok(PivotAggregate { function, column })
2123    }
2124
2125    /// Parse the IN clause values in PIVOT
2126    /// Example: IN ('Sammich', 'Pickle', 'Apple')
2127    /// Returns vector of pivot values
2128    fn parse_pivot_in_clause(&mut self) -> Result<Vec<String>, String> {
2129        // Consume opening parenthesis
2130        self.consume(Token::LeftParen)?;
2131
2132        let mut values = Vec::new();
2133
2134        // Parse first value
2135        match &self.current_token {
2136            Token::StringLiteral(val) => {
2137                values.push(val.clone());
2138                self.advance();
2139            }
2140            Token::Identifier(val) => {
2141                // Allow unquoted identifiers as well
2142                values.push(val.clone());
2143                self.advance();
2144            }
2145            Token::NumberLiteral(val) => {
2146                // Allow numeric values
2147                values.push(val.clone());
2148                self.advance();
2149            }
2150            _ => return Err("Expected value in PIVOT IN clause".to_string()),
2151        }
2152
2153        // Parse additional values separated by commas
2154        while matches!(self.current_token, Token::Comma) {
2155            self.advance(); // consume comma
2156
2157            match &self.current_token {
2158                Token::StringLiteral(val) => {
2159                    values.push(val.clone());
2160                    self.advance();
2161                }
2162                Token::Identifier(val) => {
2163                    values.push(val.clone());
2164                    self.advance();
2165                }
2166                Token::NumberLiteral(val) => {
2167                    values.push(val.clone());
2168                    self.advance();
2169                }
2170                _ => return Err("Expected value after comma in PIVOT IN clause".to_string()),
2171            }
2172        }
2173
2174        // Consume closing parenthesis
2175        self.consume(Token::RightParen)?;
2176
2177        if values.is_empty() {
2178            return Err("PIVOT IN clause must have at least one value".to_string());
2179        }
2180
2181        Ok(values)
2182    }
2183}
2184
2185// Context detection for cursor position
2186#[derive(Debug, Clone)]
2187pub enum CursorContext {
2188    SelectClause,
2189    FromClause,
2190    WhereClause,
2191    OrderByClause,
2192    AfterColumn(String),
2193    AfterLogicalOp(LogicalOp),
2194    AfterComparisonOp(String, String), // column_name, operator
2195    InMethodCall(String, String),      // object, method
2196    InExpression,
2197    Unknown,
2198}
2199
2200/// Safe UTF-8 string slicing that ensures we don't slice in the middle of a character
2201fn safe_slice_to(s: &str, pos: usize) -> &str {
2202    if pos >= s.len() {
2203        return s;
2204    }
2205
2206    // Find the nearest valid character boundary at or before pos
2207    let mut safe_pos = pos;
2208    while safe_pos > 0 && !s.is_char_boundary(safe_pos) {
2209        safe_pos -= 1;
2210    }
2211
2212    &s[..safe_pos]
2213}
2214
2215/// Safe UTF-8 string slicing from a position to the end
2216fn safe_slice_from(s: &str, pos: usize) -> &str {
2217    if pos >= s.len() {
2218        return "";
2219    }
2220
2221    // Find the nearest valid character boundary at or after pos
2222    let mut safe_pos = pos;
2223    while safe_pos < s.len() && !s.is_char_boundary(safe_pos) {
2224        safe_pos += 1;
2225    }
2226
2227    &s[safe_pos..]
2228}
2229
2230#[must_use]
2231pub fn detect_cursor_context(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
2232    let truncated = safe_slice_to(query, cursor_pos);
2233    let mut parser = Parser::new(truncated);
2234
2235    // Try to parse as much as possible
2236    if let Ok(stmt) = parser.parse() {
2237        let (ctx, partial) = analyze_statement(&stmt, truncated, cursor_pos);
2238        #[cfg(test)]
2239        println!("analyze_statement returned: {ctx:?}, {partial:?} for query: '{truncated}'");
2240        (ctx, partial)
2241    } else {
2242        // Partial parse - analyze what we have
2243        let (ctx, partial) = analyze_partial(truncated, cursor_pos);
2244        #[cfg(test)]
2245        println!("analyze_partial returned: {ctx:?}, {partial:?} for query: '{truncated}'");
2246        (ctx, partial)
2247    }
2248}
2249
2250#[must_use]
2251pub fn tokenize_query(query: &str) -> Vec<String> {
2252    let mut lexer = Lexer::new(query);
2253    let tokens = lexer.tokenize_all();
2254    tokens.iter().map(|t| format!("{t:?}")).collect()
2255}
2256
2257#[must_use]
2258/// Helper function to find the start of a quoted string searching backwards
2259fn find_quote_start(bytes: &[u8], mut pos: usize) -> Option<usize> {
2260    // Skip the closing quote and search backwards
2261    if pos > 0 {
2262        pos -= 1;
2263        while pos > 0 {
2264            if bytes[pos] == b'"' {
2265                // Check if it's not an escaped quote
2266                if pos == 0 || bytes[pos - 1] != b'\\' {
2267                    return Some(pos);
2268                }
2269            }
2270            pos -= 1;
2271        }
2272        // Check position 0 separately
2273        if bytes[0] == b'"' {
2274            return Some(0);
2275        }
2276    }
2277    None
2278}
2279
2280/// Helper function to handle method call context after validation
2281fn handle_method_call_context(col_name: &str, after_dot: &str) -> (CursorContext, Option<String>) {
2282    // Check if there's a partial method name after the dot
2283    let partial_method = if after_dot.is_empty() {
2284        None
2285    } else if after_dot.chars().all(|c| c.is_alphanumeric() || c == '_') {
2286        Some(after_dot.to_string())
2287    } else {
2288        None
2289    };
2290
2291    // For AfterColumn context, strip quotes if present for consistency
2292    let col_name_for_context =
2293        if col_name.starts_with('"') && col_name.ends_with('"') && col_name.len() > 2 {
2294            col_name[1..col_name.len() - 1].to_string()
2295        } else {
2296            col_name.to_string()
2297        };
2298
2299    (
2300        CursorContext::AfterColumn(col_name_for_context),
2301        partial_method,
2302    )
2303}
2304
2305/// Helper function to check if we're after a comparison operator
2306fn check_after_comparison_operator(query: &str) -> Option<(CursorContext, Option<String>)> {
2307    for op in &Parser::COMPARISON_OPERATORS {
2308        if let Some(op_pos) = query.rfind(op) {
2309            let before_op = safe_slice_to(query, op_pos);
2310            let after_op_start = op_pos + op.len();
2311            let after_op = if after_op_start < query.len() {
2312                &query[after_op_start..]
2313            } else {
2314                ""
2315            };
2316
2317            // Check if we have a column name before the operator
2318            if let Some(col_name) = before_op.split_whitespace().last() {
2319                if col_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
2320                    // Check if we're at or near the end of the query
2321                    let after_op_trimmed = after_op.trim();
2322                    if after_op_trimmed.is_empty()
2323                        || (after_op_trimmed
2324                            .chars()
2325                            .all(|c| c.is_alphanumeric() || c == '_')
2326                            && !after_op_trimmed.contains('('))
2327                    {
2328                        let partial = if after_op_trimmed.is_empty() {
2329                            None
2330                        } else {
2331                            Some(after_op_trimmed.to_string())
2332                        };
2333                        return Some((
2334                            CursorContext::AfterComparisonOp(
2335                                col_name.to_string(),
2336                                op.trim().to_string(),
2337                            ),
2338                            partial,
2339                        ));
2340                    }
2341                }
2342            }
2343        }
2344    }
2345    None
2346}
2347
2348fn analyze_statement(
2349    stmt: &SelectStatement,
2350    query: &str,
2351    _cursor_pos: usize,
2352) -> (CursorContext, Option<String>) {
2353    // First check for method call context (e.g., "columnName." or "columnName.Con")
2354    let trimmed = query.trim();
2355
2356    // Check if we're after a comparison operator (e.g., "createdDate > ")
2357    if let Some(result) = check_after_comparison_operator(query) {
2358        return result;
2359    }
2360
2361    // First check if we're after AND/OR - this takes precedence
2362    // Helper function to check if string ends with a logical operator
2363    let ends_with_logical_op = |s: &str| -> bool {
2364        let s_upper = s.to_uppercase();
2365        s_upper.ends_with(" AND") || s_upper.ends_with(" OR")
2366    };
2367
2368    if ends_with_logical_op(trimmed) {
2369        // Don't check for method context if we're clearly after a logical operator
2370    } else {
2371        // Look for the last dot in the query
2372        if let Some(dot_pos) = trimmed.rfind('.') {
2373            // Check if we're after a column name and dot
2374            let before_dot = safe_slice_to(trimmed, dot_pos);
2375            let after_dot_start = dot_pos + 1;
2376            let after_dot = if after_dot_start < trimmed.len() {
2377                &trimmed[after_dot_start..]
2378            } else {
2379                ""
2380            };
2381
2382            // Check if the part after dot looks like an incomplete method call
2383            // (not a complete method call like "Contains(...)")
2384            if !after_dot.contains('(') {
2385                // Try to extract the column name - could be quoted or regular
2386                let col_name = if before_dot.ends_with('"') {
2387                    // Handle quoted identifier - search backwards for matching opening quote
2388                    let bytes = before_dot.as_bytes();
2389                    let pos = before_dot.len() - 1; // Position of closing quote
2390
2391                    find_quote_start(bytes, pos).map(|start| safe_slice_from(before_dot, start))
2392                } else {
2393                    // Regular identifier - get the last word, handling parentheses
2394                    // Strip all leading parentheses
2395                    before_dot
2396                        .split_whitespace()
2397                        .last()
2398                        .map(|word| word.trim_start_matches('('))
2399                };
2400
2401                if let Some(col_name) = col_name {
2402                    // For quoted identifiers, keep the quotes, for regular identifiers check validity
2403                    let is_valid = Parser::is_valid_identifier(col_name);
2404
2405                    if is_valid {
2406                        return handle_method_call_context(col_name, after_dot);
2407                    }
2408                }
2409            }
2410        }
2411    }
2412
2413    // Check if we're in WHERE clause
2414    if let Some(where_clause) = &stmt.where_clause {
2415        // Check if query ends with AND/OR (with or without trailing space/partial)
2416        let trimmed_upper = trimmed.to_uppercase();
2417        if trimmed_upper.ends_with(" AND") || trimmed_upper.ends_with(" OR") {
2418            let op = if trimmed_upper.ends_with(" AND") {
2419                LogicalOp::And
2420            } else {
2421                LogicalOp::Or
2422            };
2423            return (CursorContext::AfterLogicalOp(op), None);
2424        }
2425
2426        // Check if we have AND/OR followed by a partial word
2427        let query_upper = query.to_uppercase();
2428        if let Some(and_pos) = query_upper.rfind(" AND ") {
2429            let after_and = safe_slice_from(query, and_pos + 5);
2430            let partial = extract_partial_at_end(after_and);
2431            if partial.is_some() {
2432                return (CursorContext::AfterLogicalOp(LogicalOp::And), partial);
2433            }
2434        }
2435
2436        if let Some(or_pos) = query_upper.rfind(" OR ") {
2437            let after_or = safe_slice_from(query, or_pos + 4);
2438            let partial = extract_partial_at_end(after_or);
2439            if partial.is_some() {
2440                return (CursorContext::AfterLogicalOp(LogicalOp::Or), partial);
2441            }
2442        }
2443
2444        if let Some(last_condition) = where_clause.conditions.last() {
2445            if let Some(connector) = &last_condition.connector {
2446                // We're after AND/OR
2447                return (
2448                    CursorContext::AfterLogicalOp(connector.clone()),
2449                    extract_partial_at_end(query),
2450                );
2451            }
2452        }
2453        // We're in WHERE clause but not after AND/OR
2454        return (CursorContext::WhereClause, extract_partial_at_end(query));
2455    }
2456
2457    // Check if we're after ORDER BY
2458    let query_upper = query.to_uppercase();
2459    if query_upper.ends_with(" ORDER BY") {
2460        return (CursorContext::OrderByClause, None);
2461    }
2462
2463    // Check other contexts based on what's in the statement
2464    if stmt.order_by.is_some() {
2465        return (CursorContext::OrderByClause, extract_partial_at_end(query));
2466    }
2467
2468    if stmt.from_table.is_some() && stmt.where_clause.is_none() && stmt.order_by.is_none() {
2469        return (CursorContext::FromClause, extract_partial_at_end(query));
2470    }
2471
2472    if !stmt.columns.is_empty() && stmt.from_table.is_none() {
2473        return (CursorContext::SelectClause, extract_partial_at_end(query));
2474    }
2475
2476    (CursorContext::Unknown, None)
2477}
2478
2479/// Helper function to find the last occurrence of a token type in the token stream
2480fn find_last_token(tokens: &[(usize, usize, Token)], target: &Token) -> Option<usize> {
2481    tokens
2482        .iter()
2483        .rposition(|(_, _, t)| t == target)
2484        .map(|idx| tokens[idx].0)
2485}
2486
2487/// Helper function to find the last occurrence of any matching token
2488fn find_last_matching_token<F>(
2489    tokens: &[(usize, usize, Token)],
2490    predicate: F,
2491) -> Option<(usize, &Token)>
2492where
2493    F: Fn(&Token) -> bool,
2494{
2495    tokens
2496        .iter()
2497        .rposition(|(_, _, t)| predicate(t))
2498        .map(|idx| (tokens[idx].0, &tokens[idx].2))
2499}
2500
2501/// Helper function to check if we're in a specific clause based on tokens
2502fn is_in_clause(
2503    tokens: &[(usize, usize, Token)],
2504    clause_token: Token,
2505    exclude_tokens: &[Token],
2506) -> bool {
2507    // Find the last occurrence of the clause token
2508    if let Some(clause_pos) = find_last_token(tokens, &clause_token) {
2509        // Check if any exclude tokens appear after it
2510        for (pos, _, token) in tokens.iter() {
2511            if *pos > clause_pos && exclude_tokens.contains(token) {
2512                return false;
2513            }
2514        }
2515        return true;
2516    }
2517    false
2518}
2519
2520fn analyze_partial(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
2521    // Tokenize the query up to cursor position
2522    let mut lexer = Lexer::new(query);
2523    let tokens = lexer.tokenize_all_with_positions();
2524
2525    let trimmed = query.trim();
2526
2527    #[cfg(test)]
2528    {
2529        if trimmed.contains("\"Last Name\"") {
2530            eprintln!("DEBUG analyze_partial: query='{query}', trimmed='{trimmed}'");
2531        }
2532    }
2533
2534    // Check if we're after a comparison operator (e.g., "createdDate > ")
2535    if let Some(result) = check_after_comparison_operator(query) {
2536        return result;
2537    }
2538
2539    // Look for the last dot in the query (method call context) - check this FIRST
2540    // before AND/OR detection to properly handle cases like "AND (Country."
2541    if let Some(dot_pos) = trimmed.rfind('.') {
2542        #[cfg(test)]
2543        {
2544            if trimmed.contains("\"Last Name\"") {
2545                eprintln!("DEBUG: Found dot at position {dot_pos}");
2546            }
2547        }
2548        // Check if we're after a column name and dot
2549        let before_dot = &trimmed[..dot_pos];
2550        let after_dot = &trimmed[dot_pos + 1..];
2551
2552        // Check if the part after dot looks like an incomplete method call
2553        // (not a complete method call like "Contains(...)")
2554        if !after_dot.contains('(') {
2555            // Try to extract the column name before the dot
2556            // It could be a quoted identifier like "Last Name" or a regular identifier
2557            let col_name = if before_dot.ends_with('"') {
2558                // Handle quoted identifier - search backwards for matching opening quote
2559                let bytes = before_dot.as_bytes();
2560                let pos = before_dot.len() - 1; // Position of closing quote
2561
2562                #[cfg(test)]
2563                {
2564                    if trimmed.contains("\"Last Name\"") {
2565                        eprintln!("DEBUG: before_dot='{before_dot}', looking for opening quote");
2566                    }
2567                }
2568
2569                let found_start = find_quote_start(bytes, pos);
2570
2571                if let Some(start) = found_start {
2572                    // Extract the full quoted identifier including quotes
2573                    let result = safe_slice_from(before_dot, start);
2574                    #[cfg(test)]
2575                    {
2576                        if trimmed.contains("\"Last Name\"") {
2577                            eprintln!("DEBUG: Extracted quoted identifier: '{result}'");
2578                        }
2579                    }
2580                    Some(result)
2581                } else {
2582                    #[cfg(test)]
2583                    {
2584                        if trimmed.contains("\"Last Name\"") {
2585                            eprintln!("DEBUG: No opening quote found!");
2586                        }
2587                    }
2588                    None
2589                }
2590            } else {
2591                // Regular identifier - get the last word, handling parentheses
2592                // Strip all leading parentheses
2593                before_dot
2594                    .split_whitespace()
2595                    .last()
2596                    .map(|word| word.trim_start_matches('('))
2597            };
2598
2599            if let Some(col_name) = col_name {
2600                #[cfg(test)]
2601                {
2602                    if trimmed.contains("\"Last Name\"") {
2603                        eprintln!("DEBUG: col_name = '{col_name}'");
2604                    }
2605                }
2606
2607                // For quoted identifiers, keep the quotes, for regular identifiers check validity
2608                let is_valid = Parser::is_valid_identifier(col_name);
2609
2610                #[cfg(test)]
2611                {
2612                    if trimmed.contains("\"Last Name\"") {
2613                        eprintln!("DEBUG: is_valid = {is_valid}");
2614                    }
2615                }
2616
2617                if is_valid {
2618                    return handle_method_call_context(col_name, after_dot);
2619                }
2620            }
2621        }
2622    }
2623
2624    // Check if we're after AND/OR using tokens - but only after checking for method calls
2625    if let Some((pos, token)) =
2626        find_last_matching_token(&tokens, |t| matches!(t, Token::And | Token::Or))
2627    {
2628        // Check if cursor is after the logical operator
2629        let token_end_pos = if matches!(token, Token::And) {
2630            pos + 3 // "AND" is 3 characters
2631        } else {
2632            pos + 2 // "OR" is 2 characters
2633        };
2634
2635        if cursor_pos > token_end_pos {
2636            // Extract any partial word after the operator
2637            let after_op = safe_slice_from(query, token_end_pos + 1); // +1 for the space
2638            let partial = extract_partial_at_end(after_op);
2639            let op = if matches!(token, Token::And) {
2640                LogicalOp::And
2641            } else {
2642                LogicalOp::Or
2643            };
2644            return (CursorContext::AfterLogicalOp(op), partial);
2645        }
2646    }
2647
2648    // Check if the last token is AND or OR (handles case where it's at the very end)
2649    if let Some((_, _, last_token)) = tokens.last() {
2650        if matches!(last_token, Token::And | Token::Or) {
2651            let op = if matches!(last_token, Token::And) {
2652                LogicalOp::And
2653            } else {
2654                LogicalOp::Or
2655            };
2656            return (CursorContext::AfterLogicalOp(op), None);
2657        }
2658    }
2659
2660    // Check if we're in ORDER BY clause using tokens
2661    if let Some(order_pos) = find_last_token(&tokens, &Token::OrderBy) {
2662        // Check if there's a BY token after ORDER
2663        let has_by = tokens
2664            .iter()
2665            .any(|(pos, _, t)| *pos > order_pos && matches!(t, Token::By));
2666        if has_by
2667            || tokens
2668                .last()
2669                .map_or(false, |(_, _, t)| matches!(t, Token::OrderBy))
2670        {
2671            return (CursorContext::OrderByClause, extract_partial_at_end(query));
2672        }
2673    }
2674
2675    // Check if we're in WHERE clause using tokens
2676    if is_in_clause(&tokens, Token::Where, &[Token::OrderBy, Token::GroupBy]) {
2677        return (CursorContext::WhereClause, extract_partial_at_end(query));
2678    }
2679
2680    // Check if we're in FROM clause using tokens
2681    if is_in_clause(
2682        &tokens,
2683        Token::From,
2684        &[Token::Where, Token::OrderBy, Token::GroupBy],
2685    ) {
2686        return (CursorContext::FromClause, extract_partial_at_end(query));
2687    }
2688
2689    // Check if we're in SELECT clause using tokens
2690    if find_last_token(&tokens, &Token::Select).is_some()
2691        && find_last_token(&tokens, &Token::From).is_none()
2692    {
2693        return (CursorContext::SelectClause, extract_partial_at_end(query));
2694    }
2695
2696    (CursorContext::Unknown, None)
2697}
2698
2699fn extract_partial_at_end(query: &str) -> Option<String> {
2700    let trimmed = query.trim();
2701
2702    // First check if the last word itself starts with a quote (unclosed quoted identifier being typed)
2703    if let Some(last_word) = trimmed.split_whitespace().last() {
2704        if last_word.starts_with('"') && !last_word.ends_with('"') {
2705            // This is an unclosed quoted identifier like "Cust
2706            return Some(last_word.to_string());
2707        }
2708    }
2709
2710    // Regular identifier extraction
2711    let last_word = trimmed.split_whitespace().last()?;
2712
2713    // Check if it's a partial identifier (not a keyword or operator)
2714    // First check if it's alphanumeric (potential identifier)
2715    if last_word.chars().all(|c| c.is_alphanumeric() || c == '_') {
2716        // Use lexer to determine if it's a keyword or identifier
2717        if !is_sql_keyword(last_word) {
2718            Some(last_word.to_string())
2719        } else {
2720            None
2721        }
2722    } else {
2723        None
2724    }
2725}
2726
2727// Implement the ParsePrimary trait for Parser to use the modular expression parsing
2728impl ParsePrimary for Parser {
2729    fn current_token(&self) -> &Token {
2730        &self.current_token
2731    }
2732
2733    fn advance(&mut self) {
2734        self.advance();
2735    }
2736
2737    fn consume(&mut self, expected: Token) -> Result<(), String> {
2738        self.consume(expected)
2739    }
2740
2741    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
2742        self.parse_case_expression()
2743    }
2744
2745    fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
2746        self.parse_function_args()
2747    }
2748
2749    fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
2750        self.parse_window_spec()
2751    }
2752
2753    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
2754        self.parse_logical_or()
2755    }
2756
2757    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2758        self.parse_comparison()
2759    }
2760
2761    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2762        self.parse_expression_list()
2763    }
2764
2765    fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2766        // Parse subquery without parenthesis balance validation
2767        if matches!(self.current_token, Token::With) {
2768            self.parse_with_clause_inner()
2769        } else {
2770            self.parse_select_statement_inner()
2771        }
2772    }
2773}
2774
2775// Implement the ExpressionParser trait for Parser to use the modular expression parsing
2776impl ExpressionParser for Parser {
2777    fn current_token(&self) -> &Token {
2778        &self.current_token
2779    }
2780
2781    fn advance(&mut self) {
2782        // Call the main advance method directly to avoid recursion
2783        match &self.current_token {
2784            Token::LeftParen => self.paren_depth += 1,
2785            Token::RightParen => {
2786                self.paren_depth -= 1;
2787            }
2788            _ => {}
2789        }
2790        self.current_token = self.lexer.next_token();
2791    }
2792
2793    fn peek(&self) -> Option<&Token> {
2794        // We can't return a reference to a token from a temporary lexer,
2795        // so we need a different approach. For now, let's use a workaround
2796        // that checks the next token type without consuming it.
2797        // This is a limitation of the current design.
2798        // A proper fix would be to store the peeked token in the Parser struct.
2799        None // TODO: Implement proper lookahead
2800    }
2801
2802    fn is_at_end(&self) -> bool {
2803        matches!(self.current_token, Token::Eof)
2804    }
2805
2806    fn consume(&mut self, expected: Token) -> Result<(), String> {
2807        // Call the main consume method to avoid recursion
2808        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
2809            self.update_paren_depth(&expected)?;
2810            self.current_token = self.lexer.next_token();
2811            Ok(())
2812        } else {
2813            Err(format!(
2814                "Expected {:?}, found {:?}",
2815                expected, self.current_token
2816            ))
2817        }
2818    }
2819
2820    fn parse_identifier(&mut self) -> Result<String, String> {
2821        if let Token::Identifier(id) = &self.current_token {
2822            let id = id.clone();
2823            self.advance();
2824            Ok(id)
2825        } else {
2826            Err(format!(
2827                "Expected identifier, found {:?}",
2828                self.current_token
2829            ))
2830        }
2831    }
2832}
2833
2834// Implement the ParseArithmetic trait for Parser to use the modular arithmetic parsing
2835impl ParseArithmetic for Parser {
2836    fn current_token(&self) -> &Token {
2837        &self.current_token
2838    }
2839
2840    fn advance(&mut self) {
2841        self.advance();
2842    }
2843
2844    fn consume(&mut self, expected: Token) -> Result<(), String> {
2845        self.consume(expected)
2846    }
2847
2848    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2849        self.parse_primary()
2850    }
2851
2852    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
2853        self.parse_multiplicative()
2854    }
2855
2856    fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
2857        self.parse_method_args()
2858    }
2859}
2860
2861// Implement the ParseComparison trait for Parser to use the modular comparison parsing
2862impl ParseComparison for Parser {
2863    fn current_token(&self) -> &Token {
2864        &self.current_token
2865    }
2866
2867    fn advance(&mut self) {
2868        self.advance();
2869    }
2870
2871    fn consume(&mut self, expected: Token) -> Result<(), String> {
2872        self.consume(expected)
2873    }
2874
2875    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2876        self.parse_primary()
2877    }
2878
2879    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
2880        self.parse_additive()
2881    }
2882
2883    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2884        self.parse_expression_list()
2885    }
2886
2887    fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2888        // Parse subquery without parenthesis balance validation
2889        if matches!(self.current_token, Token::With) {
2890            self.parse_with_clause_inner()
2891        } else {
2892            self.parse_select_statement_inner()
2893        }
2894    }
2895}
2896
2897// Implement the ParseLogical trait for Parser to use the modular logical parsing
2898impl ParseLogical for Parser {
2899    fn current_token(&self) -> &Token {
2900        &self.current_token
2901    }
2902
2903    fn advance(&mut self) {
2904        self.advance();
2905    }
2906
2907    fn consume(&mut self, expected: Token) -> Result<(), String> {
2908        self.consume(expected)
2909    }
2910
2911    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
2912        self.parse_logical_and()
2913    }
2914
2915    fn parse_base_logical_expression(&mut self) -> Result<SqlExpression, String> {
2916        // This is the base for logical AND - it should parse comparison expressions
2917        // to avoid infinite recursion with parse_expression
2918        self.parse_comparison()
2919    }
2920
2921    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2922        self.parse_comparison()
2923    }
2924
2925    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2926        self.parse_expression_list()
2927    }
2928}
2929
2930// Implement the ParseCase trait for Parser to use the modular CASE parsing
2931impl ParseCase for Parser {
2932    fn current_token(&self) -> &Token {
2933        &self.current_token
2934    }
2935
2936    fn advance(&mut self) {
2937        self.advance();
2938    }
2939
2940    fn consume(&mut self, expected: Token) -> Result<(), String> {
2941        self.consume(expected)
2942    }
2943
2944    fn parse_expression(&mut self) -> Result<SqlExpression, String> {
2945        self.parse_expression()
2946    }
2947}
2948
2949fn is_sql_keyword(word: &str) -> bool {
2950    // Use the lexer to check if this word produces a keyword token
2951    let mut lexer = Lexer::new(word);
2952    let token = lexer.next_token();
2953
2954    // Check if it's a keyword token (not an identifier)
2955    !matches!(token, Token::Identifier(_) | Token::Eof)
2956}
2957
2958#[cfg(test)]
2959mod tests {
2960    use super::*;
2961
2962    /// Test that Parser::new() defaults to Standard mode (backward compatible)
2963    #[test]
2964    fn test_parser_mode_default_is_standard() {
2965        let sql = "-- Leading comment\nSELECT * FROM users";
2966        let mut parser = Parser::new(sql);
2967        let stmt = parser.parse().unwrap();
2968
2969        // In Standard mode, comments should be empty
2970        assert!(stmt.leading_comments.is_empty());
2971        assert!(stmt.trailing_comment.is_none());
2972    }
2973
2974    /// Test that PreserveComments mode collects leading comments
2975    #[test]
2976    fn test_parser_mode_preserve_leading_comments() {
2977        let sql = "-- Important query\n-- Author: Alice\nSELECT id, name FROM users";
2978        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
2979        let stmt = parser.parse().unwrap();
2980
2981        // Should have 2 leading comments
2982        assert_eq!(stmt.leading_comments.len(), 2);
2983        assert!(stmt.leading_comments[0].is_line_comment);
2984        assert!(stmt.leading_comments[0].text.contains("Important query"));
2985        assert!(stmt.leading_comments[1].text.contains("Author: Alice"));
2986    }
2987
2988    /// Test that PreserveComments mode collects trailing comments
2989    #[test]
2990    fn test_parser_mode_preserve_trailing_comment() {
2991        let sql = "SELECT * FROM users -- Fetch all users";
2992        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
2993        let stmt = parser.parse().unwrap();
2994
2995        // Should have trailing comment
2996        assert!(stmt.trailing_comment.is_some());
2997        let comment = stmt.trailing_comment.unwrap();
2998        assert!(comment.is_line_comment);
2999        assert!(comment.text.contains("Fetch all users"));
3000    }
3001
3002    /// Test that PreserveComments mode handles block comments
3003    #[test]
3004    fn test_parser_mode_preserve_block_comments() {
3005        let sql = "/* Query explanation */\nSELECT * FROM users";
3006        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
3007        let stmt = parser.parse().unwrap();
3008
3009        // Should have leading block comment
3010        assert_eq!(stmt.leading_comments.len(), 1);
3011        assert!(!stmt.leading_comments[0].is_line_comment); // It's a block comment
3012        assert!(stmt.leading_comments[0].text.contains("Query explanation"));
3013    }
3014
3015    /// Test that PreserveComments mode collects both leading and trailing
3016    #[test]
3017    fn test_parser_mode_preserve_both_comments() {
3018        let sql = "-- Leading\nSELECT * FROM users -- Trailing";
3019        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
3020        let stmt = parser.parse().unwrap();
3021
3022        // Should have both
3023        assert_eq!(stmt.leading_comments.len(), 1);
3024        assert!(stmt.leading_comments[0].text.contains("Leading"));
3025        assert!(stmt.trailing_comment.is_some());
3026        assert!(stmt.trailing_comment.unwrap().text.contains("Trailing"));
3027    }
3028
3029    /// Test that Standard mode has zero performance overhead (no comment parsing)
3030    #[test]
3031    fn test_parser_mode_standard_ignores_comments() {
3032        let sql = "-- Comment 1\n/* Comment 2 */\nSELECT * FROM users -- Comment 3";
3033        let mut parser = Parser::with_mode(sql, ParserMode::Standard);
3034        let stmt = parser.parse().unwrap();
3035
3036        // Comments should be completely ignored
3037        assert!(stmt.leading_comments.is_empty());
3038        assert!(stmt.trailing_comment.is_none());
3039
3040        // But query should still parse correctly
3041        assert_eq!(stmt.select_items.len(), 1);
3042        assert_eq!(stmt.from_table, Some("users".to_string()));
3043    }
3044
3045    /// Test backward compatibility - existing code using Parser::new() unchanged
3046    #[test]
3047    fn test_parser_backward_compatibility() {
3048        let sql = "SELECT id, name FROM users WHERE active = true";
3049
3050        // Old way (still works, defaults to Standard mode)
3051        let mut parser1 = Parser::new(sql);
3052        let stmt1 = parser1.parse().unwrap();
3053
3054        // Explicit Standard mode (same behavior)
3055        let mut parser2 = Parser::with_mode(sql, ParserMode::Standard);
3056        let stmt2 = parser2.parse().unwrap();
3057
3058        // Both should produce identical ASTs (comments are empty in both)
3059        assert_eq!(stmt1.select_items.len(), stmt2.select_items.len());
3060        assert_eq!(stmt1.from_table, stmt2.from_table);
3061        assert_eq!(stmt1.where_clause.is_some(), stmt2.where_clause.is_some());
3062        assert!(stmt1.leading_comments.is_empty());
3063        assert!(stmt2.leading_comments.is_empty());
3064    }
3065
3066    /// Test PIVOT parsing - currently returns error as execution is not implemented
3067    #[test]
3068    fn test_pivot_parsing_not_yet_supported() {
3069        let sql = "SELECT * FROM food_eaten PIVOT (MAX(AmountEaten) FOR FoodName IN ('Sammich', 'Pickle', 'Apple'))";
3070        let mut parser = Parser::new(sql);
3071        let result = parser.parse();
3072
3073        // PIVOT is now fully supported! Verify parsing succeeds
3074        assert!(result.is_ok());
3075        let stmt = result.unwrap();
3076
3077        // Verify from_source contains a PIVOT
3078        assert!(stmt.from_source.is_some());
3079        if let Some(crate::sql::parser::ast::TableSource::Pivot { .. }) = stmt.from_source {
3080            // Success!
3081        } else {
3082            panic!("Expected from_source to be a Pivot variant");
3083        }
3084    }
3085
3086    /// Test PIVOT syntax with different aggregate functions
3087    #[test]
3088    fn test_pivot_aggregate_functions() {
3089        // Test with SUM - PIVOT is now fully supported!
3090        let sql = "SELECT * FROM sales PIVOT (SUM(amount) FOR month IN ('Jan', 'Feb', 'Mar'))";
3091        let mut parser = Parser::new(sql);
3092        let result = parser.parse();
3093        assert!(result.is_ok());
3094
3095        // Test with COUNT
3096        let sql2 = "SELECT * FROM sales PIVOT (COUNT(*) FOR month IN ('Jan', 'Feb'))";
3097        let mut parser2 = Parser::new(sql2);
3098        let result2 = parser2.parse();
3099        assert!(result2.is_ok());
3100
3101        // Test with AVG
3102        let sql3 = "SELECT * FROM sales PIVOT (AVG(price) FOR category IN ('A', 'B'))";
3103        let mut parser3 = Parser::new(sql3);
3104        let result3 = parser3.parse();
3105        assert!(result3.is_ok());
3106    }
3107
3108    /// Test PIVOT with subquery source
3109    #[test]
3110    fn test_pivot_with_subquery() {
3111        let sql = "SELECT * FROM (SELECT * FROM food_eaten WHERE Id > 5) AS t \
3112                   PIVOT (MAX(AmountEaten) FOR FoodName IN ('Sammich', 'Pickle'))";
3113        let mut parser = Parser::new(sql);
3114        let result = parser.parse();
3115
3116        // PIVOT with subquery is now fully supported!
3117        assert!(result.is_ok());
3118        let stmt = result.unwrap();
3119        assert!(stmt.from_source.is_some());
3120    }
3121
3122    /// Test PIVOT with alias
3123    #[test]
3124    fn test_pivot_with_alias() {
3125        let sql =
3126            "SELECT * FROM sales PIVOT (SUM(amount) FOR month IN ('Jan', 'Feb')) AS pivot_table";
3127        let mut parser = Parser::new(sql);
3128        let result = parser.parse();
3129
3130        // PIVOT with alias is now fully supported!
3131        assert!(result.is_ok());
3132        let stmt = result.unwrap();
3133        assert!(stmt.from_source.is_some());
3134    }
3135
3136    /// Pull the WebCTESpec out of a parsed top-level statement that uses a
3137    /// single WEB CTE. Test helper.
3138    fn extract_web_spec(
3139        stmt: &crate::sql::parser::ast::SelectStatement,
3140    ) -> &crate::sql::parser::ast::WebCTESpec {
3141        use crate::sql::parser::ast::CTEType;
3142        assert!(!stmt.ctes.is_empty(), "statement should have CTEs");
3143        match &stmt.ctes[0].cte_type {
3144            CTEType::Web(spec) => spec,
3145            other => panic!("expected Web CTE, got {:?}", other),
3146        }
3147    }
3148
3149    #[test]
3150    fn test_web_cte_delimiter_pipe() {
3151        let sql = "WITH WEB foo AS (URL 'file:///tmp/missing.dat' FORMAT CSV DELIMITER '|') \
3152                   SELECT * FROM foo";
3153        let mut parser = Parser::new(sql);
3154        let stmt = parser.parse().expect("parse failed");
3155        let spec = extract_web_spec(&stmt);
3156        assert_eq!(spec.delimiter, Some(b'|'));
3157    }
3158
3159    #[test]
3160    fn test_web_cte_delimiter_tab_via_escape() {
3161        let sql = "WITH WEB foo AS (URL 'file:///tmp/missing.dat' FORMAT CSV DELIMITER '\\t') \
3162                   SELECT * FROM foo";
3163        let mut parser = Parser::new(sql);
3164        let stmt = parser.parse().expect("parse failed");
3165        let spec = extract_web_spec(&stmt);
3166        assert_eq!(spec.delimiter, Some(b'\t'));
3167    }
3168
3169    #[test]
3170    fn test_web_cte_no_delimiter_defaults_to_none() {
3171        let sql = "WITH WEB foo AS (URL 'file:///tmp/missing.dat' FORMAT CSV) SELECT * FROM foo";
3172        let mut parser = Parser::new(sql);
3173        let stmt = parser.parse().expect("parse failed");
3174        let spec = extract_web_spec(&stmt);
3175        assert!(spec.delimiter.is_none());
3176    }
3177
3178    #[test]
3179    fn test_web_cte_delimiter_rejects_multi_char() {
3180        let sql = "WITH WEB foo AS (URL 'file:///tmp/missing.dat' FORMAT CSV DELIMITER '||') \
3181                   SELECT * FROM foo";
3182        let mut parser = Parser::new(sql);
3183        let err = parser.parse().unwrap_err();
3184        let msg = err.to_string();
3185        assert!(
3186            msg.contains("DELIMITER") || msg.contains("single ASCII"),
3187            "should reject multi-char delimiter: {}",
3188            msg
3189        );
3190    }
3191}