Skip to main content

sql_cli/sql/
recursive_parser.rs

1// Keep chrono imports for the parser implementation
2
3// Re-exports for backward compatibility - these serve as both imports and re-exports
4pub use super::parser::ast::{
5    CTEType, Comment, Condition, DataFormat, FileCTESpec, FrameBound, FrameUnit, HttpMethod,
6    IntoTable, JoinClause, JoinCondition, JoinOperator, JoinType, LogicalOp, OrderByColumn,
7    OrderByItem, PivotAggregate, SelectItem, SelectStatement, SetOperation, SingleJoinCondition,
8    SortDirection, SqlExpression, TableFunction, TableSource, WebCTESpec, WhenBranch, WhereClause,
9    WindowFrame, WindowSpec, CTE,
10};
11pub use super::parser::legacy::{ParseContext, ParseState, Schema, SqlParser, SqlToken, TableInfo};
12pub use super::parser::lexer::{Lexer, LexerMode, Token};
13pub use super::parser::ParserConfig;
14
15// Re-export formatting functions for backward compatibility
16pub use super::parser::formatter::{format_ast_tree, format_sql_pretty, format_sql_pretty_compact};
17
18// New AST-based formatter
19pub use super::parser::ast_formatter::{format_sql_ast, format_sql_ast_with_config, FormatConfig};
20
21// Import the new expression modules
22use super::parser::expressions::arithmetic::{
23    parse_additive as parse_additive_expr, parse_multiplicative as parse_multiplicative_expr,
24    ParseArithmetic,
25};
26use super::parser::expressions::case::{parse_case_expression as parse_case_expr, ParseCase};
27use super::parser::expressions::comparison::{
28    parse_comparison as parse_comparison_expr, parse_in_operator, ParseComparison,
29};
30use super::parser::expressions::logical::{
31    parse_logical_and as parse_logical_and_expr, parse_logical_or as parse_logical_or_expr,
32    ParseLogical,
33};
34use super::parser::expressions::primary::{
35    parse_primary as parse_primary_expr, ParsePrimary, PrimaryExpressionContext,
36};
37use super::parser::expressions::ExpressionParser;
38
39// Import function registry to check for function existence
40use crate::sql::functions::{FunctionCategory, FunctionRegistry};
41use crate::sql::generators::GeneratorRegistry;
42use std::sync::Arc;
43
44// Import Web CTE parser
45use super::parser::file_cte_parser::FileCteParser;
46use super::parser::web_cte_parser::WebCteParser;
47
48/// Parser mode - controls whether comments are preserved in AST
49#[derive(Debug, Clone, Copy, PartialEq)]
50pub enum ParserMode {
51    /// Standard parsing - skip comments (current behavior, backward compatible)
52    Standard,
53    /// Preserve comments in AST (opt-in for formatters)
54    PreserveComments,
55}
56
57impl Default for ParserMode {
58    fn default() -> Self {
59        ParserMode::Standard
60    }
61}
62
63pub struct Parser {
64    lexer: Lexer,
65    pub current_token: Token,    // Made public for web_cte_parser access
66    in_method_args: bool,        // Track if we're parsing method arguments
67    columns: Vec<String>,        // Known column names for context-aware parsing
68    paren_depth: i32,            // Track parentheses nesting depth
69    paren_depth_stack: Vec<i32>, // Stack to save/restore paren depth for nested contexts
70    _config: ParserConfig,       // Parser configuration including case sensitivity
71    debug_trace: bool,           // Enable detailed token-by-token trace
72    trace_depth: usize,          // Track recursion depth for indented trace
73    function_registry: Arc<FunctionRegistry>, // Function registry for validation
74    generator_registry: Arc<GeneratorRegistry>, // Generator registry for table functions
75    mode: ParserMode,            // Parser mode for comment preservation
76}
77
78impl Parser {
79    #[must_use]
80    pub fn new(input: &str) -> Self {
81        Self::with_mode(input, ParserMode::default())
82    }
83
84    /// Create a new parser with explicit mode for comment preservation
85    #[must_use]
86    pub fn with_mode(input: &str, mode: ParserMode) -> Self {
87        // Choose lexer mode based on parser mode
88        let lexer_mode = match mode {
89            ParserMode::Standard => LexerMode::SkipComments,
90            ParserMode::PreserveComments => LexerMode::PreserveComments,
91        };
92
93        let mut lexer = Lexer::with_mode(input, lexer_mode);
94        let current_token = lexer.next_token();
95        Self {
96            lexer,
97            current_token,
98            in_method_args: false,
99            columns: Vec::new(),
100            paren_depth: 0,
101            paren_depth_stack: Vec::new(),
102            _config: ParserConfig::default(),
103            debug_trace: false,
104            trace_depth: 0,
105            function_registry: Arc::new(FunctionRegistry::new()),
106            generator_registry: Arc::new(GeneratorRegistry::new()),
107            mode,
108        }
109    }
110
111    #[must_use]
112    pub fn with_config(input: &str, config: ParserConfig) -> Self {
113        let mut lexer = Lexer::new(input);
114        let current_token = lexer.next_token();
115        Self {
116            lexer,
117            current_token,
118            in_method_args: false,
119            columns: Vec::new(),
120            paren_depth: 0,
121            paren_depth_stack: Vec::new(),
122            _config: config,
123            debug_trace: false,
124            trace_depth: 0,
125            function_registry: Arc::new(FunctionRegistry::new()),
126            generator_registry: Arc::new(GeneratorRegistry::new()),
127            mode: ParserMode::default(),
128        }
129    }
130
131    #[must_use]
132    pub fn with_columns(mut self, columns: Vec<String>) -> Self {
133        self.columns = columns;
134        self
135    }
136
137    #[must_use]
138    pub fn with_debug_trace(mut self, enabled: bool) -> Self {
139        self.debug_trace = enabled;
140        self
141    }
142
143    #[must_use]
144    pub fn with_function_registry(mut self, registry: Arc<FunctionRegistry>) -> Self {
145        self.function_registry = registry;
146        self
147    }
148
149    #[must_use]
150    pub fn with_generator_registry(mut self, registry: Arc<GeneratorRegistry>) -> Self {
151        self.generator_registry = registry;
152        self
153    }
154
155    fn trace_enter(&mut self, context: &str) {
156        if self.debug_trace {
157            let indent = "  ".repeat(self.trace_depth);
158            eprintln!("{}→ {} | Token: {:?}", indent, context, self.current_token);
159            self.trace_depth += 1;
160        }
161    }
162
163    fn trace_exit(&mut self, context: &str, result: &Result<impl std::fmt::Debug, String>) {
164        if self.debug_trace {
165            self.trace_depth = self.trace_depth.saturating_sub(1);
166            let indent = "  ".repeat(self.trace_depth);
167            match result {
168                Ok(val) => eprintln!("{}← {} ✓ | Result: {:?}", indent, context, val),
169                Err(e) => eprintln!("{}← {} ✗ | Error: {}", indent, context, e),
170            }
171        }
172    }
173
174    fn trace_token(&self, action: &str) {
175        if self.debug_trace {
176            let indent = "  ".repeat(self.trace_depth);
177            eprintln!("{}  {} | Token: {:?}", indent, action, self.current_token);
178        }
179    }
180
181    #[allow(dead_code)]
182    fn peek_token(&self) -> Option<Token> {
183        // Alternative peek that returns owned token
184        let mut temp_lexer = self.lexer.clone();
185        let next_token = temp_lexer.next_token();
186        if matches!(next_token, Token::Eof) {
187            None
188        } else {
189            Some(next_token)
190        }
191    }
192
193    /// Check if current token is one of the reserved keywords that should stop parsing
194    /// Check if an identifier string is a reserved keyword (for backward compatibility)
195    /// This is used when the lexer hasn't properly tokenized keywords and they come through
196    /// as Token::Identifier instead of their proper token types
197    fn is_identifier_reserved(id: &str) -> bool {
198        let id_upper = id.to_uppercase();
199        matches!(
200            id_upper.as_str(),
201            "ORDER" | "HAVING" | "LIMIT" | "OFFSET" | "UNION" | "INTERSECT" | "EXCEPT"
202        )
203    }
204
205    /// Get comparison operator string representation (for autocomplete context)
206    const COMPARISON_OPERATORS: [&'static str; 6] = [" > ", " < ", " >= ", " <= ", " = ", " != "];
207
208    pub fn consume(&mut self, expected: Token) -> Result<(), String> {
209        self.trace_token(&format!("Consuming expected {:?}", expected));
210        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
211            // Track parentheses depth
212            self.update_paren_depth(&expected)?;
213
214            self.current_token = self.lexer.next_token();
215            Ok(())
216        } else {
217            // Provide better error messages for common cases
218            let error_msg = match (&expected, &self.current_token) {
219                (Token::RightParen, Token::Eof) if self.paren_depth > 0 => {
220                    format!(
221                        "Unclosed parenthesis - missing {} closing parenthes{}",
222                        self.paren_depth,
223                        if self.paren_depth == 1 { "is" } else { "es" }
224                    )
225                }
226                (Token::RightParen, _) if self.paren_depth > 0 => {
227                    format!(
228                        "Expected closing parenthesis but found {:?} (currently {} unclosed parenthes{})",
229                        self.current_token,
230                        self.paren_depth,
231                        if self.paren_depth == 1 { "is" } else { "es" }
232                    )
233                }
234                _ => format!("Expected {:?}, found {:?}", expected, self.current_token),
235            };
236            Err(error_msg)
237        }
238    }
239
240    pub fn advance(&mut self) {
241        // Track parentheses depth when advancing
242        match &self.current_token {
243            Token::LeftParen => self.paren_depth += 1,
244            Token::RightParen => {
245                self.paren_depth -= 1;
246                // Note: We don't check for < 0 here because advance() is used
247                // in contexts where we're not necessarily expecting a right paren
248            }
249            _ => {}
250        }
251        let old_token = self.current_token.clone();
252        self.current_token = self.lexer.next_token();
253        if self.debug_trace {
254            let indent = "  ".repeat(self.trace_depth);
255            eprintln!(
256                "{}  Advanced: {:?} → {:?}",
257                indent, old_token, self.current_token
258            );
259        }
260    }
261
262    /// Collect all leading comments before a SQL construct
263    /// This consumes comment tokens and returns them as a Vec<Comment>
264    fn collect_leading_comments(&mut self) -> Vec<Comment> {
265        let mut comments = Vec::new();
266        loop {
267            match &self.current_token {
268                Token::LineComment(text) => {
269                    comments.push(Comment::line(text.clone()));
270                    self.advance();
271                }
272                Token::BlockComment(text) => {
273                    comments.push(Comment::block(text.clone()));
274                    self.advance();
275                }
276                _ => break,
277            }
278        }
279        comments
280    }
281
282    /// Collect a trailing inline comment (on the same line)
283    /// This consumes a single comment token if present
284    fn collect_trailing_comment(&mut self) -> Option<Comment> {
285        match &self.current_token {
286            Token::LineComment(text) => {
287                let comment = Some(Comment::line(text.clone()));
288                self.advance();
289                comment
290            }
291            Token::BlockComment(text) => {
292                let comment = Some(Comment::block(text.clone()));
293                self.advance();
294                comment
295            }
296            _ => None,
297        }
298    }
299
300    fn push_paren_depth(&mut self) {
301        self.paren_depth_stack.push(self.paren_depth);
302        self.paren_depth = 0;
303    }
304
305    fn pop_paren_depth(&mut self) {
306        if let Some(depth) = self.paren_depth_stack.pop() {
307            // Ignore the internal depth - just restore the saved value
308            self.paren_depth = depth;
309        }
310    }
311
312    pub fn parse(&mut self) -> Result<SelectStatement, String> {
313        self.trace_enter("parse");
314
315        // Collect leading comments FIRST (before checking for WITH or SELECT)
316        // This allows comments before WITH clauses to be preserved
317        let leading_comments = if self.mode == ParserMode::PreserveComments {
318            self.collect_leading_comments()
319        } else {
320            vec![]
321        };
322
323        // Now check for WITH clause (after consuming comments)
324        let result = if matches!(self.current_token, Token::With) {
325            let mut stmt = self.parse_with_clause()?;
326            // Attach the leading comments we collected
327            stmt.leading_comments = leading_comments;
328            stmt
329        } else {
330            // For SELECT without WITH, pass comments to inner parser
331            let stmt = self.parse_select_statement_with_comments_public(leading_comments)?;
332            self.check_balanced_parentheses()?;
333            stmt
334        };
335
336        self.trace_exit("parse", &Ok(&result));
337        Ok(result)
338    }
339
340    /// Public wrapper that accepts pre-collected comments and checks parens
341    fn parse_select_statement_with_comments_public(
342        &mut self,
343        comments: Vec<Comment>,
344    ) -> Result<SelectStatement, String> {
345        self.parse_select_statement_with_comments(comments)
346    }
347
348    fn parse_with_clause(&mut self) -> Result<SelectStatement, String> {
349        self.consume(Token::With)?;
350        let ctes = self.parse_cte_list()?;
351
352        // Parse the main SELECT statement - use inner version since we're already tracking parens
353        let mut main_query = self.parse_select_statement_inner_no_comments()?;
354        main_query.ctes = ctes;
355
356        // Check for balanced parentheses at the end of parsing
357        self.check_balanced_parentheses()?;
358
359        Ok(main_query)
360    }
361
362    fn parse_with_clause_inner(&mut self) -> Result<SelectStatement, String> {
363        self.consume(Token::With)?;
364        let ctes = self.parse_cte_list()?;
365
366        // Parse the main SELECT statement (without parenthesis checking for subqueries)
367        let mut main_query = self.parse_select_statement_inner()?;
368        main_query.ctes = ctes;
369
370        Ok(main_query)
371    }
372
373    // Helper function to parse CTE list - eliminates duplication
374    fn parse_cte_list(&mut self) -> Result<Vec<CTE>, String> {
375        let mut ctes = Vec::new();
376
377        // Parse CTEs
378        loop {
379            // Check for WEB keyword before the CTE name (WEB uses an outer marker).
380            // FILE CTEs are detected *inside* the parens after AS, since the design
381            // doc uses `WITH name AS (FILE PATH '...')` rather than `WITH FILE name ...`.
382            let is_web = if matches!(&self.current_token, Token::Web) {
383                self.trace_token("Found WEB keyword for CTE");
384                self.advance();
385                true
386            } else {
387                false
388            };
389
390            // Parse CTE name - allow keywords as CTE names since they're valid identifiers in this context
391            let name = match &self.current_token {
392                Token::Identifier(name) => name.clone(),
393                token => {
394                    // Check if this is a keyword that can be used as an identifier
395                    if let Some(keyword) = token.as_keyword_str() {
396                        // Allow keywords as CTE names (they're valid in this context)
397                        keyword.to_lowercase()
398                    } else {
399                        return Err(format!(
400                            "Expected CTE name after {}",
401                            if is_web { "WEB" } else { "WITH or comma" }
402                        ));
403                    }
404                }
405            };
406            self.advance();
407
408            // Optional column list: WITH t(col1, col2) AS ...
409            let column_list = if matches!(self.current_token, Token::LeftParen) {
410                self.advance();
411                let cols = self.parse_identifier_list()?;
412                self.consume(Token::RightParen)?;
413                Some(cols)
414            } else {
415                None
416            };
417
418            // Expect AS
419            self.consume(Token::As)?;
420
421            let cte_type = if is_web {
422                // Expect opening parenthesis for WEB CTE
423                self.consume(Token::LeftParen)?;
424                // Parse WEB CTE specification using dedicated parser
425                let web_spec = WebCteParser::parse(self)?;
426                // Consume closing parenthesis for WEB CTE
427                self.consume(Token::RightParen)?;
428                CTEType::Web(web_spec)
429            } else {
430                // Push depth BEFORE consuming the opening paren, matching the
431                // original standard-CTE flow. This keeps the `(`...`)` pair
432                // balanced inside the inner context.
433                self.push_paren_depth();
434                self.consume(Token::LeftParen)?;
435
436                let result = if matches!(&self.current_token, Token::File) {
437                    self.trace_token("Found FILE keyword inside CTE parens");
438                    self.advance();
439                    let file_spec = FileCteParser::parse(self)?;
440                    CTEType::File(file_spec)
441                } else {
442                    let query = self.parse_select_statement_inner()?;
443                    CTEType::Standard(query)
444                };
445
446                // Expect closing parenthesis while still in CTE context
447                self.consume(Token::RightParen)?;
448                // Now pop to restore outer depth after consuming both parens
449                self.pop_paren_depth();
450                result
451            };
452
453            ctes.push(CTE {
454                name,
455                column_list,
456                cte_type,
457            });
458
459            // Check for more CTEs
460            if !matches!(self.current_token, Token::Comma) {
461                break;
462            }
463            self.advance();
464        }
465
466        Ok(ctes)
467    }
468
469    /// Helper function to parse an optional table alias (with or without AS keyword)
470    fn parse_optional_alias(&mut self) -> Result<Option<String>, String> {
471        if matches!(self.current_token, Token::As) {
472            self.advance();
473            match &self.current_token {
474                Token::Identifier(name) => {
475                    let alias = name.clone();
476                    self.advance();
477                    Ok(Some(alias))
478                }
479                token => {
480                    // Check if it's a reserved keyword - provide helpful error
481                    if let Some(keyword) = token.as_keyword_str() {
482                        Err(format!(
483                            "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
484                            keyword,
485                            keyword.to_lowercase()
486                        ))
487                    } else {
488                        Err("Expected alias name after AS".to_string())
489                    }
490                }
491            }
492        } else if let Token::Identifier(name) = &self.current_token {
493            // AS is optional for table aliases
494            let alias = name.clone();
495            self.advance();
496            Ok(Some(alias))
497        } else {
498            Ok(None)
499        }
500    }
501
502    /// Helper function to check if an identifier is valid (quoted or regular)
503    fn is_valid_identifier(name: &str) -> bool {
504        if name.starts_with('"') && name.ends_with('"') {
505            // Quoted identifier - always valid
506            true
507        } else {
508            // Regular identifier - check if it's alphanumeric or underscore
509            name.chars().all(|c| c.is_alphanumeric() || c == '_')
510        }
511    }
512
513    /// Helper function to update parentheses depth tracking
514    fn update_paren_depth(&mut self, token: &Token) -> Result<(), String> {
515        match token {
516            Token::LeftParen => self.paren_depth += 1,
517            Token::RightParen => {
518                self.paren_depth -= 1;
519                // Check for extra closing parenthesis
520                if self.paren_depth < 0 {
521                    return Err(
522                        "Unexpected closing parenthesis - no matching opening parenthesis"
523                            .to_string(),
524                    );
525                }
526            }
527            _ => {}
528        }
529        Ok(())
530    }
531
532    /// Helper function to parse comma-separated argument list
533    fn parse_argument_list(&mut self) -> Result<Vec<SqlExpression>, String> {
534        let mut args = Vec::new();
535
536        if !matches!(self.current_token, Token::RightParen) {
537            loop {
538                args.push(self.parse_expression()?);
539
540                if matches!(self.current_token, Token::Comma) {
541                    self.advance();
542                } else {
543                    break;
544                }
545            }
546        }
547
548        Ok(args)
549    }
550
551    /// Helper function to check for balanced parentheses at the end of parsing
552    fn check_balanced_parentheses(&self) -> Result<(), String> {
553        if self.paren_depth > 0 {
554            Err(format!(
555                "Unclosed parenthesis - missing {} closing parenthes{}",
556                self.paren_depth,
557                if self.paren_depth == 1 { "is" } else { "es" }
558            ))
559        } else if self.paren_depth < 0 {
560            Err("Extra closing parenthesis found - no matching opening parenthesis".to_string())
561        } else {
562            Ok(())
563        }
564    }
565
566    /// Check if an expression contains aggregate functions (COUNT, SUM, AVG, etc.)
567    /// This is used to detect unsupported patterns in HAVING clause
568    fn contains_aggregate_function(expr: &SqlExpression) -> bool {
569        match expr {
570            SqlExpression::FunctionCall { name, args, .. } => {
571                // Check if this is an aggregate function
572                let upper_name = name.to_uppercase();
573                let is_aggregate = matches!(
574                    upper_name.as_str(),
575                    "COUNT" | "SUM" | "AVG" | "MIN" | "MAX" | "GROUP_CONCAT" | "STRING_AGG"
576                );
577
578                // If this is an aggregate, return true
579                // Otherwise, recursively check arguments
580                is_aggregate || args.iter().any(Self::contains_aggregate_function)
581            }
582            // Recursively check nested expressions
583            SqlExpression::BinaryOp { left, right, .. } => {
584                Self::contains_aggregate_function(left) || Self::contains_aggregate_function(right)
585            }
586            SqlExpression::Not { expr } => Self::contains_aggregate_function(expr),
587            SqlExpression::MethodCall { args, .. } => {
588                args.iter().any(Self::contains_aggregate_function)
589            }
590            SqlExpression::ChainedMethodCall { base, args, .. } => {
591                Self::contains_aggregate_function(base)
592                    || args.iter().any(Self::contains_aggregate_function)
593            }
594            SqlExpression::CaseExpression {
595                when_branches,
596                else_branch,
597            } => {
598                when_branches.iter().any(|branch| {
599                    Self::contains_aggregate_function(&branch.condition)
600                        || Self::contains_aggregate_function(&branch.result)
601                }) || else_branch
602                    .as_ref()
603                    .map_or(false, |e| Self::contains_aggregate_function(e))
604            }
605            SqlExpression::SimpleCaseExpression {
606                expr,
607                when_branches,
608                else_branch,
609            } => {
610                Self::contains_aggregate_function(expr)
611                    || when_branches.iter().any(|branch| {
612                        Self::contains_aggregate_function(&branch.value)
613                            || Self::contains_aggregate_function(&branch.result)
614                    })
615                    || else_branch
616                        .as_ref()
617                        .map_or(false, |e| Self::contains_aggregate_function(e))
618            }
619            SqlExpression::ScalarSubquery { query } => {
620                // Subqueries can have their own aggregates, but that's fine
621                // We're only checking the outer HAVING clause
622                query
623                    .having
624                    .as_ref()
625                    .map_or(false, |h| Self::contains_aggregate_function(h))
626            }
627            // Leaf nodes - no aggregates
628            SqlExpression::Column(_)
629            | SqlExpression::StringLiteral(_)
630            | SqlExpression::NumberLiteral(_)
631            | SqlExpression::BooleanLiteral(_)
632            | SqlExpression::Null
633            | SqlExpression::DateTimeConstructor { .. }
634            | SqlExpression::DateTimeToday { .. } => false,
635
636            // Window functions contain aggregates by definition
637            SqlExpression::WindowFunction { .. } => true,
638
639            // Between has three parts to check
640            SqlExpression::Between { expr, lower, upper } => {
641                Self::contains_aggregate_function(expr)
642                    || Self::contains_aggregate_function(lower)
643                    || Self::contains_aggregate_function(upper)
644            }
645
646            // IN list - check expr and all values
647            SqlExpression::InList { expr, values } | SqlExpression::NotInList { expr, values } => {
648                Self::contains_aggregate_function(expr)
649                    || values.iter().any(Self::contains_aggregate_function)
650            }
651
652            // IN subquery - check expr and subquery
653            SqlExpression::InSubquery { expr, subquery }
654            | SqlExpression::NotInSubquery { expr, subquery } => {
655                Self::contains_aggregate_function(expr)
656                    || subquery
657                        .having
658                        .as_ref()
659                        .map_or(false, |h| Self::contains_aggregate_function(h))
660            }
661
662            // Tuple IN/NOT IN subquery - check each expr and subquery
663            SqlExpression::InSubqueryTuple { exprs, subquery }
664            | SqlExpression::NotInSubqueryTuple { exprs, subquery } => {
665                exprs.iter().any(Self::contains_aggregate_function)
666                    || subquery
667                        .having
668                        .as_ref()
669                        .map_or(false, |h| Self::contains_aggregate_function(h))
670            }
671
672            // UNNEST - check column expression
673            SqlExpression::Unnest { column, .. } => Self::contains_aggregate_function(column),
674        }
675    }
676
677    fn parse_select_statement(&mut self) -> Result<SelectStatement, String> {
678        self.trace_enter("parse_select_statement");
679        let result = self.parse_select_statement_inner()?;
680
681        // Check for balanced parentheses at the end of parsing
682        self.check_balanced_parentheses()?;
683
684        Ok(result)
685    }
686
687    fn parse_select_statement_inner(&mut self) -> Result<SelectStatement, String> {
688        // Collect leading comments ONLY in PreserveComments mode
689        let leading_comments = if self.mode == ParserMode::PreserveComments {
690            self.collect_leading_comments()
691        } else {
692            vec![]
693        };
694
695        self.parse_select_statement_with_comments(leading_comments)
696    }
697
698    /// Parse SELECT statement without collecting leading comments
699    /// Used when comments were already collected (e.g., before WITH clause)
700    fn parse_select_statement_inner_no_comments(&mut self) -> Result<SelectStatement, String> {
701        self.parse_select_statement_with_comments(vec![])
702    }
703
704    /// Core SELECT parsing logic - takes pre-collected comments
705    fn parse_select_statement_with_comments(
706        &mut self,
707        leading_comments: Vec<Comment>,
708    ) -> Result<SelectStatement, String> {
709        self.consume(Token::Select)?;
710
711        // Check for DISTINCT keyword
712        let distinct = if matches!(self.current_token, Token::Distinct) {
713            self.advance();
714            true
715        } else {
716            false
717        };
718
719        // Parse SELECT items (supports computed expressions)
720        let select_items = self.parse_select_items()?;
721
722        // Create legacy columns vector for backward compatibility
723        let columns = select_items
724            .iter()
725            .map(|item| match item {
726                SelectItem::Star { .. } => "*".to_string(),
727                SelectItem::StarExclude { .. } => "*".to_string(), // Treated as * in legacy columns
728                SelectItem::Column {
729                    column: col_ref, ..
730                } => col_ref.name.clone(),
731                SelectItem::Expression { alias, .. } => alias.clone(),
732            })
733            .collect();
734
735        // Parse INTO clause (for temporary tables) - comes immediately after SELECT items
736        let into_table = if matches!(self.current_token, Token::Into) {
737            self.advance();
738            Some(self.parse_into_clause()?)
739        } else {
740            None
741        };
742
743        // Parse FROM clause - can be a table name, subquery, or table function
744        let (from_table, from_subquery, from_function, from_alias) = if matches!(
745            self.current_token,
746            Token::From
747        ) {
748            self.advance();
749
750            // Check for table function like RANGE()
751            // Also handle keywords that could be table/CTE names
752            let table_or_function_name = match &self.current_token {
753                Token::Identifier(name) => Some(name.clone()),
754                token => {
755                    // Check if it's a keyword that can be used as table/CTE name
756                    token.as_keyword_str().map(|k| k.to_lowercase())
757                }
758            };
759
760            if let Some(name) = table_or_function_name {
761                // Check if this is a table function by consulting the registry
762                // We need to lookahead to see if there's a parenthesis to distinguish
763                // between a function call and a table with the same name
764                let has_paren = self.peek_token() == Some(Token::LeftParen);
765                if self.debug_trace {
766                    eprintln!(
767                        "  Checking {} for table function, has_paren={}",
768                        name, has_paren
769                    );
770                }
771
772                // Check if it's a known table function or generator
773                // In FROM clause context, prioritize generators over scalar functions
774                let is_table_function = if has_paren {
775                    // First check generator registry (for FROM clause context)
776                    if self.debug_trace {
777                        eprintln!("  Checking generator registry for {}", name.to_uppercase());
778                    }
779                    if let Some(_gen) = self.generator_registry.get(&name.to_uppercase()) {
780                        if self.debug_trace {
781                            eprintln!("  Found {} in generator registry", name);
782                        }
783                        self.trace_token(&format!("Found generator: {}", name));
784                        true
785                    } else {
786                        // Then check if it's a table function in the function registry
787                        if let Some(func) = self.function_registry.get(&name.to_uppercase()) {
788                            let sig = func.signature();
789                            let is_table_fn = sig.category == FunctionCategory::TableFunction;
790                            if self.debug_trace {
791                                eprintln!(
792                                    "  Found {} in function registry, is_table_function={}",
793                                    name, is_table_fn
794                                );
795                            }
796                            if is_table_fn {
797                                self.trace_token(&format!(
798                                    "Found table function in function registry: {}",
799                                    name
800                                ));
801                            }
802                            is_table_fn
803                        } else {
804                            if self.debug_trace {
805                                eprintln!("  {} not found in either registry", name);
806                                self.trace_token(&format!(
807                                    "Not found as generator or table function: {}",
808                                    name
809                                ));
810                            }
811                            false
812                        }
813                    }
814                } else {
815                    if self.debug_trace {
816                        eprintln!("  No parenthesis after {}, treating as table", name);
817                    }
818                    false
819                };
820
821                if is_table_function {
822                    // Parse table function
823                    let function_name = name.clone();
824                    self.advance(); // Skip function name
825
826                    // Parse arguments
827                    self.consume(Token::LeftParen)?;
828                    let args = self.parse_argument_list()?;
829                    self.consume(Token::RightParen)?;
830
831                    // Optional alias
832                    let alias = if matches!(self.current_token, Token::As) {
833                        self.advance();
834                        match &self.current_token {
835                            Token::Identifier(name) => {
836                                let alias = name.clone();
837                                self.advance();
838                                Some(alias)
839                            }
840                            token => {
841                                if let Some(keyword) = token.as_keyword_str() {
842                                    return Err(format!(
843                                            "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
844                                            keyword,
845                                            keyword.to_lowercase()
846                                        ));
847                                } else {
848                                    return Err("Expected alias name after AS".to_string());
849                                }
850                            }
851                        }
852                    } else if let Token::Identifier(name) = &self.current_token {
853                        let alias = name.clone();
854                        self.advance();
855                        Some(alias)
856                    } else {
857                        None
858                    };
859
860                    (
861                        None,
862                        None,
863                        Some(TableFunction::Generator {
864                            name: function_name,
865                            args,
866                        }),
867                        alias,
868                    )
869                } else {
870                    // Not a RANGE, SPLIT, or generator function, so it's a regular table name
871                    let table_name = name.clone();
872                    self.advance();
873
874                    // Check for optional alias
875                    let alias = self.parse_optional_alias()?;
876
877                    (Some(table_name), None, None, alias)
878                }
879            } else if matches!(self.current_token, Token::LeftParen) {
880                // Check for subquery: FROM (SELECT ...) or FROM (WITH ... SELECT ...)
881                self.advance();
882
883                // Parse the subquery - it might start with WITH
884                let subquery = if matches!(self.current_token, Token::With) {
885                    self.parse_with_clause_inner()?
886                } else {
887                    self.parse_select_statement_inner()?
888                };
889
890                self.consume(Token::RightParen)?;
891
892                // Subqueries must have an alias
893                let alias = if matches!(self.current_token, Token::As) {
894                    self.advance();
895                    match &self.current_token {
896                        Token::Identifier(name) => {
897                            let alias = name.clone();
898                            self.advance();
899                            alias
900                        }
901                        token => {
902                            if let Some(keyword) = token.as_keyword_str() {
903                                return Err(format!(
904                                        "Reserved keyword '{}' cannot be used as subquery alias. Use a different name or quote it with double quotes: \"{}\"",
905                                        keyword,
906                                        keyword.to_lowercase()
907                                    ));
908                            } else {
909                                return Err("Expected alias name after AS".to_string());
910                            }
911                        }
912                    }
913                } else {
914                    // AS is optional, but alias is required
915                    match &self.current_token {
916                        Token::Identifier(name) => {
917                            let alias = name.clone();
918                            self.advance();
919                            alias
920                        }
921                        _ => {
922                            return Err(
923                                "Subquery in FROM must have an alias (e.g., AS t)".to_string()
924                            )
925                        }
926                    }
927                };
928
929                (None, Some(Box::new(subquery)), None, Some(alias))
930            } else {
931                // Regular table name - handle identifiers and keywords
932                let table_name = match &self.current_token {
933                    Token::Identifier(table) => table.clone(),
934                    Token::QuotedIdentifier(table) => table.clone(),
935                    token => {
936                        // Check if it's a keyword that can be used as table/CTE name
937                        if let Some(keyword) = token.as_keyword_str() {
938                            keyword.to_lowercase()
939                        } else {
940                            return Err("Expected table name or subquery after FROM".to_string());
941                        }
942                    }
943                };
944
945                self.advance();
946
947                // Check for optional alias
948                let alias = self.parse_optional_alias()?;
949
950                (Some(table_name), None, None, alias)
951            }
952        } else {
953            (None, None, None, None)
954        };
955
956        // Check for PIVOT after FROM table source
957        // PIVOT wraps the FROM table/subquery before JOINs are processed
958        // This creates a PIVOT TableSource that will be processed by PivotExpander transformer
959        let pivot_source = if matches!(self.current_token, Token::Pivot) {
960            // Build a TableSource from the current FROM clause
961            let source = if let Some(ref table_name) = from_table {
962                TableSource::Table(table_name.clone())
963            } else if let Some(ref subquery) = from_subquery {
964                TableSource::DerivedTable {
965                    query: subquery.clone(),
966                    alias: from_alias.clone().unwrap_or_default(),
967                }
968            } else {
969                return Err("PIVOT requires a table or subquery source".to_string());
970            };
971
972            // Parse the PIVOT clause - this wraps the source in a Pivot TableSource
973            let pivoted = self.parse_pivot_clause(source)?;
974            Some(pivoted)
975        } else {
976            None
977        };
978
979        // Parse JOIN clauses
980        let mut joins = Vec::new();
981        while self.is_join_token() {
982            joins.push(self.parse_join_clause()?);
983        }
984
985        let where_clause = if matches!(self.current_token, Token::Where) {
986            self.advance();
987            Some(self.parse_where_clause()?)
988        } else {
989            None
990        };
991
992        let group_by = if matches!(self.current_token, Token::GroupBy) {
993            self.advance();
994            // Parse expressions instead of just identifiers for GROUP BY
995            // This allows GROUP BY TIME_BUCKET(...), CASE ..., etc.
996            Some(self.parse_expression_list()?)
997        } else {
998            None
999        };
1000
1001        // Parse HAVING clause (must come after GROUP BY)
1002        let having = if matches!(self.current_token, Token::Having) {
1003            if group_by.is_none() {
1004                return Err("HAVING clause requires GROUP BY".to_string());
1005            }
1006            self.advance();
1007            let having_expr = self.parse_expression()?;
1008
1009            // Note: Aggregate functions in HAVING are now supported via the
1010            // HavingAliasTransformer preprocessing step, which automatically
1011            // adds aliases and rewrites the HAVING clause to use them.
1012
1013            Some(having_expr)
1014        } else {
1015            None
1016        };
1017
1018        // Parse QUALIFY clause (Snowflake-style window function filtering)
1019        // QUALIFY filters on window function results without needing a subquery
1020        // Example: SELECT *, ROW_NUMBER() OVER (...) AS rn FROM t QUALIFY rn <= 3
1021        let qualify = if matches!(self.current_token, Token::Qualify) {
1022            self.advance();
1023            let qualify_expr = self.parse_expression()?;
1024
1025            // Note: QUALIFY is handled by the QualifyToWhereTransformer preprocessing step
1026            // which converts it to WHERE after window functions are lifted to CTEs
1027
1028            Some(qualify_expr)
1029        } else {
1030            None
1031        };
1032
1033        // Parse ORDER BY clause (comes after GROUP BY, HAVING, and QUALIFY)
1034        let order_by = if matches!(self.current_token, Token::OrderBy) {
1035            self.trace_token("Found OrderBy token");
1036            self.advance();
1037            Some(self.parse_order_by_list()?)
1038        } else if let Token::Identifier(s) = &self.current_token {
1039            // This shouldn't happen if the lexer properly tokenizes ORDER BY
1040            // But keeping as fallback for compatibility
1041            if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
1042                self.trace_token("Warning: ORDER as identifier instead of OrderBy token");
1043                self.advance(); // consume ORDER
1044                if matches!(&self.current_token, Token::By) {
1045                    self.advance(); // consume BY
1046                    Some(self.parse_order_by_list()?)
1047                } else {
1048                    return Err("Expected BY after ORDER".to_string());
1049                }
1050            } else {
1051                None
1052            }
1053        } else {
1054            None
1055        };
1056
1057        // Parse LIMIT clause
1058        let limit = if matches!(self.current_token, Token::Limit) {
1059            self.advance();
1060            match &self.current_token {
1061                Token::NumberLiteral(num) => {
1062                    let limit_val = num
1063                        .parse::<usize>()
1064                        .map_err(|_| format!("Invalid LIMIT value: {num}"))?;
1065                    self.advance();
1066                    Some(limit_val)
1067                }
1068                _ => return Err("Expected number after LIMIT".to_string()),
1069            }
1070        } else {
1071            None
1072        };
1073
1074        // Parse OFFSET clause
1075        let offset = if matches!(self.current_token, Token::Offset) {
1076            self.advance();
1077            match &self.current_token {
1078                Token::NumberLiteral(num) => {
1079                    let offset_val = num
1080                        .parse::<usize>()
1081                        .map_err(|_| format!("Invalid OFFSET value: {num}"))?;
1082                    self.advance();
1083                    Some(offset_val)
1084                }
1085                _ => return Err("Expected number after OFFSET".to_string()),
1086            }
1087        } else {
1088            None
1089        };
1090
1091        // Parse INTO clause (alternative position - SQL Server also supports INTO after all clauses)
1092        // This handles: SELECT * FROM table WHERE x > 5 INTO #temp
1093        // If INTO was already parsed after SELECT, this will be None (can't have two INTOs)
1094        let into_table = if into_table.is_none() && matches!(self.current_token, Token::Into) {
1095            self.advance();
1096            Some(self.parse_into_clause()?)
1097        } else {
1098            into_table // Keep the one from after SELECT if it exists
1099        };
1100
1101        // Parse UNION/INTERSECT/EXCEPT operations
1102        let set_operations = self.parse_set_operations()?;
1103
1104        // Collect trailing comment ONLY in PreserveComments mode
1105        let trailing_comment = if self.mode == ParserMode::PreserveComments {
1106            self.collect_trailing_comment()
1107        } else {
1108            None
1109        };
1110
1111        // Build unified from_source from parsed components
1112        // PIVOT takes precedence if it exists (it already wraps the base source)
1113        let from_source = if let Some(pivot) = pivot_source {
1114            Some(pivot)
1115        } else if let Some(ref table_name) = from_table {
1116            Some(TableSource::Table(table_name.clone()))
1117        } else if let Some(ref subquery) = from_subquery {
1118            Some(TableSource::DerivedTable {
1119                query: subquery.clone(),
1120                alias: from_alias.clone().unwrap_or_default(),
1121            })
1122        } else if let Some(ref _func) = from_function {
1123            // Table functions don't use TableSource yet, keep as None for now
1124            // TODO: Add TableFunction variant to TableSource
1125            None
1126        } else {
1127            None
1128        };
1129
1130        Ok(SelectStatement {
1131            distinct,
1132            columns,
1133            select_items,
1134            from_source,
1135            #[allow(deprecated)]
1136            from_table,
1137            #[allow(deprecated)]
1138            from_subquery,
1139            #[allow(deprecated)]
1140            from_function,
1141            #[allow(deprecated)]
1142            from_alias,
1143            joins,
1144            where_clause,
1145            order_by,
1146            group_by,
1147            having,
1148            qualify,
1149            limit,
1150            offset,
1151            ctes: Vec::new(), // Will be populated by WITH clause parser
1152            into_table,
1153            set_operations,
1154            leading_comments,
1155            trailing_comment,
1156        })
1157    }
1158
1159    /// Parse UNION/INTERSECT/EXCEPT operations
1160    /// Returns a vector of (operation, select_statement) pairs
1161    fn parse_set_operations(
1162        &mut self,
1163    ) -> Result<Vec<(SetOperation, Box<SelectStatement>)>, String> {
1164        let mut operations = Vec::new();
1165
1166        while matches!(
1167            self.current_token,
1168            Token::Union | Token::Intersect | Token::Except
1169        ) {
1170            // Determine the operation type
1171            let operation = match &self.current_token {
1172                Token::Union => {
1173                    self.advance();
1174                    // Check for ALL keyword
1175                    if let Token::Identifier(id) = &self.current_token {
1176                        if id.to_uppercase() == "ALL" {
1177                            self.advance();
1178                            SetOperation::UnionAll
1179                        } else {
1180                            SetOperation::Union
1181                        }
1182                    } else {
1183                        SetOperation::Union
1184                    }
1185                }
1186                Token::Intersect => {
1187                    self.advance();
1188                    SetOperation::Intersect
1189                }
1190                Token::Except => {
1191                    self.advance();
1192                    SetOperation::Except
1193                }
1194                _ => unreachable!(),
1195            };
1196
1197            // Parse the next SELECT statement
1198            let next_select = self.parse_select_statement_inner()?;
1199
1200            operations.push((operation, Box::new(next_select)));
1201        }
1202
1203        Ok(operations)
1204    }
1205
1206    /// Parse SELECT items that support computed expressions with aliases
1207    fn parse_select_items(&mut self) -> Result<Vec<SelectItem>, String> {
1208        let mut items = Vec::new();
1209
1210        loop {
1211            // Check for qualified star (table.*) or unqualified star (*)
1212            // First check if we have identifier.* pattern
1213            if let Token::Identifier(name) = &self.current_token.clone() {
1214                // Peek ahead to check for .* pattern
1215                let saved_pos = self.lexer.clone();
1216                let saved_token = self.current_token.clone();
1217                let table_name = name.clone();
1218
1219                self.advance();
1220
1221                if matches!(self.current_token, Token::Dot) {
1222                    self.advance();
1223                    if matches!(self.current_token, Token::Star) {
1224                        // This is table.* pattern
1225                        items.push(SelectItem::Star {
1226                            table_prefix: Some(table_name),
1227                            leading_comments: vec![],
1228                            trailing_comment: None,
1229                        });
1230                        self.advance();
1231
1232                        // Continue to next item or end
1233                        if matches!(self.current_token, Token::Comma) {
1234                            self.advance();
1235                            continue;
1236                        } else {
1237                            break;
1238                        }
1239                    }
1240                }
1241
1242                // Not table.*, restore position and continue with normal parsing
1243                self.lexer = saved_pos;
1244                self.current_token = saved_token;
1245            }
1246
1247            // Check for unqualified *
1248            if matches!(self.current_token, Token::Star) {
1249                self.advance(); // consume *
1250
1251                // Check for EXCLUDE clause
1252                if matches!(self.current_token, Token::Exclude) {
1253                    self.advance(); // consume EXCLUDE
1254
1255                    // Expect opening paren
1256                    if !matches!(self.current_token, Token::LeftParen) {
1257                        return Err("Expected '(' after EXCLUDE".to_string());
1258                    }
1259                    self.advance(); // consume (
1260
1261                    // Parse column list
1262                    let mut excluded_columns = Vec::new();
1263                    loop {
1264                        match &self.current_token {
1265                            Token::Identifier(col_name) | Token::QuotedIdentifier(col_name) => {
1266                                excluded_columns.push(col_name.clone());
1267                                self.advance();
1268                            }
1269                            _ => return Err("Expected column name in EXCLUDE list".to_string()),
1270                        }
1271
1272                        // Check for comma or closing paren
1273                        if matches!(self.current_token, Token::Comma) {
1274                            self.advance();
1275                        } else if matches!(self.current_token, Token::RightParen) {
1276                            self.advance(); // consume )
1277                            break;
1278                        } else {
1279                            return Err("Expected ',' or ')' in EXCLUDE list".to_string());
1280                        }
1281                    }
1282
1283                    if excluded_columns.is_empty() {
1284                        return Err("EXCLUDE list cannot be empty".to_string());
1285                    }
1286
1287                    items.push(SelectItem::StarExclude {
1288                        table_prefix: None,
1289                        excluded_columns,
1290                        leading_comments: vec![],
1291                        trailing_comment: None,
1292                    });
1293                } else {
1294                    // Regular * without EXCLUDE
1295                    items.push(SelectItem::Star {
1296                        table_prefix: None,
1297                        leading_comments: vec![],
1298                        trailing_comment: None,
1299                    });
1300                }
1301            } else {
1302                // Parse expression or column
1303                let expr = self.parse_comparison()?; // Use comparison to support IS NULL and other comparisons
1304
1305                // Check for AS alias
1306                let alias = if matches!(self.current_token, Token::As) {
1307                    self.advance();
1308                    match &self.current_token {
1309                        Token::Identifier(alias_name) => {
1310                            let alias = alias_name.clone();
1311                            self.advance();
1312                            alias
1313                        }
1314                        Token::QuotedIdentifier(alias_name) => {
1315                            let alias = alias_name.clone();
1316                            self.advance();
1317                            alias
1318                        }
1319                        token => {
1320                            if let Some(keyword) = token.as_keyword_str() {
1321                                return Err(format!(
1322                                    "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
1323                                    keyword,
1324                                    keyword.to_lowercase()
1325                                ));
1326                            } else {
1327                                return Err("Expected alias name after AS".to_string());
1328                            }
1329                        }
1330                    }
1331                } else {
1332                    // Generate default alias based on expression
1333                    match &expr {
1334                        SqlExpression::Column(col_ref) => col_ref.name.clone(),
1335                        _ => format!("expr_{}", items.len() + 1), // Default alias for computed expressions
1336                    }
1337                };
1338
1339                // Create SelectItem based on expression type
1340                let item = match expr {
1341                    SqlExpression::Column(col_ref) if alias == col_ref.name => {
1342                        // Simple column reference without alias
1343                        SelectItem::Column {
1344                            column: col_ref,
1345                            leading_comments: vec![],
1346                            trailing_comment: None,
1347                        }
1348                    }
1349                    _ => {
1350                        // Computed expression or column with different alias
1351                        SelectItem::Expression {
1352                            expr,
1353                            alias,
1354                            leading_comments: vec![],
1355                            trailing_comment: None,
1356                        }
1357                    }
1358                };
1359
1360                items.push(item);
1361            }
1362
1363            // Check for comma to continue
1364            if matches!(self.current_token, Token::Comma) {
1365                self.advance();
1366            } else {
1367                break;
1368            }
1369        }
1370
1371        Ok(items)
1372    }
1373
1374    fn parse_identifier_list(&mut self) -> Result<Vec<String>, String> {
1375        let mut identifiers = Vec::new();
1376
1377        loop {
1378            match &self.current_token {
1379                Token::Identifier(id) => {
1380                    // Check if this is a reserved keyword that should stop identifier parsing
1381                    if Self::is_identifier_reserved(id) {
1382                        // Stop parsing identifiers if we hit a reserved keyword
1383                        break;
1384                    }
1385                    let mut name = id.clone();
1386                    self.advance();
1387
1388                    // Handle qualified names (table.column)
1389                    if matches!(self.current_token, Token::Dot) {
1390                        self.advance(); // consume dot
1391                        match &self.current_token {
1392                            Token::Identifier(col) => {
1393                                name = format!("{}.{}", name, col);
1394                                self.advance();
1395                            }
1396                            Token::QuotedIdentifier(col) => {
1397                                name = format!("{}.{}", name, col);
1398                                self.advance();
1399                            }
1400                            _ => {
1401                                return Err("Expected identifier after '.'".to_string());
1402                            }
1403                        }
1404                    }
1405
1406                    identifiers.push(name);
1407                }
1408                Token::QuotedIdentifier(id) => {
1409                    // Handle quoted identifiers like "Customer Id"
1410                    identifiers.push(id.clone());
1411                    self.advance();
1412                }
1413                _ => {
1414                    // Stop parsing if we hit any other token type
1415                    break;
1416                }
1417            }
1418
1419            if matches!(self.current_token, Token::Comma) {
1420                self.advance();
1421            } else {
1422                break;
1423            }
1424        }
1425
1426        if identifiers.is_empty() {
1427            return Err("Expected at least one identifier".to_string());
1428        }
1429
1430        Ok(identifiers)
1431    }
1432
1433    fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
1434        let mut partition_by = Vec::new();
1435        let mut order_by = Vec::new();
1436
1437        // Check for PARTITION BY
1438        if matches!(self.current_token, Token::Partition) {
1439            self.advance(); // consume PARTITION
1440            if !matches!(self.current_token, Token::By) {
1441                return Err("Expected BY after PARTITION".to_string());
1442            }
1443            self.advance(); // consume BY
1444
1445            // Parse partition columns
1446            partition_by = self.parse_identifier_list()?;
1447        }
1448
1449        // Check for ORDER BY
1450        if matches!(self.current_token, Token::OrderBy) {
1451            self.advance(); // consume ORDER BY (as single token)
1452            order_by = self.parse_order_by_list()?;
1453        } else if let Token::Identifier(s) = &self.current_token {
1454            if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
1455                // Handle ORDER BY as two tokens
1456                self.advance(); // consume ORDER
1457                if !matches!(self.current_token, Token::By) {
1458                    return Err("Expected BY after ORDER".to_string());
1459                }
1460                self.advance(); // consume BY
1461                order_by = self.parse_order_by_list()?;
1462            }
1463        }
1464
1465        // Parse optional window frame (ROWS/RANGE BETWEEN ... AND ...)
1466        let mut frame = self.parse_window_frame()?;
1467
1468        // SQL Standard: If ORDER BY is present but no frame is specified,
1469        // default to RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
1470        // This matches behavior of PostgreSQL, MySQL, SQL Server, etc.
1471        if !order_by.is_empty() && frame.is_none() {
1472            frame = Some(WindowFrame {
1473                unit: FrameUnit::Range,
1474                start: FrameBound::UnboundedPreceding,
1475                end: Some(FrameBound::CurrentRow),
1476            });
1477        }
1478
1479        Ok(WindowSpec {
1480            partition_by,
1481            order_by,
1482            frame,
1483        })
1484    }
1485
1486    fn parse_order_by_list(&mut self) -> Result<Vec<OrderByItem>, String> {
1487        let mut order_items = Vec::new();
1488
1489        loop {
1490            // Parse ANY expression (not just column names)
1491            // This supports:
1492            // - Simple columns: region
1493            // - Qualified columns: table.column
1494            // - Aggregate functions: SUM(sales_amount)
1495            // - Arithmetic: sales_amount * 1.1
1496            // - CASE expressions: CASE WHEN ... END
1497            let expr = self.parse_expression()?;
1498
1499            // Check for ASC/DESC
1500            let direction = match &self.current_token {
1501                Token::Asc => {
1502                    self.advance();
1503                    SortDirection::Asc
1504                }
1505                Token::Desc => {
1506                    self.advance();
1507                    SortDirection::Desc
1508                }
1509                _ => SortDirection::Asc, // Default to ASC if not specified
1510            };
1511
1512            order_items.push(OrderByItem { expr, direction });
1513
1514            if matches!(self.current_token, Token::Comma) {
1515                self.advance();
1516            } else {
1517                break;
1518            }
1519        }
1520
1521        Ok(order_items)
1522    }
1523
1524    /// Parse INTO clause for temporary tables
1525    /// Syntax: INTO #table_name
1526    fn parse_into_clause(&mut self) -> Result<IntoTable, String> {
1527        // Expect an identifier starting with #
1528        let name = match &self.current_token {
1529            Token::Identifier(id) if id.starts_with('#') => {
1530                let table_name = id.clone();
1531                self.advance();
1532                table_name
1533            }
1534            Token::Identifier(id) => {
1535                return Err(format!(
1536                    "Temporary table name must start with #, got: {}",
1537                    id
1538                ));
1539            }
1540            _ => {
1541                return Err(
1542                    "Expected temporary table name (starting with #) after INTO".to_string()
1543                );
1544            }
1545        };
1546
1547        Ok(IntoTable { name })
1548    }
1549
1550    fn parse_window_frame(&mut self) -> Result<Option<WindowFrame>, String> {
1551        // Check for ROWS or RANGE keyword
1552        let unit = match &self.current_token {
1553            Token::Rows => {
1554                self.advance();
1555                FrameUnit::Rows
1556            }
1557            Token::Identifier(id) if id.to_uppercase() == "RANGE" => {
1558                // RANGE as window frame unit
1559                self.advance();
1560                FrameUnit::Range
1561            }
1562            _ => return Ok(None), // No window frame specified
1563        };
1564
1565        // Check for BETWEEN or just a single bound
1566        let (start, end) = if let Token::Between = &self.current_token {
1567            self.advance(); // consume BETWEEN
1568                            // Parse start bound
1569            let start = self.parse_frame_bound()?;
1570
1571            // Expect AND
1572            if !matches!(&self.current_token, Token::And) {
1573                return Err("Expected AND after window frame start bound".to_string());
1574            }
1575            self.advance();
1576
1577            // Parse end bound
1578            let end = self.parse_frame_bound()?;
1579            (start, Some(end))
1580        } else {
1581            // Single bound (e.g., "ROWS 5 PRECEDING")
1582            let bound = self.parse_frame_bound()?;
1583            (bound, None)
1584        };
1585
1586        Ok(Some(WindowFrame { unit, start, end }))
1587    }
1588
1589    fn parse_frame_bound(&mut self) -> Result<FrameBound, String> {
1590        match &self.current_token {
1591            Token::Unbounded => {
1592                self.advance();
1593                match &self.current_token {
1594                    Token::Preceding => {
1595                        self.advance();
1596                        Ok(FrameBound::UnboundedPreceding)
1597                    }
1598                    Token::Following => {
1599                        self.advance();
1600                        Ok(FrameBound::UnboundedFollowing)
1601                    }
1602                    _ => Err("Expected PRECEDING or FOLLOWING after UNBOUNDED".to_string()),
1603                }
1604            }
1605            Token::Current => {
1606                self.advance();
1607                if matches!(&self.current_token, Token::Row) {
1608                    self.advance();
1609                    return Ok(FrameBound::CurrentRow);
1610                }
1611                Err("Expected ROW after CURRENT".to_string())
1612            }
1613            Token::NumberLiteral(num) => {
1614                let n: i64 = num
1615                    .parse()
1616                    .map_err(|_| "Invalid number in window frame".to_string())?;
1617                self.advance();
1618                match &self.current_token {
1619                    Token::Preceding => {
1620                        self.advance();
1621                        Ok(FrameBound::Preceding(n))
1622                    }
1623                    Token::Following => {
1624                        self.advance();
1625                        Ok(FrameBound::Following(n))
1626                    }
1627                    _ => Err("Expected PRECEDING or FOLLOWING after number".to_string()),
1628                }
1629            }
1630            _ => Err("Invalid window frame bound".to_string()),
1631        }
1632    }
1633
1634    fn parse_where_clause(&mut self) -> Result<WhereClause, String> {
1635        // Parse the entire WHERE clause as a single expression tree
1636        // The logical operators (AND/OR) are now handled within parse_expression
1637        let expr = self.parse_expression()?;
1638
1639        // Check for unexpected closing parenthesis
1640        if matches!(self.current_token, Token::RightParen) && self.paren_depth <= 0 {
1641            return Err(
1642                "Unexpected closing parenthesis - no matching opening parenthesis".to_string(),
1643            );
1644        }
1645
1646        // Create a single condition with the entire expression
1647        let conditions = vec![Condition {
1648            expr,
1649            connector: None,
1650        }];
1651
1652        Ok(WhereClause { conditions })
1653    }
1654
1655    fn parse_expression(&mut self) -> Result<SqlExpression, String> {
1656        self.trace_enter("parse_expression");
1657        // Start with logical OR as the lowest precedence operator
1658        // The hierarchy is: OR -> AND -> comparison -> additive -> multiplicative -> primary
1659        let mut left = self.parse_logical_or()?;
1660
1661        // Handle IN operator (not preceded by NOT)
1662        // This uses the modular comparison module
1663        left = parse_in_operator(self, left)?;
1664
1665        let result = Ok(left);
1666        self.trace_exit("parse_expression", &result);
1667        result
1668    }
1669
1670    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
1671        // Use the new modular comparison expression parser
1672        parse_comparison_expr(self)
1673    }
1674
1675    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
1676        // Use the new modular arithmetic expression parser
1677        parse_additive_expr(self)
1678    }
1679
1680    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
1681        // Use the new modular arithmetic expression parser
1682        parse_multiplicative_expr(self)
1683    }
1684
1685    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
1686        // Use the new modular logical expression parser
1687        parse_logical_or_expr(self)
1688    }
1689
1690    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
1691        // Use the new modular logical expression parser
1692        parse_logical_and_expr(self)
1693    }
1694
1695    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
1696        // Use the new modular CASE expression parser
1697        parse_case_expr(self)
1698    }
1699
1700    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
1701        // Use the new modular primary expression parser
1702        // Clone the necessary data to avoid borrowing issues
1703        let columns = self.columns.clone();
1704        let in_method_args = self.in_method_args;
1705        let ctx = PrimaryExpressionContext {
1706            columns: &columns,
1707            in_method_args,
1708        };
1709        parse_primary_expr(self, &ctx)
1710    }
1711
1712    // Keep the old implementation temporarily for reference (will be removed)
1713    fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
1714        // Set flag to indicate we're parsing method arguments
1715        self.in_method_args = true;
1716
1717        let args = self.parse_argument_list()?;
1718
1719        // Clear the flag
1720        self.in_method_args = false;
1721
1722        Ok(args)
1723    }
1724
1725    fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
1726        let mut args = Vec::new();
1727        let mut has_distinct = false;
1728
1729        if !matches!(self.current_token, Token::RightParen) {
1730            // Check if first argument starts with DISTINCT
1731            if matches!(self.current_token, Token::Distinct) {
1732                self.advance(); // consume DISTINCT
1733                has_distinct = true;
1734            }
1735
1736            // Parse full expressions as arguments — this allows comparisons and
1737            // boolean logic inside function calls, e.g. AVG(x > 5), SUM(a = 'b')
1738            args.push(self.parse_logical_or()?);
1739
1740            // Parse any remaining arguments (DISTINCT only applies to first arg for aggregates)
1741            while matches!(self.current_token, Token::Comma) {
1742                self.advance();
1743                args.push(self.parse_logical_or()?);
1744            }
1745        }
1746
1747        Ok((args, has_distinct))
1748    }
1749
1750    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
1751        let mut expressions = Vec::new();
1752
1753        loop {
1754            expressions.push(self.parse_expression()?);
1755
1756            if matches!(self.current_token, Token::Comma) {
1757                self.advance();
1758            } else {
1759                break;
1760            }
1761        }
1762
1763        Ok(expressions)
1764    }
1765
1766    #[must_use]
1767    pub fn get_position(&self) -> usize {
1768        self.lexer.get_position()
1769    }
1770
1771    // Check if current token is a JOIN-related token
1772    fn is_join_token(&self) -> bool {
1773        matches!(
1774            self.current_token,
1775            Token::Join | Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
1776        )
1777    }
1778
1779    // Parse a JOIN clause
1780    fn parse_join_clause(&mut self) -> Result<JoinClause, String> {
1781        // Determine join type
1782        let join_type = match &self.current_token {
1783            Token::Join => {
1784                self.advance();
1785                JoinType::Inner // Default JOIN is INNER JOIN
1786            }
1787            Token::Inner => {
1788                self.advance();
1789                if !matches!(self.current_token, Token::Join) {
1790                    return Err("Expected JOIN after INNER".to_string());
1791                }
1792                self.advance();
1793                JoinType::Inner
1794            }
1795            Token::Left => {
1796                self.advance();
1797                // Handle optional OUTER keyword
1798                if matches!(self.current_token, Token::Outer) {
1799                    self.advance();
1800                }
1801                if !matches!(self.current_token, Token::Join) {
1802                    return Err("Expected JOIN after LEFT".to_string());
1803                }
1804                self.advance();
1805                JoinType::Left
1806            }
1807            Token::Right => {
1808                self.advance();
1809                // Handle optional OUTER keyword
1810                if matches!(self.current_token, Token::Outer) {
1811                    self.advance();
1812                }
1813                if !matches!(self.current_token, Token::Join) {
1814                    return Err("Expected JOIN after RIGHT".to_string());
1815                }
1816                self.advance();
1817                JoinType::Right
1818            }
1819            Token::Full => {
1820                self.advance();
1821                // Handle optional OUTER keyword
1822                if matches!(self.current_token, Token::Outer) {
1823                    self.advance();
1824                }
1825                if !matches!(self.current_token, Token::Join) {
1826                    return Err("Expected JOIN after FULL".to_string());
1827                }
1828                self.advance();
1829                JoinType::Full
1830            }
1831            Token::Cross => {
1832                self.advance();
1833                if !matches!(self.current_token, Token::Join) {
1834                    return Err("Expected JOIN after CROSS".to_string());
1835                }
1836                self.advance();
1837                JoinType::Cross
1838            }
1839            _ => return Err("Expected JOIN keyword".to_string()),
1840        };
1841
1842        // Parse the table being joined
1843        let (table, alias) = self.parse_join_table_source()?;
1844
1845        // Parse ON condition (required for all joins except CROSS JOIN)
1846        let condition = if join_type == JoinType::Cross {
1847            // CROSS JOIN doesn't have ON condition - create empty condition
1848            JoinCondition { conditions: vec![] }
1849        } else {
1850            if !matches!(self.current_token, Token::On) {
1851                return Err("Expected ON keyword after JOIN table".to_string());
1852            }
1853            self.advance();
1854            self.parse_join_condition()?
1855        };
1856
1857        Ok(JoinClause {
1858            join_type,
1859            table,
1860            alias,
1861            condition,
1862        })
1863    }
1864
1865    fn parse_join_table_source(&mut self) -> Result<(TableSource, Option<String>), String> {
1866        let table = match &self.current_token {
1867            Token::Identifier(name) => {
1868                let table_name = name.clone();
1869                self.advance();
1870                TableSource::Table(table_name)
1871            }
1872            Token::LeftParen => {
1873                // Subquery as table source
1874                self.advance();
1875                let subquery = self.parse_select_statement_inner()?;
1876                if !matches!(self.current_token, Token::RightParen) {
1877                    return Err("Expected ')' after subquery".to_string());
1878                }
1879                self.advance();
1880
1881                // Subqueries must have an alias
1882                let alias = match &self.current_token {
1883                    Token::Identifier(alias_name) => {
1884                        let alias = alias_name.clone();
1885                        self.advance();
1886                        alias
1887                    }
1888                    Token::As => {
1889                        self.advance();
1890                        match &self.current_token {
1891                            Token::Identifier(alias_name) => {
1892                                let alias = alias_name.clone();
1893                                self.advance();
1894                                alias
1895                            }
1896                            _ => return Err("Expected alias after AS keyword".to_string()),
1897                        }
1898                    }
1899                    _ => return Err("Subqueries must have an alias".to_string()),
1900                };
1901
1902                return Ok((
1903                    TableSource::DerivedTable {
1904                        query: Box::new(subquery),
1905                        alias: alias.clone(),
1906                    },
1907                    Some(alias),
1908                ));
1909            }
1910            _ => return Err("Expected table name or subquery in JOIN clause".to_string()),
1911        };
1912
1913        // Check for optional alias
1914        let alias = match &self.current_token {
1915            Token::Identifier(alias_name) => {
1916                let alias = alias_name.clone();
1917                self.advance();
1918                Some(alias)
1919            }
1920            Token::As => {
1921                self.advance();
1922                match &self.current_token {
1923                    Token::Identifier(alias_name) => {
1924                        let alias = alias_name.clone();
1925                        self.advance();
1926                        Some(alias)
1927                    }
1928                    _ => return Err("Expected alias after AS keyword".to_string()),
1929                }
1930            }
1931            _ => None,
1932        };
1933
1934        Ok((table, alias))
1935    }
1936
1937    fn parse_join_condition(&mut self) -> Result<JoinCondition, String> {
1938        let mut conditions = Vec::new();
1939
1940        // Parse first condition
1941        conditions.push(self.parse_single_join_condition()?);
1942
1943        // Parse additional conditions connected by AND
1944        while matches!(self.current_token, Token::And) {
1945            self.advance(); // consume AND
1946            conditions.push(self.parse_single_join_condition()?);
1947        }
1948
1949        Ok(JoinCondition { conditions })
1950    }
1951
1952    fn parse_single_join_condition(&mut self) -> Result<SingleJoinCondition, String> {
1953        // Parse left side as additive expression (stops before comparison operators)
1954        // This allows the comparison operator to be explicitly parsed by this function
1955        let left_expr = self.parse_additive()?;
1956
1957        // Parse operator
1958        let operator = match &self.current_token {
1959            Token::Equal => JoinOperator::Equal,
1960            Token::NotEqual => JoinOperator::NotEqual,
1961            Token::LessThan => JoinOperator::LessThan,
1962            Token::LessThanOrEqual => JoinOperator::LessThanOrEqual,
1963            Token::GreaterThan => JoinOperator::GreaterThan,
1964            Token::GreaterThanOrEqual => JoinOperator::GreaterThanOrEqual,
1965            _ => return Err("Expected comparison operator in JOIN condition".to_string()),
1966        };
1967        self.advance();
1968
1969        // Parse right side as additive expression (stops before comparison operators)
1970        let right_expr = self.parse_additive()?;
1971
1972        Ok(SingleJoinCondition {
1973            left_expr,
1974            operator,
1975            right_expr,
1976        })
1977    }
1978
1979    fn parse_column_reference(&mut self) -> Result<String, String> {
1980        match &self.current_token {
1981            Token::Identifier(name) => {
1982                let mut column_ref = name.clone();
1983                self.advance();
1984
1985                // Check for table.column notation
1986                if matches!(self.current_token, Token::Dot) {
1987                    self.advance();
1988                    match &self.current_token {
1989                        Token::Identifier(col_name) => {
1990                            column_ref.push('.');
1991                            column_ref.push_str(col_name);
1992                            self.advance();
1993                        }
1994                        _ => return Err("Expected column name after '.'".to_string()),
1995                    }
1996                }
1997
1998                Ok(column_ref)
1999            }
2000            _ => Err("Expected column reference".to_string()),
2001        }
2002    }
2003
2004    // ===== PIVOT Parsing =====
2005
2006    /// Parse a PIVOT clause after a table source
2007    /// Syntax: PIVOT (aggregate_function FOR pivot_column IN (value1, value2, ...))
2008    fn parse_pivot_clause(&mut self, source: TableSource) -> Result<TableSource, String> {
2009        // Consume PIVOT keyword
2010        self.consume(Token::Pivot)?;
2011
2012        // Consume opening parenthesis
2013        self.consume(Token::LeftParen)?;
2014
2015        // Parse aggregate function (e.g., MAX(AmountEaten))
2016        let aggregate = self.parse_pivot_aggregate()?;
2017
2018        // Parse FOR keyword
2019        self.consume(Token::For)?;
2020
2021        // Parse pivot column name
2022        let pivot_column = match &self.current_token {
2023            Token::Identifier(col) => {
2024                let column = col.clone();
2025                self.advance();
2026                column
2027            }
2028            Token::QuotedIdentifier(col) => {
2029                let column = col.clone();
2030                self.advance();
2031                column
2032            }
2033            _ => return Err("Expected column name after FOR in PIVOT".to_string()),
2034        };
2035
2036        // Parse IN keyword
2037        if !matches!(self.current_token, Token::In) {
2038            return Err("Expected IN keyword in PIVOT clause".to_string());
2039        }
2040        self.advance();
2041
2042        // Parse pivot values
2043        let pivot_values = self.parse_pivot_in_clause()?;
2044
2045        // Consume closing parenthesis for PIVOT
2046        self.consume(Token::RightParen)?;
2047
2048        // Check for optional alias
2049        let alias = self.parse_optional_alias()?;
2050
2051        Ok(TableSource::Pivot {
2052            source: Box::new(source),
2053            aggregate,
2054            pivot_column,
2055            pivot_values,
2056            alias,
2057        })
2058    }
2059
2060    /// Parse the aggregate function specification in PIVOT
2061    /// Example: MAX(AmountEaten), SUM(sales), COUNT(*)
2062    fn parse_pivot_aggregate(&mut self) -> Result<PivotAggregate, String> {
2063        // Parse aggregate function name
2064        let function = match &self.current_token {
2065            Token::Identifier(name) => {
2066                let func_name = name.to_uppercase();
2067                // Validate it's an aggregate function
2068                match func_name.as_str() {
2069                    "MAX" | "MIN" | "SUM" | "AVG" | "COUNT" => {
2070                        self.advance();
2071                        func_name
2072                    }
2073                    _ => {
2074                        return Err(format!(
2075                            "Expected aggregate function (MAX, MIN, SUM, AVG, COUNT), got {}",
2076                            func_name
2077                        ))
2078                    }
2079                }
2080            }
2081            _ => return Err("Expected aggregate function in PIVOT".to_string()),
2082        };
2083
2084        // Consume opening parenthesis
2085        self.consume(Token::LeftParen)?;
2086
2087        // Parse column name (or * for COUNT)
2088        let column = match &self.current_token {
2089            Token::Identifier(col) => {
2090                let column = col.clone();
2091                self.advance();
2092                column
2093            }
2094            Token::QuotedIdentifier(col) => {
2095                let column = col.clone();
2096                self.advance();
2097                column
2098            }
2099            Token::Star => {
2100                // COUNT(*) is allowed
2101                if function == "COUNT" {
2102                    self.advance();
2103                    "*".to_string()
2104                } else {
2105                    return Err(format!("Only COUNT can use *, not {}", function));
2106                }
2107            }
2108            _ => return Err("Expected column name in aggregate function".to_string()),
2109        };
2110
2111        // Consume closing parenthesis
2112        self.consume(Token::RightParen)?;
2113
2114        Ok(PivotAggregate { function, column })
2115    }
2116
2117    /// Parse the IN clause values in PIVOT
2118    /// Example: IN ('Sammich', 'Pickle', 'Apple')
2119    /// Returns vector of pivot values
2120    fn parse_pivot_in_clause(&mut self) -> Result<Vec<String>, String> {
2121        // Consume opening parenthesis
2122        self.consume(Token::LeftParen)?;
2123
2124        let mut values = Vec::new();
2125
2126        // Parse first value
2127        match &self.current_token {
2128            Token::StringLiteral(val) => {
2129                values.push(val.clone());
2130                self.advance();
2131            }
2132            Token::Identifier(val) => {
2133                // Allow unquoted identifiers as well
2134                values.push(val.clone());
2135                self.advance();
2136            }
2137            Token::NumberLiteral(val) => {
2138                // Allow numeric values
2139                values.push(val.clone());
2140                self.advance();
2141            }
2142            _ => return Err("Expected value in PIVOT IN clause".to_string()),
2143        }
2144
2145        // Parse additional values separated by commas
2146        while matches!(self.current_token, Token::Comma) {
2147            self.advance(); // consume comma
2148
2149            match &self.current_token {
2150                Token::StringLiteral(val) => {
2151                    values.push(val.clone());
2152                    self.advance();
2153                }
2154                Token::Identifier(val) => {
2155                    values.push(val.clone());
2156                    self.advance();
2157                }
2158                Token::NumberLiteral(val) => {
2159                    values.push(val.clone());
2160                    self.advance();
2161                }
2162                _ => return Err("Expected value after comma in PIVOT IN clause".to_string()),
2163            }
2164        }
2165
2166        // Consume closing parenthesis
2167        self.consume(Token::RightParen)?;
2168
2169        if values.is_empty() {
2170            return Err("PIVOT IN clause must have at least one value".to_string());
2171        }
2172
2173        Ok(values)
2174    }
2175}
2176
2177// Context detection for cursor position
2178#[derive(Debug, Clone)]
2179pub enum CursorContext {
2180    SelectClause,
2181    FromClause,
2182    WhereClause,
2183    OrderByClause,
2184    AfterColumn(String),
2185    AfterLogicalOp(LogicalOp),
2186    AfterComparisonOp(String, String), // column_name, operator
2187    InMethodCall(String, String),      // object, method
2188    InExpression,
2189    Unknown,
2190}
2191
2192/// Safe UTF-8 string slicing that ensures we don't slice in the middle of a character
2193fn safe_slice_to(s: &str, pos: usize) -> &str {
2194    if pos >= s.len() {
2195        return s;
2196    }
2197
2198    // Find the nearest valid character boundary at or before pos
2199    let mut safe_pos = pos;
2200    while safe_pos > 0 && !s.is_char_boundary(safe_pos) {
2201        safe_pos -= 1;
2202    }
2203
2204    &s[..safe_pos]
2205}
2206
2207/// Safe UTF-8 string slicing from a position to the end
2208fn safe_slice_from(s: &str, pos: usize) -> &str {
2209    if pos >= s.len() {
2210        return "";
2211    }
2212
2213    // Find the nearest valid character boundary at or after pos
2214    let mut safe_pos = pos;
2215    while safe_pos < s.len() && !s.is_char_boundary(safe_pos) {
2216        safe_pos += 1;
2217    }
2218
2219    &s[safe_pos..]
2220}
2221
2222#[must_use]
2223pub fn detect_cursor_context(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
2224    let truncated = safe_slice_to(query, cursor_pos);
2225    let mut parser = Parser::new(truncated);
2226
2227    // Try to parse as much as possible
2228    if let Ok(stmt) = parser.parse() {
2229        let (ctx, partial) = analyze_statement(&stmt, truncated, cursor_pos);
2230        #[cfg(test)]
2231        println!("analyze_statement returned: {ctx:?}, {partial:?} for query: '{truncated}'");
2232        (ctx, partial)
2233    } else {
2234        // Partial parse - analyze what we have
2235        let (ctx, partial) = analyze_partial(truncated, cursor_pos);
2236        #[cfg(test)]
2237        println!("analyze_partial returned: {ctx:?}, {partial:?} for query: '{truncated}'");
2238        (ctx, partial)
2239    }
2240}
2241
2242#[must_use]
2243pub fn tokenize_query(query: &str) -> Vec<String> {
2244    let mut lexer = Lexer::new(query);
2245    let tokens = lexer.tokenize_all();
2246    tokens.iter().map(|t| format!("{t:?}")).collect()
2247}
2248
2249#[must_use]
2250/// Helper function to find the start of a quoted string searching backwards
2251fn find_quote_start(bytes: &[u8], mut pos: usize) -> Option<usize> {
2252    // Skip the closing quote and search backwards
2253    if pos > 0 {
2254        pos -= 1;
2255        while pos > 0 {
2256            if bytes[pos] == b'"' {
2257                // Check if it's not an escaped quote
2258                if pos == 0 || bytes[pos - 1] != b'\\' {
2259                    return Some(pos);
2260                }
2261            }
2262            pos -= 1;
2263        }
2264        // Check position 0 separately
2265        if bytes[0] == b'"' {
2266            return Some(0);
2267        }
2268    }
2269    None
2270}
2271
2272/// Helper function to handle method call context after validation
2273fn handle_method_call_context(col_name: &str, after_dot: &str) -> (CursorContext, Option<String>) {
2274    // Check if there's a partial method name after the dot
2275    let partial_method = if after_dot.is_empty() {
2276        None
2277    } else if after_dot.chars().all(|c| c.is_alphanumeric() || c == '_') {
2278        Some(after_dot.to_string())
2279    } else {
2280        None
2281    };
2282
2283    // For AfterColumn context, strip quotes if present for consistency
2284    let col_name_for_context =
2285        if col_name.starts_with('"') && col_name.ends_with('"') && col_name.len() > 2 {
2286            col_name[1..col_name.len() - 1].to_string()
2287        } else {
2288            col_name.to_string()
2289        };
2290
2291    (
2292        CursorContext::AfterColumn(col_name_for_context),
2293        partial_method,
2294    )
2295}
2296
2297/// Helper function to check if we're after a comparison operator
2298fn check_after_comparison_operator(query: &str) -> Option<(CursorContext, Option<String>)> {
2299    for op in &Parser::COMPARISON_OPERATORS {
2300        if let Some(op_pos) = query.rfind(op) {
2301            let before_op = safe_slice_to(query, op_pos);
2302            let after_op_start = op_pos + op.len();
2303            let after_op = if after_op_start < query.len() {
2304                &query[after_op_start..]
2305            } else {
2306                ""
2307            };
2308
2309            // Check if we have a column name before the operator
2310            if let Some(col_name) = before_op.split_whitespace().last() {
2311                if col_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
2312                    // Check if we're at or near the end of the query
2313                    let after_op_trimmed = after_op.trim();
2314                    if after_op_trimmed.is_empty()
2315                        || (after_op_trimmed
2316                            .chars()
2317                            .all(|c| c.is_alphanumeric() || c == '_')
2318                            && !after_op_trimmed.contains('('))
2319                    {
2320                        let partial = if after_op_trimmed.is_empty() {
2321                            None
2322                        } else {
2323                            Some(after_op_trimmed.to_string())
2324                        };
2325                        return Some((
2326                            CursorContext::AfterComparisonOp(
2327                                col_name.to_string(),
2328                                op.trim().to_string(),
2329                            ),
2330                            partial,
2331                        ));
2332                    }
2333                }
2334            }
2335        }
2336    }
2337    None
2338}
2339
2340fn analyze_statement(
2341    stmt: &SelectStatement,
2342    query: &str,
2343    _cursor_pos: usize,
2344) -> (CursorContext, Option<String>) {
2345    // First check for method call context (e.g., "columnName." or "columnName.Con")
2346    let trimmed = query.trim();
2347
2348    // Check if we're after a comparison operator (e.g., "createdDate > ")
2349    if let Some(result) = check_after_comparison_operator(query) {
2350        return result;
2351    }
2352
2353    // First check if we're after AND/OR - this takes precedence
2354    // Helper function to check if string ends with a logical operator
2355    let ends_with_logical_op = |s: &str| -> bool {
2356        let s_upper = s.to_uppercase();
2357        s_upper.ends_with(" AND") || s_upper.ends_with(" OR")
2358    };
2359
2360    if ends_with_logical_op(trimmed) {
2361        // Don't check for method context if we're clearly after a logical operator
2362    } else {
2363        // Look for the last dot in the query
2364        if let Some(dot_pos) = trimmed.rfind('.') {
2365            // Check if we're after a column name and dot
2366            let before_dot = safe_slice_to(trimmed, dot_pos);
2367            let after_dot_start = dot_pos + 1;
2368            let after_dot = if after_dot_start < trimmed.len() {
2369                &trimmed[after_dot_start..]
2370            } else {
2371                ""
2372            };
2373
2374            // Check if the part after dot looks like an incomplete method call
2375            // (not a complete method call like "Contains(...)")
2376            if !after_dot.contains('(') {
2377                // Try to extract the column name - could be quoted or regular
2378                let col_name = if before_dot.ends_with('"') {
2379                    // Handle quoted identifier - search backwards for matching opening quote
2380                    let bytes = before_dot.as_bytes();
2381                    let pos = before_dot.len() - 1; // Position of closing quote
2382
2383                    find_quote_start(bytes, pos).map(|start| safe_slice_from(before_dot, start))
2384                } else {
2385                    // Regular identifier - get the last word, handling parentheses
2386                    // Strip all leading parentheses
2387                    before_dot
2388                        .split_whitespace()
2389                        .last()
2390                        .map(|word| word.trim_start_matches('('))
2391                };
2392
2393                if let Some(col_name) = col_name {
2394                    // For quoted identifiers, keep the quotes, for regular identifiers check validity
2395                    let is_valid = Parser::is_valid_identifier(col_name);
2396
2397                    if is_valid {
2398                        return handle_method_call_context(col_name, after_dot);
2399                    }
2400                }
2401            }
2402        }
2403    }
2404
2405    // Check if we're in WHERE clause
2406    if let Some(where_clause) = &stmt.where_clause {
2407        // Check if query ends with AND/OR (with or without trailing space/partial)
2408        let trimmed_upper = trimmed.to_uppercase();
2409        if trimmed_upper.ends_with(" AND") || trimmed_upper.ends_with(" OR") {
2410            let op = if trimmed_upper.ends_with(" AND") {
2411                LogicalOp::And
2412            } else {
2413                LogicalOp::Or
2414            };
2415            return (CursorContext::AfterLogicalOp(op), None);
2416        }
2417
2418        // Check if we have AND/OR followed by a partial word
2419        let query_upper = query.to_uppercase();
2420        if let Some(and_pos) = query_upper.rfind(" AND ") {
2421            let after_and = safe_slice_from(query, and_pos + 5);
2422            let partial = extract_partial_at_end(after_and);
2423            if partial.is_some() {
2424                return (CursorContext::AfterLogicalOp(LogicalOp::And), partial);
2425            }
2426        }
2427
2428        if let Some(or_pos) = query_upper.rfind(" OR ") {
2429            let after_or = safe_slice_from(query, or_pos + 4);
2430            let partial = extract_partial_at_end(after_or);
2431            if partial.is_some() {
2432                return (CursorContext::AfterLogicalOp(LogicalOp::Or), partial);
2433            }
2434        }
2435
2436        if let Some(last_condition) = where_clause.conditions.last() {
2437            if let Some(connector) = &last_condition.connector {
2438                // We're after AND/OR
2439                return (
2440                    CursorContext::AfterLogicalOp(connector.clone()),
2441                    extract_partial_at_end(query),
2442                );
2443            }
2444        }
2445        // We're in WHERE clause but not after AND/OR
2446        return (CursorContext::WhereClause, extract_partial_at_end(query));
2447    }
2448
2449    // Check if we're after ORDER BY
2450    let query_upper = query.to_uppercase();
2451    if query_upper.ends_with(" ORDER BY") {
2452        return (CursorContext::OrderByClause, None);
2453    }
2454
2455    // Check other contexts based on what's in the statement
2456    if stmt.order_by.is_some() {
2457        return (CursorContext::OrderByClause, extract_partial_at_end(query));
2458    }
2459
2460    if stmt.from_table.is_some() && stmt.where_clause.is_none() && stmt.order_by.is_none() {
2461        return (CursorContext::FromClause, extract_partial_at_end(query));
2462    }
2463
2464    if !stmt.columns.is_empty() && stmt.from_table.is_none() {
2465        return (CursorContext::SelectClause, extract_partial_at_end(query));
2466    }
2467
2468    (CursorContext::Unknown, None)
2469}
2470
2471/// Helper function to find the last occurrence of a token type in the token stream
2472fn find_last_token(tokens: &[(usize, usize, Token)], target: &Token) -> Option<usize> {
2473    tokens
2474        .iter()
2475        .rposition(|(_, _, t)| t == target)
2476        .map(|idx| tokens[idx].0)
2477}
2478
2479/// Helper function to find the last occurrence of any matching token
2480fn find_last_matching_token<F>(
2481    tokens: &[(usize, usize, Token)],
2482    predicate: F,
2483) -> Option<(usize, &Token)>
2484where
2485    F: Fn(&Token) -> bool,
2486{
2487    tokens
2488        .iter()
2489        .rposition(|(_, _, t)| predicate(t))
2490        .map(|idx| (tokens[idx].0, &tokens[idx].2))
2491}
2492
2493/// Helper function to check if we're in a specific clause based on tokens
2494fn is_in_clause(
2495    tokens: &[(usize, usize, Token)],
2496    clause_token: Token,
2497    exclude_tokens: &[Token],
2498) -> bool {
2499    // Find the last occurrence of the clause token
2500    if let Some(clause_pos) = find_last_token(tokens, &clause_token) {
2501        // Check if any exclude tokens appear after it
2502        for (pos, _, token) in tokens.iter() {
2503            if *pos > clause_pos && exclude_tokens.contains(token) {
2504                return false;
2505            }
2506        }
2507        return true;
2508    }
2509    false
2510}
2511
2512fn analyze_partial(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
2513    // Tokenize the query up to cursor position
2514    let mut lexer = Lexer::new(query);
2515    let tokens = lexer.tokenize_all_with_positions();
2516
2517    let trimmed = query.trim();
2518
2519    #[cfg(test)]
2520    {
2521        if trimmed.contains("\"Last Name\"") {
2522            eprintln!("DEBUG analyze_partial: query='{query}', trimmed='{trimmed}'");
2523        }
2524    }
2525
2526    // Check if we're after a comparison operator (e.g., "createdDate > ")
2527    if let Some(result) = check_after_comparison_operator(query) {
2528        return result;
2529    }
2530
2531    // Look for the last dot in the query (method call context) - check this FIRST
2532    // before AND/OR detection to properly handle cases like "AND (Country."
2533    if let Some(dot_pos) = trimmed.rfind('.') {
2534        #[cfg(test)]
2535        {
2536            if trimmed.contains("\"Last Name\"") {
2537                eprintln!("DEBUG: Found dot at position {dot_pos}");
2538            }
2539        }
2540        // Check if we're after a column name and dot
2541        let before_dot = &trimmed[..dot_pos];
2542        let after_dot = &trimmed[dot_pos + 1..];
2543
2544        // Check if the part after dot looks like an incomplete method call
2545        // (not a complete method call like "Contains(...)")
2546        if !after_dot.contains('(') {
2547            // Try to extract the column name before the dot
2548            // It could be a quoted identifier like "Last Name" or a regular identifier
2549            let col_name = if before_dot.ends_with('"') {
2550                // Handle quoted identifier - search backwards for matching opening quote
2551                let bytes = before_dot.as_bytes();
2552                let pos = before_dot.len() - 1; // Position of closing quote
2553
2554                #[cfg(test)]
2555                {
2556                    if trimmed.contains("\"Last Name\"") {
2557                        eprintln!("DEBUG: before_dot='{before_dot}', looking for opening quote");
2558                    }
2559                }
2560
2561                let found_start = find_quote_start(bytes, pos);
2562
2563                if let Some(start) = found_start {
2564                    // Extract the full quoted identifier including quotes
2565                    let result = safe_slice_from(before_dot, start);
2566                    #[cfg(test)]
2567                    {
2568                        if trimmed.contains("\"Last Name\"") {
2569                            eprintln!("DEBUG: Extracted quoted identifier: '{result}'");
2570                        }
2571                    }
2572                    Some(result)
2573                } else {
2574                    #[cfg(test)]
2575                    {
2576                        if trimmed.contains("\"Last Name\"") {
2577                            eprintln!("DEBUG: No opening quote found!");
2578                        }
2579                    }
2580                    None
2581                }
2582            } else {
2583                // Regular identifier - get the last word, handling parentheses
2584                // Strip all leading parentheses
2585                before_dot
2586                    .split_whitespace()
2587                    .last()
2588                    .map(|word| word.trim_start_matches('('))
2589            };
2590
2591            if let Some(col_name) = col_name {
2592                #[cfg(test)]
2593                {
2594                    if trimmed.contains("\"Last Name\"") {
2595                        eprintln!("DEBUG: col_name = '{col_name}'");
2596                    }
2597                }
2598
2599                // For quoted identifiers, keep the quotes, for regular identifiers check validity
2600                let is_valid = Parser::is_valid_identifier(col_name);
2601
2602                #[cfg(test)]
2603                {
2604                    if trimmed.contains("\"Last Name\"") {
2605                        eprintln!("DEBUG: is_valid = {is_valid}");
2606                    }
2607                }
2608
2609                if is_valid {
2610                    return handle_method_call_context(col_name, after_dot);
2611                }
2612            }
2613        }
2614    }
2615
2616    // Check if we're after AND/OR using tokens - but only after checking for method calls
2617    if let Some((pos, token)) =
2618        find_last_matching_token(&tokens, |t| matches!(t, Token::And | Token::Or))
2619    {
2620        // Check if cursor is after the logical operator
2621        let token_end_pos = if matches!(token, Token::And) {
2622            pos + 3 // "AND" is 3 characters
2623        } else {
2624            pos + 2 // "OR" is 2 characters
2625        };
2626
2627        if cursor_pos > token_end_pos {
2628            // Extract any partial word after the operator
2629            let after_op = safe_slice_from(query, token_end_pos + 1); // +1 for the space
2630            let partial = extract_partial_at_end(after_op);
2631            let op = if matches!(token, Token::And) {
2632                LogicalOp::And
2633            } else {
2634                LogicalOp::Or
2635            };
2636            return (CursorContext::AfterLogicalOp(op), partial);
2637        }
2638    }
2639
2640    // Check if the last token is AND or OR (handles case where it's at the very end)
2641    if let Some((_, _, last_token)) = tokens.last() {
2642        if matches!(last_token, Token::And | Token::Or) {
2643            let op = if matches!(last_token, Token::And) {
2644                LogicalOp::And
2645            } else {
2646                LogicalOp::Or
2647            };
2648            return (CursorContext::AfterLogicalOp(op), None);
2649        }
2650    }
2651
2652    // Check if we're in ORDER BY clause using tokens
2653    if let Some(order_pos) = find_last_token(&tokens, &Token::OrderBy) {
2654        // Check if there's a BY token after ORDER
2655        let has_by = tokens
2656            .iter()
2657            .any(|(pos, _, t)| *pos > order_pos && matches!(t, Token::By));
2658        if has_by
2659            || tokens
2660                .last()
2661                .map_or(false, |(_, _, t)| matches!(t, Token::OrderBy))
2662        {
2663            return (CursorContext::OrderByClause, extract_partial_at_end(query));
2664        }
2665    }
2666
2667    // Check if we're in WHERE clause using tokens
2668    if is_in_clause(&tokens, Token::Where, &[Token::OrderBy, Token::GroupBy]) {
2669        return (CursorContext::WhereClause, extract_partial_at_end(query));
2670    }
2671
2672    // Check if we're in FROM clause using tokens
2673    if is_in_clause(
2674        &tokens,
2675        Token::From,
2676        &[Token::Where, Token::OrderBy, Token::GroupBy],
2677    ) {
2678        return (CursorContext::FromClause, extract_partial_at_end(query));
2679    }
2680
2681    // Check if we're in SELECT clause using tokens
2682    if find_last_token(&tokens, &Token::Select).is_some()
2683        && find_last_token(&tokens, &Token::From).is_none()
2684    {
2685        return (CursorContext::SelectClause, extract_partial_at_end(query));
2686    }
2687
2688    (CursorContext::Unknown, None)
2689}
2690
2691fn extract_partial_at_end(query: &str) -> Option<String> {
2692    let trimmed = query.trim();
2693
2694    // First check if the last word itself starts with a quote (unclosed quoted identifier being typed)
2695    if let Some(last_word) = trimmed.split_whitespace().last() {
2696        if last_word.starts_with('"') && !last_word.ends_with('"') {
2697            // This is an unclosed quoted identifier like "Cust
2698            return Some(last_word.to_string());
2699        }
2700    }
2701
2702    // Regular identifier extraction
2703    let last_word = trimmed.split_whitespace().last()?;
2704
2705    // Check if it's a partial identifier (not a keyword or operator)
2706    // First check if it's alphanumeric (potential identifier)
2707    if last_word.chars().all(|c| c.is_alphanumeric() || c == '_') {
2708        // Use lexer to determine if it's a keyword or identifier
2709        if !is_sql_keyword(last_word) {
2710            Some(last_word.to_string())
2711        } else {
2712            None
2713        }
2714    } else {
2715        None
2716    }
2717}
2718
2719// Implement the ParsePrimary trait for Parser to use the modular expression parsing
2720impl ParsePrimary for Parser {
2721    fn current_token(&self) -> &Token {
2722        &self.current_token
2723    }
2724
2725    fn advance(&mut self) {
2726        self.advance();
2727    }
2728
2729    fn consume(&mut self, expected: Token) -> Result<(), String> {
2730        self.consume(expected)
2731    }
2732
2733    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
2734        self.parse_case_expression()
2735    }
2736
2737    fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
2738        self.parse_function_args()
2739    }
2740
2741    fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
2742        self.parse_window_spec()
2743    }
2744
2745    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
2746        self.parse_logical_or()
2747    }
2748
2749    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2750        self.parse_comparison()
2751    }
2752
2753    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2754        self.parse_expression_list()
2755    }
2756
2757    fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2758        // Parse subquery without parenthesis balance validation
2759        if matches!(self.current_token, Token::With) {
2760            self.parse_with_clause_inner()
2761        } else {
2762            self.parse_select_statement_inner()
2763        }
2764    }
2765}
2766
2767// Implement the ExpressionParser trait for Parser to use the modular expression parsing
2768impl ExpressionParser for Parser {
2769    fn current_token(&self) -> &Token {
2770        &self.current_token
2771    }
2772
2773    fn advance(&mut self) {
2774        // Call the main advance method directly to avoid recursion
2775        match &self.current_token {
2776            Token::LeftParen => self.paren_depth += 1,
2777            Token::RightParen => {
2778                self.paren_depth -= 1;
2779            }
2780            _ => {}
2781        }
2782        self.current_token = self.lexer.next_token();
2783    }
2784
2785    fn peek(&self) -> Option<&Token> {
2786        // We can't return a reference to a token from a temporary lexer,
2787        // so we need a different approach. For now, let's use a workaround
2788        // that checks the next token type without consuming it.
2789        // This is a limitation of the current design.
2790        // A proper fix would be to store the peeked token in the Parser struct.
2791        None // TODO: Implement proper lookahead
2792    }
2793
2794    fn is_at_end(&self) -> bool {
2795        matches!(self.current_token, Token::Eof)
2796    }
2797
2798    fn consume(&mut self, expected: Token) -> Result<(), String> {
2799        // Call the main consume method to avoid recursion
2800        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
2801            self.update_paren_depth(&expected)?;
2802            self.current_token = self.lexer.next_token();
2803            Ok(())
2804        } else {
2805            Err(format!(
2806                "Expected {:?}, found {:?}",
2807                expected, self.current_token
2808            ))
2809        }
2810    }
2811
2812    fn parse_identifier(&mut self) -> Result<String, String> {
2813        if let Token::Identifier(id) = &self.current_token {
2814            let id = id.clone();
2815            self.advance();
2816            Ok(id)
2817        } else {
2818            Err(format!(
2819                "Expected identifier, found {:?}",
2820                self.current_token
2821            ))
2822        }
2823    }
2824}
2825
2826// Implement the ParseArithmetic trait for Parser to use the modular arithmetic parsing
2827impl ParseArithmetic for Parser {
2828    fn current_token(&self) -> &Token {
2829        &self.current_token
2830    }
2831
2832    fn advance(&mut self) {
2833        self.advance();
2834    }
2835
2836    fn consume(&mut self, expected: Token) -> Result<(), String> {
2837        self.consume(expected)
2838    }
2839
2840    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2841        self.parse_primary()
2842    }
2843
2844    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
2845        self.parse_multiplicative()
2846    }
2847
2848    fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
2849        self.parse_method_args()
2850    }
2851}
2852
2853// Implement the ParseComparison trait for Parser to use the modular comparison parsing
2854impl ParseComparison for Parser {
2855    fn current_token(&self) -> &Token {
2856        &self.current_token
2857    }
2858
2859    fn advance(&mut self) {
2860        self.advance();
2861    }
2862
2863    fn consume(&mut self, expected: Token) -> Result<(), String> {
2864        self.consume(expected)
2865    }
2866
2867    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2868        self.parse_primary()
2869    }
2870
2871    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
2872        self.parse_additive()
2873    }
2874
2875    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2876        self.parse_expression_list()
2877    }
2878
2879    fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2880        // Parse subquery without parenthesis balance validation
2881        if matches!(self.current_token, Token::With) {
2882            self.parse_with_clause_inner()
2883        } else {
2884            self.parse_select_statement_inner()
2885        }
2886    }
2887}
2888
2889// Implement the ParseLogical trait for Parser to use the modular logical parsing
2890impl ParseLogical for Parser {
2891    fn current_token(&self) -> &Token {
2892        &self.current_token
2893    }
2894
2895    fn advance(&mut self) {
2896        self.advance();
2897    }
2898
2899    fn consume(&mut self, expected: Token) -> Result<(), String> {
2900        self.consume(expected)
2901    }
2902
2903    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
2904        self.parse_logical_and()
2905    }
2906
2907    fn parse_base_logical_expression(&mut self) -> Result<SqlExpression, String> {
2908        // This is the base for logical AND - it should parse comparison expressions
2909        // to avoid infinite recursion with parse_expression
2910        self.parse_comparison()
2911    }
2912
2913    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2914        self.parse_comparison()
2915    }
2916
2917    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2918        self.parse_expression_list()
2919    }
2920}
2921
2922// Implement the ParseCase trait for Parser to use the modular CASE parsing
2923impl ParseCase for Parser {
2924    fn current_token(&self) -> &Token {
2925        &self.current_token
2926    }
2927
2928    fn advance(&mut self) {
2929        self.advance();
2930    }
2931
2932    fn consume(&mut self, expected: Token) -> Result<(), String> {
2933        self.consume(expected)
2934    }
2935
2936    fn parse_expression(&mut self) -> Result<SqlExpression, String> {
2937        self.parse_expression()
2938    }
2939}
2940
2941fn is_sql_keyword(word: &str) -> bool {
2942    // Use the lexer to check if this word produces a keyword token
2943    let mut lexer = Lexer::new(word);
2944    let token = lexer.next_token();
2945
2946    // Check if it's a keyword token (not an identifier)
2947    !matches!(token, Token::Identifier(_) | Token::Eof)
2948}
2949
2950#[cfg(test)]
2951mod tests {
2952    use super::*;
2953
2954    /// Test that Parser::new() defaults to Standard mode (backward compatible)
2955    #[test]
2956    fn test_parser_mode_default_is_standard() {
2957        let sql = "-- Leading comment\nSELECT * FROM users";
2958        let mut parser = Parser::new(sql);
2959        let stmt = parser.parse().unwrap();
2960
2961        // In Standard mode, comments should be empty
2962        assert!(stmt.leading_comments.is_empty());
2963        assert!(stmt.trailing_comment.is_none());
2964    }
2965
2966    /// Test that PreserveComments mode collects leading comments
2967    #[test]
2968    fn test_parser_mode_preserve_leading_comments() {
2969        let sql = "-- Important query\n-- Author: Alice\nSELECT id, name FROM users";
2970        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
2971        let stmt = parser.parse().unwrap();
2972
2973        // Should have 2 leading comments
2974        assert_eq!(stmt.leading_comments.len(), 2);
2975        assert!(stmt.leading_comments[0].is_line_comment);
2976        assert!(stmt.leading_comments[0].text.contains("Important query"));
2977        assert!(stmt.leading_comments[1].text.contains("Author: Alice"));
2978    }
2979
2980    /// Test that PreserveComments mode collects trailing comments
2981    #[test]
2982    fn test_parser_mode_preserve_trailing_comment() {
2983        let sql = "SELECT * FROM users -- Fetch all users";
2984        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
2985        let stmt = parser.parse().unwrap();
2986
2987        // Should have trailing comment
2988        assert!(stmt.trailing_comment.is_some());
2989        let comment = stmt.trailing_comment.unwrap();
2990        assert!(comment.is_line_comment);
2991        assert!(comment.text.contains("Fetch all users"));
2992    }
2993
2994    /// Test that PreserveComments mode handles block comments
2995    #[test]
2996    fn test_parser_mode_preserve_block_comments() {
2997        let sql = "/* Query explanation */\nSELECT * FROM users";
2998        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
2999        let stmt = parser.parse().unwrap();
3000
3001        // Should have leading block comment
3002        assert_eq!(stmt.leading_comments.len(), 1);
3003        assert!(!stmt.leading_comments[0].is_line_comment); // It's a block comment
3004        assert!(stmt.leading_comments[0].text.contains("Query explanation"));
3005    }
3006
3007    /// Test that PreserveComments mode collects both leading and trailing
3008    #[test]
3009    fn test_parser_mode_preserve_both_comments() {
3010        let sql = "-- Leading\nSELECT * FROM users -- Trailing";
3011        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
3012        let stmt = parser.parse().unwrap();
3013
3014        // Should have both
3015        assert_eq!(stmt.leading_comments.len(), 1);
3016        assert!(stmt.leading_comments[0].text.contains("Leading"));
3017        assert!(stmt.trailing_comment.is_some());
3018        assert!(stmt.trailing_comment.unwrap().text.contains("Trailing"));
3019    }
3020
3021    /// Test that Standard mode has zero performance overhead (no comment parsing)
3022    #[test]
3023    fn test_parser_mode_standard_ignores_comments() {
3024        let sql = "-- Comment 1\n/* Comment 2 */\nSELECT * FROM users -- Comment 3";
3025        let mut parser = Parser::with_mode(sql, ParserMode::Standard);
3026        let stmt = parser.parse().unwrap();
3027
3028        // Comments should be completely ignored
3029        assert!(stmt.leading_comments.is_empty());
3030        assert!(stmt.trailing_comment.is_none());
3031
3032        // But query should still parse correctly
3033        assert_eq!(stmt.select_items.len(), 1);
3034        assert_eq!(stmt.from_table, Some("users".to_string()));
3035    }
3036
3037    /// Test backward compatibility - existing code using Parser::new() unchanged
3038    #[test]
3039    fn test_parser_backward_compatibility() {
3040        let sql = "SELECT id, name FROM users WHERE active = true";
3041
3042        // Old way (still works, defaults to Standard mode)
3043        let mut parser1 = Parser::new(sql);
3044        let stmt1 = parser1.parse().unwrap();
3045
3046        // Explicit Standard mode (same behavior)
3047        let mut parser2 = Parser::with_mode(sql, ParserMode::Standard);
3048        let stmt2 = parser2.parse().unwrap();
3049
3050        // Both should produce identical ASTs (comments are empty in both)
3051        assert_eq!(stmt1.select_items.len(), stmt2.select_items.len());
3052        assert_eq!(stmt1.from_table, stmt2.from_table);
3053        assert_eq!(stmt1.where_clause.is_some(), stmt2.where_clause.is_some());
3054        assert!(stmt1.leading_comments.is_empty());
3055        assert!(stmt2.leading_comments.is_empty());
3056    }
3057
3058    /// Test PIVOT parsing - currently returns error as execution is not implemented
3059    #[test]
3060    fn test_pivot_parsing_not_yet_supported() {
3061        let sql = "SELECT * FROM food_eaten PIVOT (MAX(AmountEaten) FOR FoodName IN ('Sammich', 'Pickle', 'Apple'))";
3062        let mut parser = Parser::new(sql);
3063        let result = parser.parse();
3064
3065        // PIVOT is now fully supported! Verify parsing succeeds
3066        assert!(result.is_ok());
3067        let stmt = result.unwrap();
3068
3069        // Verify from_source contains a PIVOT
3070        assert!(stmt.from_source.is_some());
3071        if let Some(crate::sql::parser::ast::TableSource::Pivot { .. }) = stmt.from_source {
3072            // Success!
3073        } else {
3074            panic!("Expected from_source to be a Pivot variant");
3075        }
3076    }
3077
3078    /// Test PIVOT syntax with different aggregate functions
3079    #[test]
3080    fn test_pivot_aggregate_functions() {
3081        // Test with SUM - PIVOT is now fully supported!
3082        let sql = "SELECT * FROM sales PIVOT (SUM(amount) FOR month IN ('Jan', 'Feb', 'Mar'))";
3083        let mut parser = Parser::new(sql);
3084        let result = parser.parse();
3085        assert!(result.is_ok());
3086
3087        // Test with COUNT
3088        let sql2 = "SELECT * FROM sales PIVOT (COUNT(*) FOR month IN ('Jan', 'Feb'))";
3089        let mut parser2 = Parser::new(sql2);
3090        let result2 = parser2.parse();
3091        assert!(result2.is_ok());
3092
3093        // Test with AVG
3094        let sql3 = "SELECT * FROM sales PIVOT (AVG(price) FOR category IN ('A', 'B'))";
3095        let mut parser3 = Parser::new(sql3);
3096        let result3 = parser3.parse();
3097        assert!(result3.is_ok());
3098    }
3099
3100    /// Test PIVOT with subquery source
3101    #[test]
3102    fn test_pivot_with_subquery() {
3103        let sql = "SELECT * FROM (SELECT * FROM food_eaten WHERE Id > 5) AS t \
3104                   PIVOT (MAX(AmountEaten) FOR FoodName IN ('Sammich', 'Pickle'))";
3105        let mut parser = Parser::new(sql);
3106        let result = parser.parse();
3107
3108        // PIVOT with subquery is now fully supported!
3109        assert!(result.is_ok());
3110        let stmt = result.unwrap();
3111        assert!(stmt.from_source.is_some());
3112    }
3113
3114    /// Test PIVOT with alias
3115    #[test]
3116    fn test_pivot_with_alias() {
3117        let sql =
3118            "SELECT * FROM sales PIVOT (SUM(amount) FOR month IN ('Jan', 'Feb')) AS pivot_table";
3119        let mut parser = Parser::new(sql);
3120        let result = parser.parse();
3121
3122        // PIVOT with alias is now fully supported!
3123        assert!(result.is_ok());
3124        let stmt = result.unwrap();
3125        assert!(stmt.from_source.is_some());
3126    }
3127}