Skip to main content

sql_cli/sql/
recursive_parser.rs

1// Keep chrono imports for the parser implementation
2
3// Re-exports for backward compatibility - these serve as both imports and re-exports
4pub use super::parser::ast::{
5    CTEType, Comment, Condition, DataFormat, FileCTESpec, FrameBound, FrameUnit, HttpMethod,
6    IntoTable, JoinClause, JoinCondition, JoinOperator, JoinType, LogicalOp, OrderByColumn,
7    OrderByItem, PivotAggregate, SelectItem, SelectStatement, SetOperation, SingleJoinCondition,
8    SortDirection, SqlExpression, TableFunction, TableSource, WebCTESpec, WhenBranch, WhereClause,
9    WindowFrame, WindowSpec, CTE,
10};
11pub use super::parser::legacy::{ParseContext, ParseState, Schema, SqlParser, SqlToken, TableInfo};
12pub use super::parser::lexer::{Lexer, LexerMode, Token};
13pub use super::parser::ParserConfig;
14
15// Re-export formatting functions for backward compatibility
16pub use super::parser::formatter::{format_ast_tree, format_sql_pretty, format_sql_pretty_compact};
17
18// New AST-based formatter
19pub use super::parser::ast_formatter::{format_sql_ast, format_sql_ast_with_config, FormatConfig};
20
21// Import the new expression modules
22use super::parser::expressions::arithmetic::{
23    parse_additive as parse_additive_expr, parse_multiplicative as parse_multiplicative_expr,
24    ParseArithmetic,
25};
26use super::parser::expressions::case::{parse_case_expression as parse_case_expr, ParseCase};
27use super::parser::expressions::comparison::{
28    parse_comparison as parse_comparison_expr, parse_in_operator, ParseComparison,
29};
30use super::parser::expressions::logical::{
31    parse_logical_and as parse_logical_and_expr, parse_logical_or as parse_logical_or_expr,
32    ParseLogical,
33};
34use super::parser::expressions::primary::{
35    parse_primary as parse_primary_expr, ParsePrimary, PrimaryExpressionContext,
36};
37use super::parser::expressions::ExpressionParser;
38
39// Import function registry to check for function existence
40use crate::sql::functions::{FunctionCategory, FunctionRegistry};
41use crate::sql::generators::GeneratorRegistry;
42use std::sync::Arc;
43
44// Import Web CTE parser
45use super::parser::file_cte_parser::FileCteParser;
46use super::parser::web_cte_parser::WebCteParser;
47
48/// Parser mode - controls whether comments are preserved in AST
49#[derive(Debug, Clone, Copy, PartialEq)]
50pub enum ParserMode {
51    /// Standard parsing - skip comments (current behavior, backward compatible)
52    Standard,
53    /// Preserve comments in AST (opt-in for formatters)
54    PreserveComments,
55}
56
57impl Default for ParserMode {
58    fn default() -> Self {
59        ParserMode::Standard
60    }
61}
62
63pub struct Parser {
64    lexer: Lexer,
65    pub current_token: Token,    // Made public for web_cte_parser access
66    in_method_args: bool,        // Track if we're parsing method arguments
67    columns: Vec<String>,        // Known column names for context-aware parsing
68    paren_depth: i32,            // Track parentheses nesting depth
69    paren_depth_stack: Vec<i32>, // Stack to save/restore paren depth for nested contexts
70    _config: ParserConfig,       // Parser configuration including case sensitivity
71    debug_trace: bool,           // Enable detailed token-by-token trace
72    trace_depth: usize,          // Track recursion depth for indented trace
73    function_registry: Arc<FunctionRegistry>, // Function registry for validation
74    generator_registry: Arc<GeneratorRegistry>, // Generator registry for table functions
75    mode: ParserMode,            // Parser mode for comment preservation
76}
77
78impl Parser {
79    #[must_use]
80    pub fn new(input: &str) -> Self {
81        Self::with_mode(input, ParserMode::default())
82    }
83
84    /// Create a new parser with explicit mode for comment preservation
85    #[must_use]
86    pub fn with_mode(input: &str, mode: ParserMode) -> Self {
87        // Choose lexer mode based on parser mode
88        let lexer_mode = match mode {
89            ParserMode::Standard => LexerMode::SkipComments,
90            ParserMode::PreserveComments => LexerMode::PreserveComments,
91        };
92
93        let mut lexer = Lexer::with_mode(input, lexer_mode);
94        let current_token = lexer.next_token();
95        Self {
96            lexer,
97            current_token,
98            in_method_args: false,
99            columns: Vec::new(),
100            paren_depth: 0,
101            paren_depth_stack: Vec::new(),
102            _config: ParserConfig::default(),
103            debug_trace: false,
104            trace_depth: 0,
105            function_registry: Arc::new(FunctionRegistry::new()),
106            generator_registry: Arc::new(GeneratorRegistry::new()),
107            mode,
108        }
109    }
110
111    #[must_use]
112    pub fn with_config(input: &str, config: ParserConfig) -> Self {
113        let mut lexer = Lexer::new(input);
114        let current_token = lexer.next_token();
115        Self {
116            lexer,
117            current_token,
118            in_method_args: false,
119            columns: Vec::new(),
120            paren_depth: 0,
121            paren_depth_stack: Vec::new(),
122            _config: config,
123            debug_trace: false,
124            trace_depth: 0,
125            function_registry: Arc::new(FunctionRegistry::new()),
126            generator_registry: Arc::new(GeneratorRegistry::new()),
127            mode: ParserMode::default(),
128        }
129    }
130
131    #[must_use]
132    pub fn with_columns(mut self, columns: Vec<String>) -> Self {
133        self.columns = columns;
134        self
135    }
136
137    #[must_use]
138    pub fn with_debug_trace(mut self, enabled: bool) -> Self {
139        self.debug_trace = enabled;
140        self
141    }
142
143    #[must_use]
144    pub fn with_function_registry(mut self, registry: Arc<FunctionRegistry>) -> Self {
145        self.function_registry = registry;
146        self
147    }
148
149    #[must_use]
150    pub fn with_generator_registry(mut self, registry: Arc<GeneratorRegistry>) -> Self {
151        self.generator_registry = registry;
152        self
153    }
154
155    fn trace_enter(&mut self, context: &str) {
156        if self.debug_trace {
157            let indent = "  ".repeat(self.trace_depth);
158            eprintln!("{}→ {} | Token: {:?}", indent, context, self.current_token);
159            self.trace_depth += 1;
160        }
161    }
162
163    fn trace_exit(&mut self, context: &str, result: &Result<impl std::fmt::Debug, String>) {
164        if self.debug_trace {
165            self.trace_depth = self.trace_depth.saturating_sub(1);
166            let indent = "  ".repeat(self.trace_depth);
167            match result {
168                Ok(val) => eprintln!("{}← {} ✓ | Result: {:?}", indent, context, val),
169                Err(e) => eprintln!("{}← {} ✗ | Error: {}", indent, context, e),
170            }
171        }
172    }
173
174    fn trace_token(&self, action: &str) {
175        if self.debug_trace {
176            let indent = "  ".repeat(self.trace_depth);
177            eprintln!("{}  {} | Token: {:?}", indent, action, self.current_token);
178        }
179    }
180
181    #[allow(dead_code)]
182    fn peek_token(&self) -> Option<Token> {
183        // Alternative peek that returns owned token
184        let mut temp_lexer = self.lexer.clone();
185        let next_token = temp_lexer.next_token();
186        if matches!(next_token, Token::Eof) {
187            None
188        } else {
189            Some(next_token)
190        }
191    }
192
193    /// Check if current token is one of the reserved keywords that should stop parsing
194    /// Check if an identifier string is a reserved keyword (for backward compatibility)
195    /// This is used when the lexer hasn't properly tokenized keywords and they come through
196    /// as Token::Identifier instead of their proper token types
197    fn is_identifier_reserved(id: &str) -> bool {
198        let id_upper = id.to_uppercase();
199        matches!(
200            id_upper.as_str(),
201            "ORDER" | "HAVING" | "LIMIT" | "OFFSET" | "UNION" | "INTERSECT" | "EXCEPT"
202        )
203    }
204
205    /// Get comparison operator string representation (for autocomplete context)
206    const COMPARISON_OPERATORS: [&'static str; 6] = [" > ", " < ", " >= ", " <= ", " = ", " != "];
207
208    pub fn consume(&mut self, expected: Token) -> Result<(), String> {
209        self.trace_token(&format!("Consuming expected {:?}", expected));
210        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
211            // Track parentheses depth
212            self.update_paren_depth(&expected)?;
213
214            self.current_token = self.lexer.next_token();
215            Ok(())
216        } else {
217            // Provide better error messages for common cases
218            let error_msg = match (&expected, &self.current_token) {
219                (Token::RightParen, Token::Eof) if self.paren_depth > 0 => {
220                    format!(
221                        "Unclosed parenthesis - missing {} closing parenthes{}",
222                        self.paren_depth,
223                        if self.paren_depth == 1 { "is" } else { "es" }
224                    )
225                }
226                (Token::RightParen, _) if self.paren_depth > 0 => {
227                    format!(
228                        "Expected closing parenthesis but found {:?} (currently {} unclosed parenthes{})",
229                        self.current_token,
230                        self.paren_depth,
231                        if self.paren_depth == 1 { "is" } else { "es" }
232                    )
233                }
234                _ => format!("Expected {:?}, found {:?}", expected, self.current_token),
235            };
236            Err(error_msg)
237        }
238    }
239
240    pub fn advance(&mut self) {
241        // Track parentheses depth when advancing
242        match &self.current_token {
243            Token::LeftParen => self.paren_depth += 1,
244            Token::RightParen => {
245                self.paren_depth -= 1;
246                // Note: We don't check for < 0 here because advance() is used
247                // in contexts where we're not necessarily expecting a right paren
248            }
249            _ => {}
250        }
251        let old_token = self.current_token.clone();
252        self.current_token = self.lexer.next_token();
253        if self.debug_trace {
254            let indent = "  ".repeat(self.trace_depth);
255            eprintln!(
256                "{}  Advanced: {:?} → {:?}",
257                indent, old_token, self.current_token
258            );
259        }
260    }
261
262    /// Collect all leading comments before a SQL construct
263    /// This consumes comment tokens and returns them as a Vec<Comment>
264    fn collect_leading_comments(&mut self) -> Vec<Comment> {
265        let mut comments = Vec::new();
266        loop {
267            match &self.current_token {
268                Token::LineComment(text) => {
269                    comments.push(Comment::line(text.clone()));
270                    self.advance();
271                }
272                Token::BlockComment(text) => {
273                    comments.push(Comment::block(text.clone()));
274                    self.advance();
275                }
276                _ => break,
277            }
278        }
279        comments
280    }
281
282    /// Collect a trailing inline comment (on the same line)
283    /// This consumes a single comment token if present
284    fn collect_trailing_comment(&mut self) -> Option<Comment> {
285        match &self.current_token {
286            Token::LineComment(text) => {
287                let comment = Some(Comment::line(text.clone()));
288                self.advance();
289                comment
290            }
291            Token::BlockComment(text) => {
292                let comment = Some(Comment::block(text.clone()));
293                self.advance();
294                comment
295            }
296            _ => None,
297        }
298    }
299
300    fn push_paren_depth(&mut self) {
301        self.paren_depth_stack.push(self.paren_depth);
302        self.paren_depth = 0;
303    }
304
305    fn pop_paren_depth(&mut self) {
306        if let Some(depth) = self.paren_depth_stack.pop() {
307            // Ignore the internal depth - just restore the saved value
308            self.paren_depth = depth;
309        }
310    }
311
312    pub fn parse(&mut self) -> Result<SelectStatement, String> {
313        self.trace_enter("parse");
314
315        // Collect leading comments FIRST (before checking for WITH or SELECT)
316        // This allows comments before WITH clauses to be preserved
317        let leading_comments = if self.mode == ParserMode::PreserveComments {
318            self.collect_leading_comments()
319        } else {
320            vec![]
321        };
322
323        // Now check for WITH clause (after consuming comments)
324        let result = if matches!(self.current_token, Token::With) {
325            let mut stmt = self.parse_with_clause()?;
326            // Attach the leading comments we collected
327            stmt.leading_comments = leading_comments;
328            stmt
329        } else {
330            // For SELECT without WITH, pass comments to inner parser
331            let stmt = self.parse_select_statement_with_comments_public(leading_comments)?;
332            self.check_balanced_parentheses()?;
333            stmt
334        };
335
336        self.trace_exit("parse", &Ok(&result));
337        Ok(result)
338    }
339
340    /// Public wrapper that accepts pre-collected comments and checks parens
341    fn parse_select_statement_with_comments_public(
342        &mut self,
343        comments: Vec<Comment>,
344    ) -> Result<SelectStatement, String> {
345        self.parse_select_statement_with_comments(comments)
346    }
347
348    fn parse_with_clause(&mut self) -> Result<SelectStatement, String> {
349        self.consume(Token::With)?;
350        let ctes = self.parse_cte_list()?;
351
352        // Parse the main SELECT statement - use inner version since we're already tracking parens
353        let mut main_query = self.parse_select_statement_inner_no_comments()?;
354        main_query.ctes = ctes;
355
356        // Check for balanced parentheses at the end of parsing
357        self.check_balanced_parentheses()?;
358
359        Ok(main_query)
360    }
361
362    fn parse_with_clause_inner(&mut self) -> Result<SelectStatement, String> {
363        self.consume(Token::With)?;
364        let ctes = self.parse_cte_list()?;
365
366        // Parse the main SELECT statement (without parenthesis checking for subqueries)
367        let mut main_query = self.parse_select_statement_inner()?;
368        main_query.ctes = ctes;
369
370        Ok(main_query)
371    }
372
373    // Helper function to parse CTE list - eliminates duplication
374    fn parse_cte_list(&mut self) -> Result<Vec<CTE>, String> {
375        let mut ctes = Vec::new();
376
377        // Parse CTEs
378        loop {
379            // Check for WEB keyword before the CTE name (WEB uses an outer marker).
380            // FILE CTEs are detected *inside* the parens after AS, since the design
381            // doc uses `WITH name AS (FILE PATH '...')` rather than `WITH FILE name ...`.
382            let is_web = if matches!(&self.current_token, Token::Web) {
383                self.trace_token("Found WEB keyword for CTE");
384                self.advance();
385                true
386            } else {
387                false
388            };
389
390            // Parse CTE name - allow keywords as CTE names since they're valid identifiers in this context
391            let name = match &self.current_token {
392                Token::Identifier(name) => name.clone(),
393                token => {
394                    // Check if this is a keyword that can be used as an identifier
395                    if let Some(keyword) = token.as_keyword_str() {
396                        // Allow keywords as CTE names (they're valid in this context)
397                        keyword.to_lowercase()
398                    } else {
399                        return Err(format!(
400                            "Expected CTE name after {}",
401                            if is_web { "WEB" } else { "WITH or comma" }
402                        ));
403                    }
404                }
405            };
406            self.advance();
407
408            // Optional column list: WITH t(col1, col2) AS ...
409            let column_list = if matches!(self.current_token, Token::LeftParen) {
410                self.advance();
411                let cols = self.parse_identifier_list()?;
412                self.consume(Token::RightParen)?;
413                Some(cols)
414            } else {
415                None
416            };
417
418            // Expect AS
419            self.consume(Token::As)?;
420
421            let cte_type = if is_web {
422                // Expect opening parenthesis for WEB CTE
423                self.consume(Token::LeftParen)?;
424                // Parse WEB CTE specification using dedicated parser
425                let web_spec = WebCteParser::parse(self)?;
426                // Consume closing parenthesis for WEB CTE
427                self.consume(Token::RightParen)?;
428                CTEType::Web(web_spec)
429            } else {
430                // Push depth BEFORE consuming the opening paren, matching the
431                // original standard-CTE flow. This keeps the `(`...`)` pair
432                // balanced inside the inner context.
433                self.push_paren_depth();
434                self.consume(Token::LeftParen)?;
435
436                let result = if matches!(&self.current_token, Token::File) {
437                    self.trace_token("Found FILE keyword inside CTE parens");
438                    self.advance();
439                    let file_spec = FileCteParser::parse(self)?;
440                    CTEType::File(file_spec)
441                } else {
442                    let query = self.parse_select_statement_inner()?;
443                    CTEType::Standard(query)
444                };
445
446                // Expect closing parenthesis while still in CTE context
447                self.consume(Token::RightParen)?;
448                // Now pop to restore outer depth after consuming both parens
449                self.pop_paren_depth();
450                result
451            };
452
453            ctes.push(CTE {
454                name,
455                column_list,
456                cte_type,
457            });
458
459            // Check for more CTEs
460            if !matches!(self.current_token, Token::Comma) {
461                break;
462            }
463            self.advance();
464        }
465
466        Ok(ctes)
467    }
468
469    /// Helper function to parse an optional table alias (with or without AS keyword)
470    fn parse_optional_alias(&mut self) -> Result<Option<String>, String> {
471        if matches!(self.current_token, Token::As) {
472            self.advance();
473            match &self.current_token {
474                Token::Identifier(name) => {
475                    let alias = name.clone();
476                    self.advance();
477                    Ok(Some(alias))
478                }
479                token => {
480                    // Check if it's a reserved keyword - provide helpful error
481                    if let Some(keyword) = token.as_keyword_str() {
482                        Err(format!(
483                            "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
484                            keyword,
485                            keyword.to_lowercase()
486                        ))
487                    } else {
488                        Err("Expected alias name after AS".to_string())
489                    }
490                }
491            }
492        } else if let Token::Identifier(name) = &self.current_token {
493            // AS is optional for table aliases
494            let alias = name.clone();
495            self.advance();
496            Ok(Some(alias))
497        } else {
498            Ok(None)
499        }
500    }
501
502    /// Helper function to check if an identifier is valid (quoted or regular)
503    fn is_valid_identifier(name: &str) -> bool {
504        if name.starts_with('"') && name.ends_with('"') {
505            // Quoted identifier - always valid
506            true
507        } else {
508            // Regular identifier - check if it's alphanumeric or underscore
509            name.chars().all(|c| c.is_alphanumeric() || c == '_')
510        }
511    }
512
513    /// Helper function to update parentheses depth tracking
514    fn update_paren_depth(&mut self, token: &Token) -> Result<(), String> {
515        match token {
516            Token::LeftParen => self.paren_depth += 1,
517            Token::RightParen => {
518                self.paren_depth -= 1;
519                // Check for extra closing parenthesis
520                if self.paren_depth < 0 {
521                    return Err(
522                        "Unexpected closing parenthesis - no matching opening parenthesis"
523                            .to_string(),
524                    );
525                }
526            }
527            _ => {}
528        }
529        Ok(())
530    }
531
532    /// Helper function to parse comma-separated argument list
533    fn parse_argument_list(&mut self) -> Result<Vec<SqlExpression>, String> {
534        let mut args = Vec::new();
535
536        if !matches!(self.current_token, Token::RightParen) {
537            loop {
538                args.push(self.parse_expression()?);
539
540                if matches!(self.current_token, Token::Comma) {
541                    self.advance();
542                } else {
543                    break;
544                }
545            }
546        }
547
548        Ok(args)
549    }
550
551    /// Helper function to check for balanced parentheses at the end of parsing
552    fn check_balanced_parentheses(&self) -> Result<(), String> {
553        if self.paren_depth > 0 {
554            Err(format!(
555                "Unclosed parenthesis - missing {} closing parenthes{}",
556                self.paren_depth,
557                if self.paren_depth == 1 { "is" } else { "es" }
558            ))
559        } else if self.paren_depth < 0 {
560            Err("Extra closing parenthesis found - no matching opening parenthesis".to_string())
561        } else {
562            Ok(())
563        }
564    }
565
566    /// Check if an expression contains aggregate functions (COUNT, SUM, AVG, etc.)
567    /// This is used to detect unsupported patterns in HAVING clause
568    fn contains_aggregate_function(expr: &SqlExpression) -> bool {
569        match expr {
570            SqlExpression::FunctionCall { name, args, .. } => {
571                // Check if this is an aggregate function
572                let upper_name = name.to_uppercase();
573                let is_aggregate = matches!(
574                    upper_name.as_str(),
575                    "COUNT" | "SUM" | "AVG" | "MIN" | "MAX" | "GROUP_CONCAT" | "STRING_AGG"
576                );
577
578                // If this is an aggregate, return true
579                // Otherwise, recursively check arguments
580                is_aggregate || args.iter().any(Self::contains_aggregate_function)
581            }
582            // Recursively check nested expressions
583            SqlExpression::BinaryOp { left, right, .. } => {
584                Self::contains_aggregate_function(left) || Self::contains_aggregate_function(right)
585            }
586            SqlExpression::Not { expr } => Self::contains_aggregate_function(expr),
587            SqlExpression::MethodCall { args, .. } => {
588                args.iter().any(Self::contains_aggregate_function)
589            }
590            SqlExpression::ChainedMethodCall { base, args, .. } => {
591                Self::contains_aggregate_function(base)
592                    || args.iter().any(Self::contains_aggregate_function)
593            }
594            SqlExpression::CaseExpression {
595                when_branches,
596                else_branch,
597            } => {
598                when_branches.iter().any(|branch| {
599                    Self::contains_aggregate_function(&branch.condition)
600                        || Self::contains_aggregate_function(&branch.result)
601                }) || else_branch
602                    .as_ref()
603                    .map_or(false, |e| Self::contains_aggregate_function(e))
604            }
605            SqlExpression::SimpleCaseExpression {
606                expr,
607                when_branches,
608                else_branch,
609            } => {
610                Self::contains_aggregate_function(expr)
611                    || when_branches.iter().any(|branch| {
612                        Self::contains_aggregate_function(&branch.value)
613                            || Self::contains_aggregate_function(&branch.result)
614                    })
615                    || else_branch
616                        .as_ref()
617                        .map_or(false, |e| Self::contains_aggregate_function(e))
618            }
619            SqlExpression::ScalarSubquery { query } => {
620                // Subqueries can have their own aggregates, but that's fine
621                // We're only checking the outer HAVING clause
622                query
623                    .having
624                    .as_ref()
625                    .map_or(false, |h| Self::contains_aggregate_function(h))
626            }
627            // Leaf nodes - no aggregates
628            SqlExpression::Column(_)
629            | SqlExpression::StringLiteral(_)
630            | SqlExpression::NumberLiteral(_)
631            | SqlExpression::BooleanLiteral(_)
632            | SqlExpression::Null
633            | SqlExpression::DateTimeConstructor { .. }
634            | SqlExpression::DateTimeToday { .. } => false,
635
636            // Window functions contain aggregates by definition
637            SqlExpression::WindowFunction { .. } => true,
638
639            // Between has three parts to check
640            SqlExpression::Between { expr, lower, upper } => {
641                Self::contains_aggregate_function(expr)
642                    || Self::contains_aggregate_function(lower)
643                    || Self::contains_aggregate_function(upper)
644            }
645
646            // IN list - check expr and all values
647            SqlExpression::InList { expr, values } | SqlExpression::NotInList { expr, values } => {
648                Self::contains_aggregate_function(expr)
649                    || values.iter().any(Self::contains_aggregate_function)
650            }
651
652            // IN subquery - check expr and subquery
653            SqlExpression::InSubquery { expr, subquery }
654            | SqlExpression::NotInSubquery { expr, subquery } => {
655                Self::contains_aggregate_function(expr)
656                    || subquery
657                        .having
658                        .as_ref()
659                        .map_or(false, |h| Self::contains_aggregate_function(h))
660            }
661
662            // UNNEST - check column expression
663            SqlExpression::Unnest { column, .. } => Self::contains_aggregate_function(column),
664        }
665    }
666
667    fn parse_select_statement(&mut self) -> Result<SelectStatement, String> {
668        self.trace_enter("parse_select_statement");
669        let result = self.parse_select_statement_inner()?;
670
671        // Check for balanced parentheses at the end of parsing
672        self.check_balanced_parentheses()?;
673
674        Ok(result)
675    }
676
677    fn parse_select_statement_inner(&mut self) -> Result<SelectStatement, String> {
678        // Collect leading comments ONLY in PreserveComments mode
679        let leading_comments = if self.mode == ParserMode::PreserveComments {
680            self.collect_leading_comments()
681        } else {
682            vec![]
683        };
684
685        self.parse_select_statement_with_comments(leading_comments)
686    }
687
688    /// Parse SELECT statement without collecting leading comments
689    /// Used when comments were already collected (e.g., before WITH clause)
690    fn parse_select_statement_inner_no_comments(&mut self) -> Result<SelectStatement, String> {
691        self.parse_select_statement_with_comments(vec![])
692    }
693
694    /// Core SELECT parsing logic - takes pre-collected comments
695    fn parse_select_statement_with_comments(
696        &mut self,
697        leading_comments: Vec<Comment>,
698    ) -> Result<SelectStatement, String> {
699        self.consume(Token::Select)?;
700
701        // Check for DISTINCT keyword
702        let distinct = if matches!(self.current_token, Token::Distinct) {
703            self.advance();
704            true
705        } else {
706            false
707        };
708
709        // Parse SELECT items (supports computed expressions)
710        let select_items = self.parse_select_items()?;
711
712        // Create legacy columns vector for backward compatibility
713        let columns = select_items
714            .iter()
715            .map(|item| match item {
716                SelectItem::Star { .. } => "*".to_string(),
717                SelectItem::StarExclude { .. } => "*".to_string(), // Treated as * in legacy columns
718                SelectItem::Column {
719                    column: col_ref, ..
720                } => col_ref.name.clone(),
721                SelectItem::Expression { alias, .. } => alias.clone(),
722            })
723            .collect();
724
725        // Parse INTO clause (for temporary tables) - comes immediately after SELECT items
726        let into_table = if matches!(self.current_token, Token::Into) {
727            self.advance();
728            Some(self.parse_into_clause()?)
729        } else {
730            None
731        };
732
733        // Parse FROM clause - can be a table name, subquery, or table function
734        let (from_table, from_subquery, from_function, from_alias) = if matches!(
735            self.current_token,
736            Token::From
737        ) {
738            self.advance();
739
740            // Check for table function like RANGE()
741            // Also handle keywords that could be table/CTE names
742            let table_or_function_name = match &self.current_token {
743                Token::Identifier(name) => Some(name.clone()),
744                token => {
745                    // Check if it's a keyword that can be used as table/CTE name
746                    token.as_keyword_str().map(|k| k.to_lowercase())
747                }
748            };
749
750            if let Some(name) = table_or_function_name {
751                // Check if this is a table function by consulting the registry
752                // We need to lookahead to see if there's a parenthesis to distinguish
753                // between a function call and a table with the same name
754                let has_paren = self.peek_token() == Some(Token::LeftParen);
755                if self.debug_trace {
756                    eprintln!(
757                        "  Checking {} for table function, has_paren={}",
758                        name, has_paren
759                    );
760                }
761
762                // Check if it's a known table function or generator
763                // In FROM clause context, prioritize generators over scalar functions
764                let is_table_function = if has_paren {
765                    // First check generator registry (for FROM clause context)
766                    if self.debug_trace {
767                        eprintln!("  Checking generator registry for {}", name.to_uppercase());
768                    }
769                    if let Some(_gen) = self.generator_registry.get(&name.to_uppercase()) {
770                        if self.debug_trace {
771                            eprintln!("  Found {} in generator registry", name);
772                        }
773                        self.trace_token(&format!("Found generator: {}", name));
774                        true
775                    } else {
776                        // Then check if it's a table function in the function registry
777                        if let Some(func) = self.function_registry.get(&name.to_uppercase()) {
778                            let sig = func.signature();
779                            let is_table_fn = sig.category == FunctionCategory::TableFunction;
780                            if self.debug_trace {
781                                eprintln!(
782                                    "  Found {} in function registry, is_table_function={}",
783                                    name, is_table_fn
784                                );
785                            }
786                            if is_table_fn {
787                                self.trace_token(&format!(
788                                    "Found table function in function registry: {}",
789                                    name
790                                ));
791                            }
792                            is_table_fn
793                        } else {
794                            if self.debug_trace {
795                                eprintln!("  {} not found in either registry", name);
796                                self.trace_token(&format!(
797                                    "Not found as generator or table function: {}",
798                                    name
799                                ));
800                            }
801                            false
802                        }
803                    }
804                } else {
805                    if self.debug_trace {
806                        eprintln!("  No parenthesis after {}, treating as table", name);
807                    }
808                    false
809                };
810
811                if is_table_function {
812                    // Parse table function
813                    let function_name = name.clone();
814                    self.advance(); // Skip function name
815
816                    // Parse arguments
817                    self.consume(Token::LeftParen)?;
818                    let args = self.parse_argument_list()?;
819                    self.consume(Token::RightParen)?;
820
821                    // Optional alias
822                    let alias = if matches!(self.current_token, Token::As) {
823                        self.advance();
824                        match &self.current_token {
825                            Token::Identifier(name) => {
826                                let alias = name.clone();
827                                self.advance();
828                                Some(alias)
829                            }
830                            token => {
831                                if let Some(keyword) = token.as_keyword_str() {
832                                    return Err(format!(
833                                            "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
834                                            keyword,
835                                            keyword.to_lowercase()
836                                        ));
837                                } else {
838                                    return Err("Expected alias name after AS".to_string());
839                                }
840                            }
841                        }
842                    } else if let Token::Identifier(name) = &self.current_token {
843                        let alias = name.clone();
844                        self.advance();
845                        Some(alias)
846                    } else {
847                        None
848                    };
849
850                    (
851                        None,
852                        None,
853                        Some(TableFunction::Generator {
854                            name: function_name,
855                            args,
856                        }),
857                        alias,
858                    )
859                } else {
860                    // Not a RANGE, SPLIT, or generator function, so it's a regular table name
861                    let table_name = name.clone();
862                    self.advance();
863
864                    // Check for optional alias
865                    let alias = self.parse_optional_alias()?;
866
867                    (Some(table_name), None, None, alias)
868                }
869            } else if matches!(self.current_token, Token::LeftParen) {
870                // Check for subquery: FROM (SELECT ...) or FROM (WITH ... SELECT ...)
871                self.advance();
872
873                // Parse the subquery - it might start with WITH
874                let subquery = if matches!(self.current_token, Token::With) {
875                    self.parse_with_clause_inner()?
876                } else {
877                    self.parse_select_statement_inner()?
878                };
879
880                self.consume(Token::RightParen)?;
881
882                // Subqueries must have an alias
883                let alias = if matches!(self.current_token, Token::As) {
884                    self.advance();
885                    match &self.current_token {
886                        Token::Identifier(name) => {
887                            let alias = name.clone();
888                            self.advance();
889                            alias
890                        }
891                        token => {
892                            if let Some(keyword) = token.as_keyword_str() {
893                                return Err(format!(
894                                        "Reserved keyword '{}' cannot be used as subquery alias. Use a different name or quote it with double quotes: \"{}\"",
895                                        keyword,
896                                        keyword.to_lowercase()
897                                    ));
898                            } else {
899                                return Err("Expected alias name after AS".to_string());
900                            }
901                        }
902                    }
903                } else {
904                    // AS is optional, but alias is required
905                    match &self.current_token {
906                        Token::Identifier(name) => {
907                            let alias = name.clone();
908                            self.advance();
909                            alias
910                        }
911                        _ => {
912                            return Err(
913                                "Subquery in FROM must have an alias (e.g., AS t)".to_string()
914                            )
915                        }
916                    }
917                };
918
919                (None, Some(Box::new(subquery)), None, Some(alias))
920            } else {
921                // Regular table name - handle identifiers and keywords
922                let table_name = match &self.current_token {
923                    Token::Identifier(table) => table.clone(),
924                    Token::QuotedIdentifier(table) => table.clone(),
925                    token => {
926                        // Check if it's a keyword that can be used as table/CTE name
927                        if let Some(keyword) = token.as_keyword_str() {
928                            keyword.to_lowercase()
929                        } else {
930                            return Err("Expected table name or subquery after FROM".to_string());
931                        }
932                    }
933                };
934
935                self.advance();
936
937                // Check for optional alias
938                let alias = self.parse_optional_alias()?;
939
940                (Some(table_name), None, None, alias)
941            }
942        } else {
943            (None, None, None, None)
944        };
945
946        // Check for PIVOT after FROM table source
947        // PIVOT wraps the FROM table/subquery before JOINs are processed
948        // This creates a PIVOT TableSource that will be processed by PivotExpander transformer
949        let pivot_source = if matches!(self.current_token, Token::Pivot) {
950            // Build a TableSource from the current FROM clause
951            let source = if let Some(ref table_name) = from_table {
952                TableSource::Table(table_name.clone())
953            } else if let Some(ref subquery) = from_subquery {
954                TableSource::DerivedTable {
955                    query: subquery.clone(),
956                    alias: from_alias.clone().unwrap_or_default(),
957                }
958            } else {
959                return Err("PIVOT requires a table or subquery source".to_string());
960            };
961
962            // Parse the PIVOT clause - this wraps the source in a Pivot TableSource
963            let pivoted = self.parse_pivot_clause(source)?;
964            Some(pivoted)
965        } else {
966            None
967        };
968
969        // Parse JOIN clauses
970        let mut joins = Vec::new();
971        while self.is_join_token() {
972            joins.push(self.parse_join_clause()?);
973        }
974
975        let where_clause = if matches!(self.current_token, Token::Where) {
976            self.advance();
977            Some(self.parse_where_clause()?)
978        } else {
979            None
980        };
981
982        let group_by = if matches!(self.current_token, Token::GroupBy) {
983            self.advance();
984            // Parse expressions instead of just identifiers for GROUP BY
985            // This allows GROUP BY TIME_BUCKET(...), CASE ..., etc.
986            Some(self.parse_expression_list()?)
987        } else {
988            None
989        };
990
991        // Parse HAVING clause (must come after GROUP BY)
992        let having = if matches!(self.current_token, Token::Having) {
993            if group_by.is_none() {
994                return Err("HAVING clause requires GROUP BY".to_string());
995            }
996            self.advance();
997            let having_expr = self.parse_expression()?;
998
999            // Note: Aggregate functions in HAVING are now supported via the
1000            // HavingAliasTransformer preprocessing step, which automatically
1001            // adds aliases and rewrites the HAVING clause to use them.
1002
1003            Some(having_expr)
1004        } else {
1005            None
1006        };
1007
1008        // Parse QUALIFY clause (Snowflake-style window function filtering)
1009        // QUALIFY filters on window function results without needing a subquery
1010        // Example: SELECT *, ROW_NUMBER() OVER (...) AS rn FROM t QUALIFY rn <= 3
1011        let qualify = if matches!(self.current_token, Token::Qualify) {
1012            self.advance();
1013            let qualify_expr = self.parse_expression()?;
1014
1015            // Note: QUALIFY is handled by the QualifyToWhereTransformer preprocessing step
1016            // which converts it to WHERE after window functions are lifted to CTEs
1017
1018            Some(qualify_expr)
1019        } else {
1020            None
1021        };
1022
1023        // Parse ORDER BY clause (comes after GROUP BY, HAVING, and QUALIFY)
1024        let order_by = if matches!(self.current_token, Token::OrderBy) {
1025            self.trace_token("Found OrderBy token");
1026            self.advance();
1027            Some(self.parse_order_by_list()?)
1028        } else if let Token::Identifier(s) = &self.current_token {
1029            // This shouldn't happen if the lexer properly tokenizes ORDER BY
1030            // But keeping as fallback for compatibility
1031            if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
1032                self.trace_token("Warning: ORDER as identifier instead of OrderBy token");
1033                self.advance(); // consume ORDER
1034                if matches!(&self.current_token, Token::By) {
1035                    self.advance(); // consume BY
1036                    Some(self.parse_order_by_list()?)
1037                } else {
1038                    return Err("Expected BY after ORDER".to_string());
1039                }
1040            } else {
1041                None
1042            }
1043        } else {
1044            None
1045        };
1046
1047        // Parse LIMIT clause
1048        let limit = if matches!(self.current_token, Token::Limit) {
1049            self.advance();
1050            match &self.current_token {
1051                Token::NumberLiteral(num) => {
1052                    let limit_val = num
1053                        .parse::<usize>()
1054                        .map_err(|_| format!("Invalid LIMIT value: {num}"))?;
1055                    self.advance();
1056                    Some(limit_val)
1057                }
1058                _ => return Err("Expected number after LIMIT".to_string()),
1059            }
1060        } else {
1061            None
1062        };
1063
1064        // Parse OFFSET clause
1065        let offset = if matches!(self.current_token, Token::Offset) {
1066            self.advance();
1067            match &self.current_token {
1068                Token::NumberLiteral(num) => {
1069                    let offset_val = num
1070                        .parse::<usize>()
1071                        .map_err(|_| format!("Invalid OFFSET value: {num}"))?;
1072                    self.advance();
1073                    Some(offset_val)
1074                }
1075                _ => return Err("Expected number after OFFSET".to_string()),
1076            }
1077        } else {
1078            None
1079        };
1080
1081        // Parse INTO clause (alternative position - SQL Server also supports INTO after all clauses)
1082        // This handles: SELECT * FROM table WHERE x > 5 INTO #temp
1083        // If INTO was already parsed after SELECT, this will be None (can't have two INTOs)
1084        let into_table = if into_table.is_none() && matches!(self.current_token, Token::Into) {
1085            self.advance();
1086            Some(self.parse_into_clause()?)
1087        } else {
1088            into_table // Keep the one from after SELECT if it exists
1089        };
1090
1091        // Parse UNION/INTERSECT/EXCEPT operations
1092        let set_operations = self.parse_set_operations()?;
1093
1094        // Collect trailing comment ONLY in PreserveComments mode
1095        let trailing_comment = if self.mode == ParserMode::PreserveComments {
1096            self.collect_trailing_comment()
1097        } else {
1098            None
1099        };
1100
1101        // Build unified from_source from parsed components
1102        // PIVOT takes precedence if it exists (it already wraps the base source)
1103        let from_source = if let Some(pivot) = pivot_source {
1104            Some(pivot)
1105        } else if let Some(ref table_name) = from_table {
1106            Some(TableSource::Table(table_name.clone()))
1107        } else if let Some(ref subquery) = from_subquery {
1108            Some(TableSource::DerivedTable {
1109                query: subquery.clone(),
1110                alias: from_alias.clone().unwrap_or_default(),
1111            })
1112        } else if let Some(ref _func) = from_function {
1113            // Table functions don't use TableSource yet, keep as None for now
1114            // TODO: Add TableFunction variant to TableSource
1115            None
1116        } else {
1117            None
1118        };
1119
1120        Ok(SelectStatement {
1121            distinct,
1122            columns,
1123            select_items,
1124            from_source,
1125            #[allow(deprecated)]
1126            from_table,
1127            #[allow(deprecated)]
1128            from_subquery,
1129            #[allow(deprecated)]
1130            from_function,
1131            #[allow(deprecated)]
1132            from_alias,
1133            joins,
1134            where_clause,
1135            order_by,
1136            group_by,
1137            having,
1138            qualify,
1139            limit,
1140            offset,
1141            ctes: Vec::new(), // Will be populated by WITH clause parser
1142            into_table,
1143            set_operations,
1144            leading_comments,
1145            trailing_comment,
1146        })
1147    }
1148
1149    /// Parse UNION/INTERSECT/EXCEPT operations
1150    /// Returns a vector of (operation, select_statement) pairs
1151    fn parse_set_operations(
1152        &mut self,
1153    ) -> Result<Vec<(SetOperation, Box<SelectStatement>)>, String> {
1154        let mut operations = Vec::new();
1155
1156        while matches!(
1157            self.current_token,
1158            Token::Union | Token::Intersect | Token::Except
1159        ) {
1160            // Determine the operation type
1161            let operation = match &self.current_token {
1162                Token::Union => {
1163                    self.advance();
1164                    // Check for ALL keyword
1165                    if let Token::Identifier(id) = &self.current_token {
1166                        if id.to_uppercase() == "ALL" {
1167                            self.advance();
1168                            SetOperation::UnionAll
1169                        } else {
1170                            SetOperation::Union
1171                        }
1172                    } else {
1173                        SetOperation::Union
1174                    }
1175                }
1176                Token::Intersect => {
1177                    self.advance();
1178                    SetOperation::Intersect
1179                }
1180                Token::Except => {
1181                    self.advance();
1182                    SetOperation::Except
1183                }
1184                _ => unreachable!(),
1185            };
1186
1187            // Parse the next SELECT statement
1188            let next_select = self.parse_select_statement_inner()?;
1189
1190            operations.push((operation, Box::new(next_select)));
1191        }
1192
1193        Ok(operations)
1194    }
1195
1196    /// Parse SELECT items that support computed expressions with aliases
1197    fn parse_select_items(&mut self) -> Result<Vec<SelectItem>, String> {
1198        let mut items = Vec::new();
1199
1200        loop {
1201            // Check for qualified star (table.*) or unqualified star (*)
1202            // First check if we have identifier.* pattern
1203            if let Token::Identifier(name) = &self.current_token.clone() {
1204                // Peek ahead to check for .* pattern
1205                let saved_pos = self.lexer.clone();
1206                let saved_token = self.current_token.clone();
1207                let table_name = name.clone();
1208
1209                self.advance();
1210
1211                if matches!(self.current_token, Token::Dot) {
1212                    self.advance();
1213                    if matches!(self.current_token, Token::Star) {
1214                        // This is table.* pattern
1215                        items.push(SelectItem::Star {
1216                            table_prefix: Some(table_name),
1217                            leading_comments: vec![],
1218                            trailing_comment: None,
1219                        });
1220                        self.advance();
1221
1222                        // Continue to next item or end
1223                        if matches!(self.current_token, Token::Comma) {
1224                            self.advance();
1225                            continue;
1226                        } else {
1227                            break;
1228                        }
1229                    }
1230                }
1231
1232                // Not table.*, restore position and continue with normal parsing
1233                self.lexer = saved_pos;
1234                self.current_token = saved_token;
1235            }
1236
1237            // Check for unqualified *
1238            if matches!(self.current_token, Token::Star) {
1239                self.advance(); // consume *
1240
1241                // Check for EXCLUDE clause
1242                if matches!(self.current_token, Token::Exclude) {
1243                    self.advance(); // consume EXCLUDE
1244
1245                    // Expect opening paren
1246                    if !matches!(self.current_token, Token::LeftParen) {
1247                        return Err("Expected '(' after EXCLUDE".to_string());
1248                    }
1249                    self.advance(); // consume (
1250
1251                    // Parse column list
1252                    let mut excluded_columns = Vec::new();
1253                    loop {
1254                        match &self.current_token {
1255                            Token::Identifier(col_name) | Token::QuotedIdentifier(col_name) => {
1256                                excluded_columns.push(col_name.clone());
1257                                self.advance();
1258                            }
1259                            _ => return Err("Expected column name in EXCLUDE list".to_string()),
1260                        }
1261
1262                        // Check for comma or closing paren
1263                        if matches!(self.current_token, Token::Comma) {
1264                            self.advance();
1265                        } else if matches!(self.current_token, Token::RightParen) {
1266                            self.advance(); // consume )
1267                            break;
1268                        } else {
1269                            return Err("Expected ',' or ')' in EXCLUDE list".to_string());
1270                        }
1271                    }
1272
1273                    if excluded_columns.is_empty() {
1274                        return Err("EXCLUDE list cannot be empty".to_string());
1275                    }
1276
1277                    items.push(SelectItem::StarExclude {
1278                        table_prefix: None,
1279                        excluded_columns,
1280                        leading_comments: vec![],
1281                        trailing_comment: None,
1282                    });
1283                } else {
1284                    // Regular * without EXCLUDE
1285                    items.push(SelectItem::Star {
1286                        table_prefix: None,
1287                        leading_comments: vec![],
1288                        trailing_comment: None,
1289                    });
1290                }
1291            } else {
1292                // Parse expression or column
1293                let expr = self.parse_comparison()?; // Use comparison to support IS NULL and other comparisons
1294
1295                // Check for AS alias
1296                let alias = if matches!(self.current_token, Token::As) {
1297                    self.advance();
1298                    match &self.current_token {
1299                        Token::Identifier(alias_name) => {
1300                            let alias = alias_name.clone();
1301                            self.advance();
1302                            alias
1303                        }
1304                        Token::QuotedIdentifier(alias_name) => {
1305                            let alias = alias_name.clone();
1306                            self.advance();
1307                            alias
1308                        }
1309                        token => {
1310                            if let Some(keyword) = token.as_keyword_str() {
1311                                return Err(format!(
1312                                    "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
1313                                    keyword,
1314                                    keyword.to_lowercase()
1315                                ));
1316                            } else {
1317                                return Err("Expected alias name after AS".to_string());
1318                            }
1319                        }
1320                    }
1321                } else {
1322                    // Generate default alias based on expression
1323                    match &expr {
1324                        SqlExpression::Column(col_ref) => col_ref.name.clone(),
1325                        _ => format!("expr_{}", items.len() + 1), // Default alias for computed expressions
1326                    }
1327                };
1328
1329                // Create SelectItem based on expression type
1330                let item = match expr {
1331                    SqlExpression::Column(col_ref) if alias == col_ref.name => {
1332                        // Simple column reference without alias
1333                        SelectItem::Column {
1334                            column: col_ref,
1335                            leading_comments: vec![],
1336                            trailing_comment: None,
1337                        }
1338                    }
1339                    _ => {
1340                        // Computed expression or column with different alias
1341                        SelectItem::Expression {
1342                            expr,
1343                            alias,
1344                            leading_comments: vec![],
1345                            trailing_comment: None,
1346                        }
1347                    }
1348                };
1349
1350                items.push(item);
1351            }
1352
1353            // Check for comma to continue
1354            if matches!(self.current_token, Token::Comma) {
1355                self.advance();
1356            } else {
1357                break;
1358            }
1359        }
1360
1361        Ok(items)
1362    }
1363
1364    fn parse_identifier_list(&mut self) -> Result<Vec<String>, String> {
1365        let mut identifiers = Vec::new();
1366
1367        loop {
1368            match &self.current_token {
1369                Token::Identifier(id) => {
1370                    // Check if this is a reserved keyword that should stop identifier parsing
1371                    if Self::is_identifier_reserved(id) {
1372                        // Stop parsing identifiers if we hit a reserved keyword
1373                        break;
1374                    }
1375                    identifiers.push(id.clone());
1376                    self.advance();
1377                }
1378                Token::QuotedIdentifier(id) => {
1379                    // Handle quoted identifiers like "Customer Id"
1380                    identifiers.push(id.clone());
1381                    self.advance();
1382                }
1383                _ => {
1384                    // Stop parsing if we hit any other token type
1385                    break;
1386                }
1387            }
1388
1389            if matches!(self.current_token, Token::Comma) {
1390                self.advance();
1391            } else {
1392                break;
1393            }
1394        }
1395
1396        if identifiers.is_empty() {
1397            return Err("Expected at least one identifier".to_string());
1398        }
1399
1400        Ok(identifiers)
1401    }
1402
1403    fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
1404        let mut partition_by = Vec::new();
1405        let mut order_by = Vec::new();
1406
1407        // Check for PARTITION BY
1408        if matches!(self.current_token, Token::Partition) {
1409            self.advance(); // consume PARTITION
1410            if !matches!(self.current_token, Token::By) {
1411                return Err("Expected BY after PARTITION".to_string());
1412            }
1413            self.advance(); // consume BY
1414
1415            // Parse partition columns
1416            partition_by = self.parse_identifier_list()?;
1417        }
1418
1419        // Check for ORDER BY
1420        if matches!(self.current_token, Token::OrderBy) {
1421            self.advance(); // consume ORDER BY (as single token)
1422            order_by = self.parse_order_by_list()?;
1423        } else if let Token::Identifier(s) = &self.current_token {
1424            if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
1425                // Handle ORDER BY as two tokens
1426                self.advance(); // consume ORDER
1427                if !matches!(self.current_token, Token::By) {
1428                    return Err("Expected BY after ORDER".to_string());
1429                }
1430                self.advance(); // consume BY
1431                order_by = self.parse_order_by_list()?;
1432            }
1433        }
1434
1435        // Parse optional window frame (ROWS/RANGE BETWEEN ... AND ...)
1436        let mut frame = self.parse_window_frame()?;
1437
1438        // SQL Standard: If ORDER BY is present but no frame is specified,
1439        // default to RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
1440        // This matches behavior of PostgreSQL, MySQL, SQL Server, etc.
1441        if !order_by.is_empty() && frame.is_none() {
1442            frame = Some(WindowFrame {
1443                unit: FrameUnit::Range,
1444                start: FrameBound::UnboundedPreceding,
1445                end: Some(FrameBound::CurrentRow),
1446            });
1447        }
1448
1449        Ok(WindowSpec {
1450            partition_by,
1451            order_by,
1452            frame,
1453        })
1454    }
1455
1456    fn parse_order_by_list(&mut self) -> Result<Vec<OrderByItem>, String> {
1457        let mut order_items = Vec::new();
1458
1459        loop {
1460            // Parse ANY expression (not just column names)
1461            // This supports:
1462            // - Simple columns: region
1463            // - Qualified columns: table.column
1464            // - Aggregate functions: SUM(sales_amount)
1465            // - Arithmetic: sales_amount * 1.1
1466            // - CASE expressions: CASE WHEN ... END
1467            let expr = self.parse_expression()?;
1468
1469            // Check for ASC/DESC
1470            let direction = match &self.current_token {
1471                Token::Asc => {
1472                    self.advance();
1473                    SortDirection::Asc
1474                }
1475                Token::Desc => {
1476                    self.advance();
1477                    SortDirection::Desc
1478                }
1479                _ => SortDirection::Asc, // Default to ASC if not specified
1480            };
1481
1482            order_items.push(OrderByItem { expr, direction });
1483
1484            if matches!(self.current_token, Token::Comma) {
1485                self.advance();
1486            } else {
1487                break;
1488            }
1489        }
1490
1491        Ok(order_items)
1492    }
1493
1494    /// Parse INTO clause for temporary tables
1495    /// Syntax: INTO #table_name
1496    fn parse_into_clause(&mut self) -> Result<IntoTable, String> {
1497        // Expect an identifier starting with #
1498        let name = match &self.current_token {
1499            Token::Identifier(id) if id.starts_with('#') => {
1500                let table_name = id.clone();
1501                self.advance();
1502                table_name
1503            }
1504            Token::Identifier(id) => {
1505                return Err(format!(
1506                    "Temporary table name must start with #, got: {}",
1507                    id
1508                ));
1509            }
1510            _ => {
1511                return Err(
1512                    "Expected temporary table name (starting with #) after INTO".to_string()
1513                );
1514            }
1515        };
1516
1517        Ok(IntoTable { name })
1518    }
1519
1520    fn parse_window_frame(&mut self) -> Result<Option<WindowFrame>, String> {
1521        // Check for ROWS or RANGE keyword
1522        let unit = match &self.current_token {
1523            Token::Rows => {
1524                self.advance();
1525                FrameUnit::Rows
1526            }
1527            Token::Identifier(id) if id.to_uppercase() == "RANGE" => {
1528                // RANGE as window frame unit
1529                self.advance();
1530                FrameUnit::Range
1531            }
1532            _ => return Ok(None), // No window frame specified
1533        };
1534
1535        // Check for BETWEEN or just a single bound
1536        let (start, end) = if let Token::Between = &self.current_token {
1537            self.advance(); // consume BETWEEN
1538                            // Parse start bound
1539            let start = self.parse_frame_bound()?;
1540
1541            // Expect AND
1542            if !matches!(&self.current_token, Token::And) {
1543                return Err("Expected AND after window frame start bound".to_string());
1544            }
1545            self.advance();
1546
1547            // Parse end bound
1548            let end = self.parse_frame_bound()?;
1549            (start, Some(end))
1550        } else {
1551            // Single bound (e.g., "ROWS 5 PRECEDING")
1552            let bound = self.parse_frame_bound()?;
1553            (bound, None)
1554        };
1555
1556        Ok(Some(WindowFrame { unit, start, end }))
1557    }
1558
1559    fn parse_frame_bound(&mut self) -> Result<FrameBound, String> {
1560        match &self.current_token {
1561            Token::Unbounded => {
1562                self.advance();
1563                match &self.current_token {
1564                    Token::Preceding => {
1565                        self.advance();
1566                        Ok(FrameBound::UnboundedPreceding)
1567                    }
1568                    Token::Following => {
1569                        self.advance();
1570                        Ok(FrameBound::UnboundedFollowing)
1571                    }
1572                    _ => Err("Expected PRECEDING or FOLLOWING after UNBOUNDED".to_string()),
1573                }
1574            }
1575            Token::Current => {
1576                self.advance();
1577                if matches!(&self.current_token, Token::Row) {
1578                    self.advance();
1579                    return Ok(FrameBound::CurrentRow);
1580                }
1581                Err("Expected ROW after CURRENT".to_string())
1582            }
1583            Token::NumberLiteral(num) => {
1584                let n: i64 = num
1585                    .parse()
1586                    .map_err(|_| "Invalid number in window frame".to_string())?;
1587                self.advance();
1588                match &self.current_token {
1589                    Token::Preceding => {
1590                        self.advance();
1591                        Ok(FrameBound::Preceding(n))
1592                    }
1593                    Token::Following => {
1594                        self.advance();
1595                        Ok(FrameBound::Following(n))
1596                    }
1597                    _ => Err("Expected PRECEDING or FOLLOWING after number".to_string()),
1598                }
1599            }
1600            _ => Err("Invalid window frame bound".to_string()),
1601        }
1602    }
1603
1604    fn parse_where_clause(&mut self) -> Result<WhereClause, String> {
1605        // Parse the entire WHERE clause as a single expression tree
1606        // The logical operators (AND/OR) are now handled within parse_expression
1607        let expr = self.parse_expression()?;
1608
1609        // Check for unexpected closing parenthesis
1610        if matches!(self.current_token, Token::RightParen) && self.paren_depth <= 0 {
1611            return Err(
1612                "Unexpected closing parenthesis - no matching opening parenthesis".to_string(),
1613            );
1614        }
1615
1616        // Create a single condition with the entire expression
1617        let conditions = vec![Condition {
1618            expr,
1619            connector: None,
1620        }];
1621
1622        Ok(WhereClause { conditions })
1623    }
1624
1625    fn parse_expression(&mut self) -> Result<SqlExpression, String> {
1626        self.trace_enter("parse_expression");
1627        // Start with logical OR as the lowest precedence operator
1628        // The hierarchy is: OR -> AND -> comparison -> additive -> multiplicative -> primary
1629        let mut left = self.parse_logical_or()?;
1630
1631        // Handle IN operator (not preceded by NOT)
1632        // This uses the modular comparison module
1633        left = parse_in_operator(self, left)?;
1634
1635        let result = Ok(left);
1636        self.trace_exit("parse_expression", &result);
1637        result
1638    }
1639
1640    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
1641        // Use the new modular comparison expression parser
1642        parse_comparison_expr(self)
1643    }
1644
1645    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
1646        // Use the new modular arithmetic expression parser
1647        parse_additive_expr(self)
1648    }
1649
1650    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
1651        // Use the new modular arithmetic expression parser
1652        parse_multiplicative_expr(self)
1653    }
1654
1655    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
1656        // Use the new modular logical expression parser
1657        parse_logical_or_expr(self)
1658    }
1659
1660    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
1661        // Use the new modular logical expression parser
1662        parse_logical_and_expr(self)
1663    }
1664
1665    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
1666        // Use the new modular CASE expression parser
1667        parse_case_expr(self)
1668    }
1669
1670    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
1671        // Use the new modular primary expression parser
1672        // Clone the necessary data to avoid borrowing issues
1673        let columns = self.columns.clone();
1674        let in_method_args = self.in_method_args;
1675        let ctx = PrimaryExpressionContext {
1676            columns: &columns,
1677            in_method_args,
1678        };
1679        parse_primary_expr(self, &ctx)
1680    }
1681
1682    // Keep the old implementation temporarily for reference (will be removed)
1683    fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
1684        // Set flag to indicate we're parsing method arguments
1685        self.in_method_args = true;
1686
1687        let args = self.parse_argument_list()?;
1688
1689        // Clear the flag
1690        self.in_method_args = false;
1691
1692        Ok(args)
1693    }
1694
1695    fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
1696        let mut args = Vec::new();
1697        let mut has_distinct = false;
1698
1699        if !matches!(self.current_token, Token::RightParen) {
1700            // Check if first argument starts with DISTINCT
1701            if matches!(self.current_token, Token::Distinct) {
1702                self.advance(); // consume DISTINCT
1703                has_distinct = true;
1704            }
1705
1706            // Parse the expression (either after DISTINCT or directly)
1707            args.push(self.parse_additive()?);
1708
1709            // Parse any remaining arguments (DISTINCT only applies to first arg for aggregates)
1710            while matches!(self.current_token, Token::Comma) {
1711                self.advance();
1712                args.push(self.parse_additive()?);
1713            }
1714        }
1715
1716        Ok((args, has_distinct))
1717    }
1718
1719    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
1720        let mut expressions = Vec::new();
1721
1722        loop {
1723            expressions.push(self.parse_expression()?);
1724
1725            if matches!(self.current_token, Token::Comma) {
1726                self.advance();
1727            } else {
1728                break;
1729            }
1730        }
1731
1732        Ok(expressions)
1733    }
1734
1735    #[must_use]
1736    pub fn get_position(&self) -> usize {
1737        self.lexer.get_position()
1738    }
1739
1740    // Check if current token is a JOIN-related token
1741    fn is_join_token(&self) -> bool {
1742        matches!(
1743            self.current_token,
1744            Token::Join | Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
1745        )
1746    }
1747
1748    // Parse a JOIN clause
1749    fn parse_join_clause(&mut self) -> Result<JoinClause, String> {
1750        // Determine join type
1751        let join_type = match &self.current_token {
1752            Token::Join => {
1753                self.advance();
1754                JoinType::Inner // Default JOIN is INNER JOIN
1755            }
1756            Token::Inner => {
1757                self.advance();
1758                if !matches!(self.current_token, Token::Join) {
1759                    return Err("Expected JOIN after INNER".to_string());
1760                }
1761                self.advance();
1762                JoinType::Inner
1763            }
1764            Token::Left => {
1765                self.advance();
1766                // Handle optional OUTER keyword
1767                if matches!(self.current_token, Token::Outer) {
1768                    self.advance();
1769                }
1770                if !matches!(self.current_token, Token::Join) {
1771                    return Err("Expected JOIN after LEFT".to_string());
1772                }
1773                self.advance();
1774                JoinType::Left
1775            }
1776            Token::Right => {
1777                self.advance();
1778                // Handle optional OUTER keyword
1779                if matches!(self.current_token, Token::Outer) {
1780                    self.advance();
1781                }
1782                if !matches!(self.current_token, Token::Join) {
1783                    return Err("Expected JOIN after RIGHT".to_string());
1784                }
1785                self.advance();
1786                JoinType::Right
1787            }
1788            Token::Full => {
1789                self.advance();
1790                // Handle optional OUTER keyword
1791                if matches!(self.current_token, Token::Outer) {
1792                    self.advance();
1793                }
1794                if !matches!(self.current_token, Token::Join) {
1795                    return Err("Expected JOIN after FULL".to_string());
1796                }
1797                self.advance();
1798                JoinType::Full
1799            }
1800            Token::Cross => {
1801                self.advance();
1802                if !matches!(self.current_token, Token::Join) {
1803                    return Err("Expected JOIN after CROSS".to_string());
1804                }
1805                self.advance();
1806                JoinType::Cross
1807            }
1808            _ => return Err("Expected JOIN keyword".to_string()),
1809        };
1810
1811        // Parse the table being joined
1812        let (table, alias) = self.parse_join_table_source()?;
1813
1814        // Parse ON condition (required for all joins except CROSS JOIN)
1815        let condition = if join_type == JoinType::Cross {
1816            // CROSS JOIN doesn't have ON condition - create empty condition
1817            JoinCondition { conditions: vec![] }
1818        } else {
1819            if !matches!(self.current_token, Token::On) {
1820                return Err("Expected ON keyword after JOIN table".to_string());
1821            }
1822            self.advance();
1823            self.parse_join_condition()?
1824        };
1825
1826        Ok(JoinClause {
1827            join_type,
1828            table,
1829            alias,
1830            condition,
1831        })
1832    }
1833
1834    fn parse_join_table_source(&mut self) -> Result<(TableSource, Option<String>), String> {
1835        let table = match &self.current_token {
1836            Token::Identifier(name) => {
1837                let table_name = name.clone();
1838                self.advance();
1839                TableSource::Table(table_name)
1840            }
1841            Token::LeftParen => {
1842                // Subquery as table source
1843                self.advance();
1844                let subquery = self.parse_select_statement_inner()?;
1845                if !matches!(self.current_token, Token::RightParen) {
1846                    return Err("Expected ')' after subquery".to_string());
1847                }
1848                self.advance();
1849
1850                // Subqueries must have an alias
1851                let alias = match &self.current_token {
1852                    Token::Identifier(alias_name) => {
1853                        let alias = alias_name.clone();
1854                        self.advance();
1855                        alias
1856                    }
1857                    Token::As => {
1858                        self.advance();
1859                        match &self.current_token {
1860                            Token::Identifier(alias_name) => {
1861                                let alias = alias_name.clone();
1862                                self.advance();
1863                                alias
1864                            }
1865                            _ => return Err("Expected alias after AS keyword".to_string()),
1866                        }
1867                    }
1868                    _ => return Err("Subqueries must have an alias".to_string()),
1869                };
1870
1871                return Ok((
1872                    TableSource::DerivedTable {
1873                        query: Box::new(subquery),
1874                        alias: alias.clone(),
1875                    },
1876                    Some(alias),
1877                ));
1878            }
1879            _ => return Err("Expected table name or subquery in JOIN clause".to_string()),
1880        };
1881
1882        // Check for optional alias
1883        let alias = match &self.current_token {
1884            Token::Identifier(alias_name) => {
1885                let alias = alias_name.clone();
1886                self.advance();
1887                Some(alias)
1888            }
1889            Token::As => {
1890                self.advance();
1891                match &self.current_token {
1892                    Token::Identifier(alias_name) => {
1893                        let alias = alias_name.clone();
1894                        self.advance();
1895                        Some(alias)
1896                    }
1897                    _ => return Err("Expected alias after AS keyword".to_string()),
1898                }
1899            }
1900            _ => None,
1901        };
1902
1903        Ok((table, alias))
1904    }
1905
1906    fn parse_join_condition(&mut self) -> Result<JoinCondition, String> {
1907        let mut conditions = Vec::new();
1908
1909        // Parse first condition
1910        conditions.push(self.parse_single_join_condition()?);
1911
1912        // Parse additional conditions connected by AND
1913        while matches!(self.current_token, Token::And) {
1914            self.advance(); // consume AND
1915            conditions.push(self.parse_single_join_condition()?);
1916        }
1917
1918        Ok(JoinCondition { conditions })
1919    }
1920
1921    fn parse_single_join_condition(&mut self) -> Result<SingleJoinCondition, String> {
1922        // Parse left side as additive expression (stops before comparison operators)
1923        // This allows the comparison operator to be explicitly parsed by this function
1924        let left_expr = self.parse_additive()?;
1925
1926        // Parse operator
1927        let operator = match &self.current_token {
1928            Token::Equal => JoinOperator::Equal,
1929            Token::NotEqual => JoinOperator::NotEqual,
1930            Token::LessThan => JoinOperator::LessThan,
1931            Token::LessThanOrEqual => JoinOperator::LessThanOrEqual,
1932            Token::GreaterThan => JoinOperator::GreaterThan,
1933            Token::GreaterThanOrEqual => JoinOperator::GreaterThanOrEqual,
1934            _ => return Err("Expected comparison operator in JOIN condition".to_string()),
1935        };
1936        self.advance();
1937
1938        // Parse right side as additive expression (stops before comparison operators)
1939        let right_expr = self.parse_additive()?;
1940
1941        Ok(SingleJoinCondition {
1942            left_expr,
1943            operator,
1944            right_expr,
1945        })
1946    }
1947
1948    fn parse_column_reference(&mut self) -> Result<String, String> {
1949        match &self.current_token {
1950            Token::Identifier(name) => {
1951                let mut column_ref = name.clone();
1952                self.advance();
1953
1954                // Check for table.column notation
1955                if matches!(self.current_token, Token::Dot) {
1956                    self.advance();
1957                    match &self.current_token {
1958                        Token::Identifier(col_name) => {
1959                            column_ref.push('.');
1960                            column_ref.push_str(col_name);
1961                            self.advance();
1962                        }
1963                        _ => return Err("Expected column name after '.'".to_string()),
1964                    }
1965                }
1966
1967                Ok(column_ref)
1968            }
1969            _ => Err("Expected column reference".to_string()),
1970        }
1971    }
1972
1973    // ===== PIVOT Parsing =====
1974
1975    /// Parse a PIVOT clause after a table source
1976    /// Syntax: PIVOT (aggregate_function FOR pivot_column IN (value1, value2, ...))
1977    fn parse_pivot_clause(&mut self, source: TableSource) -> Result<TableSource, String> {
1978        // Consume PIVOT keyword
1979        self.consume(Token::Pivot)?;
1980
1981        // Consume opening parenthesis
1982        self.consume(Token::LeftParen)?;
1983
1984        // Parse aggregate function (e.g., MAX(AmountEaten))
1985        let aggregate = self.parse_pivot_aggregate()?;
1986
1987        // Parse FOR keyword
1988        self.consume(Token::For)?;
1989
1990        // Parse pivot column name
1991        let pivot_column = match &self.current_token {
1992            Token::Identifier(col) => {
1993                let column = col.clone();
1994                self.advance();
1995                column
1996            }
1997            Token::QuotedIdentifier(col) => {
1998                let column = col.clone();
1999                self.advance();
2000                column
2001            }
2002            _ => return Err("Expected column name after FOR in PIVOT".to_string()),
2003        };
2004
2005        // Parse IN keyword
2006        if !matches!(self.current_token, Token::In) {
2007            return Err("Expected IN keyword in PIVOT clause".to_string());
2008        }
2009        self.advance();
2010
2011        // Parse pivot values
2012        let pivot_values = self.parse_pivot_in_clause()?;
2013
2014        // Consume closing parenthesis for PIVOT
2015        self.consume(Token::RightParen)?;
2016
2017        // Check for optional alias
2018        let alias = self.parse_optional_alias()?;
2019
2020        Ok(TableSource::Pivot {
2021            source: Box::new(source),
2022            aggregate,
2023            pivot_column,
2024            pivot_values,
2025            alias,
2026        })
2027    }
2028
2029    /// Parse the aggregate function specification in PIVOT
2030    /// Example: MAX(AmountEaten), SUM(sales), COUNT(*)
2031    fn parse_pivot_aggregate(&mut self) -> Result<PivotAggregate, String> {
2032        // Parse aggregate function name
2033        let function = match &self.current_token {
2034            Token::Identifier(name) => {
2035                let func_name = name.to_uppercase();
2036                // Validate it's an aggregate function
2037                match func_name.as_str() {
2038                    "MAX" | "MIN" | "SUM" | "AVG" | "COUNT" => {
2039                        self.advance();
2040                        func_name
2041                    }
2042                    _ => {
2043                        return Err(format!(
2044                            "Expected aggregate function (MAX, MIN, SUM, AVG, COUNT), got {}",
2045                            func_name
2046                        ))
2047                    }
2048                }
2049            }
2050            _ => return Err("Expected aggregate function in PIVOT".to_string()),
2051        };
2052
2053        // Consume opening parenthesis
2054        self.consume(Token::LeftParen)?;
2055
2056        // Parse column name (or * for COUNT)
2057        let column = match &self.current_token {
2058            Token::Identifier(col) => {
2059                let column = col.clone();
2060                self.advance();
2061                column
2062            }
2063            Token::QuotedIdentifier(col) => {
2064                let column = col.clone();
2065                self.advance();
2066                column
2067            }
2068            Token::Star => {
2069                // COUNT(*) is allowed
2070                if function == "COUNT" {
2071                    self.advance();
2072                    "*".to_string()
2073                } else {
2074                    return Err(format!("Only COUNT can use *, not {}", function));
2075                }
2076            }
2077            _ => return Err("Expected column name in aggregate function".to_string()),
2078        };
2079
2080        // Consume closing parenthesis
2081        self.consume(Token::RightParen)?;
2082
2083        Ok(PivotAggregate { function, column })
2084    }
2085
2086    /// Parse the IN clause values in PIVOT
2087    /// Example: IN ('Sammich', 'Pickle', 'Apple')
2088    /// Returns vector of pivot values
2089    fn parse_pivot_in_clause(&mut self) -> Result<Vec<String>, String> {
2090        // Consume opening parenthesis
2091        self.consume(Token::LeftParen)?;
2092
2093        let mut values = Vec::new();
2094
2095        // Parse first value
2096        match &self.current_token {
2097            Token::StringLiteral(val) => {
2098                values.push(val.clone());
2099                self.advance();
2100            }
2101            Token::Identifier(val) => {
2102                // Allow unquoted identifiers as well
2103                values.push(val.clone());
2104                self.advance();
2105            }
2106            Token::NumberLiteral(val) => {
2107                // Allow numeric values
2108                values.push(val.clone());
2109                self.advance();
2110            }
2111            _ => return Err("Expected value in PIVOT IN clause".to_string()),
2112        }
2113
2114        // Parse additional values separated by commas
2115        while matches!(self.current_token, Token::Comma) {
2116            self.advance(); // consume comma
2117
2118            match &self.current_token {
2119                Token::StringLiteral(val) => {
2120                    values.push(val.clone());
2121                    self.advance();
2122                }
2123                Token::Identifier(val) => {
2124                    values.push(val.clone());
2125                    self.advance();
2126                }
2127                Token::NumberLiteral(val) => {
2128                    values.push(val.clone());
2129                    self.advance();
2130                }
2131                _ => return Err("Expected value after comma in PIVOT IN clause".to_string()),
2132            }
2133        }
2134
2135        // Consume closing parenthesis
2136        self.consume(Token::RightParen)?;
2137
2138        if values.is_empty() {
2139            return Err("PIVOT IN clause must have at least one value".to_string());
2140        }
2141
2142        Ok(values)
2143    }
2144}
2145
2146// Context detection for cursor position
2147#[derive(Debug, Clone)]
2148pub enum CursorContext {
2149    SelectClause,
2150    FromClause,
2151    WhereClause,
2152    OrderByClause,
2153    AfterColumn(String),
2154    AfterLogicalOp(LogicalOp),
2155    AfterComparisonOp(String, String), // column_name, operator
2156    InMethodCall(String, String),      // object, method
2157    InExpression,
2158    Unknown,
2159}
2160
2161/// Safe UTF-8 string slicing that ensures we don't slice in the middle of a character
2162fn safe_slice_to(s: &str, pos: usize) -> &str {
2163    if pos >= s.len() {
2164        return s;
2165    }
2166
2167    // Find the nearest valid character boundary at or before pos
2168    let mut safe_pos = pos;
2169    while safe_pos > 0 && !s.is_char_boundary(safe_pos) {
2170        safe_pos -= 1;
2171    }
2172
2173    &s[..safe_pos]
2174}
2175
2176/// Safe UTF-8 string slicing from a position to the end
2177fn safe_slice_from(s: &str, pos: usize) -> &str {
2178    if pos >= s.len() {
2179        return "";
2180    }
2181
2182    // Find the nearest valid character boundary at or after pos
2183    let mut safe_pos = pos;
2184    while safe_pos < s.len() && !s.is_char_boundary(safe_pos) {
2185        safe_pos += 1;
2186    }
2187
2188    &s[safe_pos..]
2189}
2190
2191#[must_use]
2192pub fn detect_cursor_context(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
2193    let truncated = safe_slice_to(query, cursor_pos);
2194    let mut parser = Parser::new(truncated);
2195
2196    // Try to parse as much as possible
2197    if let Ok(stmt) = parser.parse() {
2198        let (ctx, partial) = analyze_statement(&stmt, truncated, cursor_pos);
2199        #[cfg(test)]
2200        println!("analyze_statement returned: {ctx:?}, {partial:?} for query: '{truncated}'");
2201        (ctx, partial)
2202    } else {
2203        // Partial parse - analyze what we have
2204        let (ctx, partial) = analyze_partial(truncated, cursor_pos);
2205        #[cfg(test)]
2206        println!("analyze_partial returned: {ctx:?}, {partial:?} for query: '{truncated}'");
2207        (ctx, partial)
2208    }
2209}
2210
2211#[must_use]
2212pub fn tokenize_query(query: &str) -> Vec<String> {
2213    let mut lexer = Lexer::new(query);
2214    let tokens = lexer.tokenize_all();
2215    tokens.iter().map(|t| format!("{t:?}")).collect()
2216}
2217
2218#[must_use]
2219/// Helper function to find the start of a quoted string searching backwards
2220fn find_quote_start(bytes: &[u8], mut pos: usize) -> Option<usize> {
2221    // Skip the closing quote and search backwards
2222    if pos > 0 {
2223        pos -= 1;
2224        while pos > 0 {
2225            if bytes[pos] == b'"' {
2226                // Check if it's not an escaped quote
2227                if pos == 0 || bytes[pos - 1] != b'\\' {
2228                    return Some(pos);
2229                }
2230            }
2231            pos -= 1;
2232        }
2233        // Check position 0 separately
2234        if bytes[0] == b'"' {
2235            return Some(0);
2236        }
2237    }
2238    None
2239}
2240
2241/// Helper function to handle method call context after validation
2242fn handle_method_call_context(col_name: &str, after_dot: &str) -> (CursorContext, Option<String>) {
2243    // Check if there's a partial method name after the dot
2244    let partial_method = if after_dot.is_empty() {
2245        None
2246    } else if after_dot.chars().all(|c| c.is_alphanumeric() || c == '_') {
2247        Some(after_dot.to_string())
2248    } else {
2249        None
2250    };
2251
2252    // For AfterColumn context, strip quotes if present for consistency
2253    let col_name_for_context =
2254        if col_name.starts_with('"') && col_name.ends_with('"') && col_name.len() > 2 {
2255            col_name[1..col_name.len() - 1].to_string()
2256        } else {
2257            col_name.to_string()
2258        };
2259
2260    (
2261        CursorContext::AfterColumn(col_name_for_context),
2262        partial_method,
2263    )
2264}
2265
2266/// Helper function to check if we're after a comparison operator
2267fn check_after_comparison_operator(query: &str) -> Option<(CursorContext, Option<String>)> {
2268    for op in &Parser::COMPARISON_OPERATORS {
2269        if let Some(op_pos) = query.rfind(op) {
2270            let before_op = safe_slice_to(query, op_pos);
2271            let after_op_start = op_pos + op.len();
2272            let after_op = if after_op_start < query.len() {
2273                &query[after_op_start..]
2274            } else {
2275                ""
2276            };
2277
2278            // Check if we have a column name before the operator
2279            if let Some(col_name) = before_op.split_whitespace().last() {
2280                if col_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
2281                    // Check if we're at or near the end of the query
2282                    let after_op_trimmed = after_op.trim();
2283                    if after_op_trimmed.is_empty()
2284                        || (after_op_trimmed
2285                            .chars()
2286                            .all(|c| c.is_alphanumeric() || c == '_')
2287                            && !after_op_trimmed.contains('('))
2288                    {
2289                        let partial = if after_op_trimmed.is_empty() {
2290                            None
2291                        } else {
2292                            Some(after_op_trimmed.to_string())
2293                        };
2294                        return Some((
2295                            CursorContext::AfterComparisonOp(
2296                                col_name.to_string(),
2297                                op.trim().to_string(),
2298                            ),
2299                            partial,
2300                        ));
2301                    }
2302                }
2303            }
2304        }
2305    }
2306    None
2307}
2308
2309fn analyze_statement(
2310    stmt: &SelectStatement,
2311    query: &str,
2312    _cursor_pos: usize,
2313) -> (CursorContext, Option<String>) {
2314    // First check for method call context (e.g., "columnName." or "columnName.Con")
2315    let trimmed = query.trim();
2316
2317    // Check if we're after a comparison operator (e.g., "createdDate > ")
2318    if let Some(result) = check_after_comparison_operator(query) {
2319        return result;
2320    }
2321
2322    // First check if we're after AND/OR - this takes precedence
2323    // Helper function to check if string ends with a logical operator
2324    let ends_with_logical_op = |s: &str| -> bool {
2325        let s_upper = s.to_uppercase();
2326        s_upper.ends_with(" AND") || s_upper.ends_with(" OR")
2327    };
2328
2329    if ends_with_logical_op(trimmed) {
2330        // Don't check for method context if we're clearly after a logical operator
2331    } else {
2332        // Look for the last dot in the query
2333        if let Some(dot_pos) = trimmed.rfind('.') {
2334            // Check if we're after a column name and dot
2335            let before_dot = safe_slice_to(trimmed, dot_pos);
2336            let after_dot_start = dot_pos + 1;
2337            let after_dot = if after_dot_start < trimmed.len() {
2338                &trimmed[after_dot_start..]
2339            } else {
2340                ""
2341            };
2342
2343            // Check if the part after dot looks like an incomplete method call
2344            // (not a complete method call like "Contains(...)")
2345            if !after_dot.contains('(') {
2346                // Try to extract the column name - could be quoted or regular
2347                let col_name = if before_dot.ends_with('"') {
2348                    // Handle quoted identifier - search backwards for matching opening quote
2349                    let bytes = before_dot.as_bytes();
2350                    let pos = before_dot.len() - 1; // Position of closing quote
2351
2352                    find_quote_start(bytes, pos).map(|start| safe_slice_from(before_dot, start))
2353                } else {
2354                    // Regular identifier - get the last word, handling parentheses
2355                    // Strip all leading parentheses
2356                    before_dot
2357                        .split_whitespace()
2358                        .last()
2359                        .map(|word| word.trim_start_matches('('))
2360                };
2361
2362                if let Some(col_name) = col_name {
2363                    // For quoted identifiers, keep the quotes, for regular identifiers check validity
2364                    let is_valid = Parser::is_valid_identifier(col_name);
2365
2366                    if is_valid {
2367                        return handle_method_call_context(col_name, after_dot);
2368                    }
2369                }
2370            }
2371        }
2372    }
2373
2374    // Check if we're in WHERE clause
2375    if let Some(where_clause) = &stmt.where_clause {
2376        // Check if query ends with AND/OR (with or without trailing space/partial)
2377        let trimmed_upper = trimmed.to_uppercase();
2378        if trimmed_upper.ends_with(" AND") || trimmed_upper.ends_with(" OR") {
2379            let op = if trimmed_upper.ends_with(" AND") {
2380                LogicalOp::And
2381            } else {
2382                LogicalOp::Or
2383            };
2384            return (CursorContext::AfterLogicalOp(op), None);
2385        }
2386
2387        // Check if we have AND/OR followed by a partial word
2388        let query_upper = query.to_uppercase();
2389        if let Some(and_pos) = query_upper.rfind(" AND ") {
2390            let after_and = safe_slice_from(query, and_pos + 5);
2391            let partial = extract_partial_at_end(after_and);
2392            if partial.is_some() {
2393                return (CursorContext::AfterLogicalOp(LogicalOp::And), partial);
2394            }
2395        }
2396
2397        if let Some(or_pos) = query_upper.rfind(" OR ") {
2398            let after_or = safe_slice_from(query, or_pos + 4);
2399            let partial = extract_partial_at_end(after_or);
2400            if partial.is_some() {
2401                return (CursorContext::AfterLogicalOp(LogicalOp::Or), partial);
2402            }
2403        }
2404
2405        if let Some(last_condition) = where_clause.conditions.last() {
2406            if let Some(connector) = &last_condition.connector {
2407                // We're after AND/OR
2408                return (
2409                    CursorContext::AfterLogicalOp(connector.clone()),
2410                    extract_partial_at_end(query),
2411                );
2412            }
2413        }
2414        // We're in WHERE clause but not after AND/OR
2415        return (CursorContext::WhereClause, extract_partial_at_end(query));
2416    }
2417
2418    // Check if we're after ORDER BY
2419    let query_upper = query.to_uppercase();
2420    if query_upper.ends_with(" ORDER BY") {
2421        return (CursorContext::OrderByClause, None);
2422    }
2423
2424    // Check other contexts based on what's in the statement
2425    if stmt.order_by.is_some() {
2426        return (CursorContext::OrderByClause, extract_partial_at_end(query));
2427    }
2428
2429    if stmt.from_table.is_some() && stmt.where_clause.is_none() && stmt.order_by.is_none() {
2430        return (CursorContext::FromClause, extract_partial_at_end(query));
2431    }
2432
2433    if !stmt.columns.is_empty() && stmt.from_table.is_none() {
2434        return (CursorContext::SelectClause, extract_partial_at_end(query));
2435    }
2436
2437    (CursorContext::Unknown, None)
2438}
2439
2440/// Helper function to find the last occurrence of a token type in the token stream
2441fn find_last_token(tokens: &[(usize, usize, Token)], target: &Token) -> Option<usize> {
2442    tokens
2443        .iter()
2444        .rposition(|(_, _, t)| t == target)
2445        .map(|idx| tokens[idx].0)
2446}
2447
2448/// Helper function to find the last occurrence of any matching token
2449fn find_last_matching_token<F>(
2450    tokens: &[(usize, usize, Token)],
2451    predicate: F,
2452) -> Option<(usize, &Token)>
2453where
2454    F: Fn(&Token) -> bool,
2455{
2456    tokens
2457        .iter()
2458        .rposition(|(_, _, t)| predicate(t))
2459        .map(|idx| (tokens[idx].0, &tokens[idx].2))
2460}
2461
2462/// Helper function to check if we're in a specific clause based on tokens
2463fn is_in_clause(
2464    tokens: &[(usize, usize, Token)],
2465    clause_token: Token,
2466    exclude_tokens: &[Token],
2467) -> bool {
2468    // Find the last occurrence of the clause token
2469    if let Some(clause_pos) = find_last_token(tokens, &clause_token) {
2470        // Check if any exclude tokens appear after it
2471        for (pos, _, token) in tokens.iter() {
2472            if *pos > clause_pos && exclude_tokens.contains(token) {
2473                return false;
2474            }
2475        }
2476        return true;
2477    }
2478    false
2479}
2480
2481fn analyze_partial(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
2482    // Tokenize the query up to cursor position
2483    let mut lexer = Lexer::new(query);
2484    let tokens = lexer.tokenize_all_with_positions();
2485
2486    let trimmed = query.trim();
2487
2488    #[cfg(test)]
2489    {
2490        if trimmed.contains("\"Last Name\"") {
2491            eprintln!("DEBUG analyze_partial: query='{query}', trimmed='{trimmed}'");
2492        }
2493    }
2494
2495    // Check if we're after a comparison operator (e.g., "createdDate > ")
2496    if let Some(result) = check_after_comparison_operator(query) {
2497        return result;
2498    }
2499
2500    // Look for the last dot in the query (method call context) - check this FIRST
2501    // before AND/OR detection to properly handle cases like "AND (Country."
2502    if let Some(dot_pos) = trimmed.rfind('.') {
2503        #[cfg(test)]
2504        {
2505            if trimmed.contains("\"Last Name\"") {
2506                eprintln!("DEBUG: Found dot at position {dot_pos}");
2507            }
2508        }
2509        // Check if we're after a column name and dot
2510        let before_dot = &trimmed[..dot_pos];
2511        let after_dot = &trimmed[dot_pos + 1..];
2512
2513        // Check if the part after dot looks like an incomplete method call
2514        // (not a complete method call like "Contains(...)")
2515        if !after_dot.contains('(') {
2516            // Try to extract the column name before the dot
2517            // It could be a quoted identifier like "Last Name" or a regular identifier
2518            let col_name = if before_dot.ends_with('"') {
2519                // Handle quoted identifier - search backwards for matching opening quote
2520                let bytes = before_dot.as_bytes();
2521                let pos = before_dot.len() - 1; // Position of closing quote
2522
2523                #[cfg(test)]
2524                {
2525                    if trimmed.contains("\"Last Name\"") {
2526                        eprintln!("DEBUG: before_dot='{before_dot}', looking for opening quote");
2527                    }
2528                }
2529
2530                let found_start = find_quote_start(bytes, pos);
2531
2532                if let Some(start) = found_start {
2533                    // Extract the full quoted identifier including quotes
2534                    let result = safe_slice_from(before_dot, start);
2535                    #[cfg(test)]
2536                    {
2537                        if trimmed.contains("\"Last Name\"") {
2538                            eprintln!("DEBUG: Extracted quoted identifier: '{result}'");
2539                        }
2540                    }
2541                    Some(result)
2542                } else {
2543                    #[cfg(test)]
2544                    {
2545                        if trimmed.contains("\"Last Name\"") {
2546                            eprintln!("DEBUG: No opening quote found!");
2547                        }
2548                    }
2549                    None
2550                }
2551            } else {
2552                // Regular identifier - get the last word, handling parentheses
2553                // Strip all leading parentheses
2554                before_dot
2555                    .split_whitespace()
2556                    .last()
2557                    .map(|word| word.trim_start_matches('('))
2558            };
2559
2560            if let Some(col_name) = col_name {
2561                #[cfg(test)]
2562                {
2563                    if trimmed.contains("\"Last Name\"") {
2564                        eprintln!("DEBUG: col_name = '{col_name}'");
2565                    }
2566                }
2567
2568                // For quoted identifiers, keep the quotes, for regular identifiers check validity
2569                let is_valid = Parser::is_valid_identifier(col_name);
2570
2571                #[cfg(test)]
2572                {
2573                    if trimmed.contains("\"Last Name\"") {
2574                        eprintln!("DEBUG: is_valid = {is_valid}");
2575                    }
2576                }
2577
2578                if is_valid {
2579                    return handle_method_call_context(col_name, after_dot);
2580                }
2581            }
2582        }
2583    }
2584
2585    // Check if we're after AND/OR using tokens - but only after checking for method calls
2586    if let Some((pos, token)) =
2587        find_last_matching_token(&tokens, |t| matches!(t, Token::And | Token::Or))
2588    {
2589        // Check if cursor is after the logical operator
2590        let token_end_pos = if matches!(token, Token::And) {
2591            pos + 3 // "AND" is 3 characters
2592        } else {
2593            pos + 2 // "OR" is 2 characters
2594        };
2595
2596        if cursor_pos > token_end_pos {
2597            // Extract any partial word after the operator
2598            let after_op = safe_slice_from(query, token_end_pos + 1); // +1 for the space
2599            let partial = extract_partial_at_end(after_op);
2600            let op = if matches!(token, Token::And) {
2601                LogicalOp::And
2602            } else {
2603                LogicalOp::Or
2604            };
2605            return (CursorContext::AfterLogicalOp(op), partial);
2606        }
2607    }
2608
2609    // Check if the last token is AND or OR (handles case where it's at the very end)
2610    if let Some((_, _, last_token)) = tokens.last() {
2611        if matches!(last_token, Token::And | Token::Or) {
2612            let op = if matches!(last_token, Token::And) {
2613                LogicalOp::And
2614            } else {
2615                LogicalOp::Or
2616            };
2617            return (CursorContext::AfterLogicalOp(op), None);
2618        }
2619    }
2620
2621    // Check if we're in ORDER BY clause using tokens
2622    if let Some(order_pos) = find_last_token(&tokens, &Token::OrderBy) {
2623        // Check if there's a BY token after ORDER
2624        let has_by = tokens
2625            .iter()
2626            .any(|(pos, _, t)| *pos > order_pos && matches!(t, Token::By));
2627        if has_by
2628            || tokens
2629                .last()
2630                .map_or(false, |(_, _, t)| matches!(t, Token::OrderBy))
2631        {
2632            return (CursorContext::OrderByClause, extract_partial_at_end(query));
2633        }
2634    }
2635
2636    // Check if we're in WHERE clause using tokens
2637    if is_in_clause(&tokens, Token::Where, &[Token::OrderBy, Token::GroupBy]) {
2638        return (CursorContext::WhereClause, extract_partial_at_end(query));
2639    }
2640
2641    // Check if we're in FROM clause using tokens
2642    if is_in_clause(
2643        &tokens,
2644        Token::From,
2645        &[Token::Where, Token::OrderBy, Token::GroupBy],
2646    ) {
2647        return (CursorContext::FromClause, extract_partial_at_end(query));
2648    }
2649
2650    // Check if we're in SELECT clause using tokens
2651    if find_last_token(&tokens, &Token::Select).is_some()
2652        && find_last_token(&tokens, &Token::From).is_none()
2653    {
2654        return (CursorContext::SelectClause, extract_partial_at_end(query));
2655    }
2656
2657    (CursorContext::Unknown, None)
2658}
2659
2660fn extract_partial_at_end(query: &str) -> Option<String> {
2661    let trimmed = query.trim();
2662
2663    // First check if the last word itself starts with a quote (unclosed quoted identifier being typed)
2664    if let Some(last_word) = trimmed.split_whitespace().last() {
2665        if last_word.starts_with('"') && !last_word.ends_with('"') {
2666            // This is an unclosed quoted identifier like "Cust
2667            return Some(last_word.to_string());
2668        }
2669    }
2670
2671    // Regular identifier extraction
2672    let last_word = trimmed.split_whitespace().last()?;
2673
2674    // Check if it's a partial identifier (not a keyword or operator)
2675    // First check if it's alphanumeric (potential identifier)
2676    if last_word.chars().all(|c| c.is_alphanumeric() || c == '_') {
2677        // Use lexer to determine if it's a keyword or identifier
2678        if !is_sql_keyword(last_word) {
2679            Some(last_word.to_string())
2680        } else {
2681            None
2682        }
2683    } else {
2684        None
2685    }
2686}
2687
2688// Implement the ParsePrimary trait for Parser to use the modular expression parsing
2689impl ParsePrimary for Parser {
2690    fn current_token(&self) -> &Token {
2691        &self.current_token
2692    }
2693
2694    fn advance(&mut self) {
2695        self.advance();
2696    }
2697
2698    fn consume(&mut self, expected: Token) -> Result<(), String> {
2699        self.consume(expected)
2700    }
2701
2702    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
2703        self.parse_case_expression()
2704    }
2705
2706    fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
2707        self.parse_function_args()
2708    }
2709
2710    fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
2711        self.parse_window_spec()
2712    }
2713
2714    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
2715        self.parse_logical_or()
2716    }
2717
2718    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2719        self.parse_comparison()
2720    }
2721
2722    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2723        self.parse_expression_list()
2724    }
2725
2726    fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2727        // Parse subquery without parenthesis balance validation
2728        if matches!(self.current_token, Token::With) {
2729            self.parse_with_clause_inner()
2730        } else {
2731            self.parse_select_statement_inner()
2732        }
2733    }
2734}
2735
2736// Implement the ExpressionParser trait for Parser to use the modular expression parsing
2737impl ExpressionParser for Parser {
2738    fn current_token(&self) -> &Token {
2739        &self.current_token
2740    }
2741
2742    fn advance(&mut self) {
2743        // Call the main advance method directly to avoid recursion
2744        match &self.current_token {
2745            Token::LeftParen => self.paren_depth += 1,
2746            Token::RightParen => {
2747                self.paren_depth -= 1;
2748            }
2749            _ => {}
2750        }
2751        self.current_token = self.lexer.next_token();
2752    }
2753
2754    fn peek(&self) -> Option<&Token> {
2755        // We can't return a reference to a token from a temporary lexer,
2756        // so we need a different approach. For now, let's use a workaround
2757        // that checks the next token type without consuming it.
2758        // This is a limitation of the current design.
2759        // A proper fix would be to store the peeked token in the Parser struct.
2760        None // TODO: Implement proper lookahead
2761    }
2762
2763    fn is_at_end(&self) -> bool {
2764        matches!(self.current_token, Token::Eof)
2765    }
2766
2767    fn consume(&mut self, expected: Token) -> Result<(), String> {
2768        // Call the main consume method to avoid recursion
2769        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
2770            self.update_paren_depth(&expected)?;
2771            self.current_token = self.lexer.next_token();
2772            Ok(())
2773        } else {
2774            Err(format!(
2775                "Expected {:?}, found {:?}",
2776                expected, self.current_token
2777            ))
2778        }
2779    }
2780
2781    fn parse_identifier(&mut self) -> Result<String, String> {
2782        if let Token::Identifier(id) = &self.current_token {
2783            let id = id.clone();
2784            self.advance();
2785            Ok(id)
2786        } else {
2787            Err(format!(
2788                "Expected identifier, found {:?}",
2789                self.current_token
2790            ))
2791        }
2792    }
2793}
2794
2795// Implement the ParseArithmetic trait for Parser to use the modular arithmetic parsing
2796impl ParseArithmetic for Parser {
2797    fn current_token(&self) -> &Token {
2798        &self.current_token
2799    }
2800
2801    fn advance(&mut self) {
2802        self.advance();
2803    }
2804
2805    fn consume(&mut self, expected: Token) -> Result<(), String> {
2806        self.consume(expected)
2807    }
2808
2809    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2810        self.parse_primary()
2811    }
2812
2813    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
2814        self.parse_multiplicative()
2815    }
2816
2817    fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
2818        self.parse_method_args()
2819    }
2820}
2821
2822// Implement the ParseComparison trait for Parser to use the modular comparison parsing
2823impl ParseComparison for Parser {
2824    fn current_token(&self) -> &Token {
2825        &self.current_token
2826    }
2827
2828    fn advance(&mut self) {
2829        self.advance();
2830    }
2831
2832    fn consume(&mut self, expected: Token) -> Result<(), String> {
2833        self.consume(expected)
2834    }
2835
2836    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2837        self.parse_primary()
2838    }
2839
2840    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
2841        self.parse_additive()
2842    }
2843
2844    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2845        self.parse_expression_list()
2846    }
2847
2848    fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2849        // Parse subquery without parenthesis balance validation
2850        if matches!(self.current_token, Token::With) {
2851            self.parse_with_clause_inner()
2852        } else {
2853            self.parse_select_statement_inner()
2854        }
2855    }
2856}
2857
2858// Implement the ParseLogical trait for Parser to use the modular logical parsing
2859impl ParseLogical for Parser {
2860    fn current_token(&self) -> &Token {
2861        &self.current_token
2862    }
2863
2864    fn advance(&mut self) {
2865        self.advance();
2866    }
2867
2868    fn consume(&mut self, expected: Token) -> Result<(), String> {
2869        self.consume(expected)
2870    }
2871
2872    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
2873        self.parse_logical_and()
2874    }
2875
2876    fn parse_base_logical_expression(&mut self) -> Result<SqlExpression, String> {
2877        // This is the base for logical AND - it should parse comparison expressions
2878        // to avoid infinite recursion with parse_expression
2879        self.parse_comparison()
2880    }
2881
2882    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2883        self.parse_comparison()
2884    }
2885
2886    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2887        self.parse_expression_list()
2888    }
2889}
2890
2891// Implement the ParseCase trait for Parser to use the modular CASE parsing
2892impl ParseCase for Parser {
2893    fn current_token(&self) -> &Token {
2894        &self.current_token
2895    }
2896
2897    fn advance(&mut self) {
2898        self.advance();
2899    }
2900
2901    fn consume(&mut self, expected: Token) -> Result<(), String> {
2902        self.consume(expected)
2903    }
2904
2905    fn parse_expression(&mut self) -> Result<SqlExpression, String> {
2906        self.parse_expression()
2907    }
2908}
2909
2910fn is_sql_keyword(word: &str) -> bool {
2911    // Use the lexer to check if this word produces a keyword token
2912    let mut lexer = Lexer::new(word);
2913    let token = lexer.next_token();
2914
2915    // Check if it's a keyword token (not an identifier)
2916    !matches!(token, Token::Identifier(_) | Token::Eof)
2917}
2918
2919#[cfg(test)]
2920mod tests {
2921    use super::*;
2922
2923    /// Test that Parser::new() defaults to Standard mode (backward compatible)
2924    #[test]
2925    fn test_parser_mode_default_is_standard() {
2926        let sql = "-- Leading comment\nSELECT * FROM users";
2927        let mut parser = Parser::new(sql);
2928        let stmt = parser.parse().unwrap();
2929
2930        // In Standard mode, comments should be empty
2931        assert!(stmt.leading_comments.is_empty());
2932        assert!(stmt.trailing_comment.is_none());
2933    }
2934
2935    /// Test that PreserveComments mode collects leading comments
2936    #[test]
2937    fn test_parser_mode_preserve_leading_comments() {
2938        let sql = "-- Important query\n-- Author: Alice\nSELECT id, name FROM users";
2939        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
2940        let stmt = parser.parse().unwrap();
2941
2942        // Should have 2 leading comments
2943        assert_eq!(stmt.leading_comments.len(), 2);
2944        assert!(stmt.leading_comments[0].is_line_comment);
2945        assert!(stmt.leading_comments[0].text.contains("Important query"));
2946        assert!(stmt.leading_comments[1].text.contains("Author: Alice"));
2947    }
2948
2949    /// Test that PreserveComments mode collects trailing comments
2950    #[test]
2951    fn test_parser_mode_preserve_trailing_comment() {
2952        let sql = "SELECT * FROM users -- Fetch all users";
2953        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
2954        let stmt = parser.parse().unwrap();
2955
2956        // Should have trailing comment
2957        assert!(stmt.trailing_comment.is_some());
2958        let comment = stmt.trailing_comment.unwrap();
2959        assert!(comment.is_line_comment);
2960        assert!(comment.text.contains("Fetch all users"));
2961    }
2962
2963    /// Test that PreserveComments mode handles block comments
2964    #[test]
2965    fn test_parser_mode_preserve_block_comments() {
2966        let sql = "/* Query explanation */\nSELECT * FROM users";
2967        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
2968        let stmt = parser.parse().unwrap();
2969
2970        // Should have leading block comment
2971        assert_eq!(stmt.leading_comments.len(), 1);
2972        assert!(!stmt.leading_comments[0].is_line_comment); // It's a block comment
2973        assert!(stmt.leading_comments[0].text.contains("Query explanation"));
2974    }
2975
2976    /// Test that PreserveComments mode collects both leading and trailing
2977    #[test]
2978    fn test_parser_mode_preserve_both_comments() {
2979        let sql = "-- Leading\nSELECT * FROM users -- Trailing";
2980        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
2981        let stmt = parser.parse().unwrap();
2982
2983        // Should have both
2984        assert_eq!(stmt.leading_comments.len(), 1);
2985        assert!(stmt.leading_comments[0].text.contains("Leading"));
2986        assert!(stmt.trailing_comment.is_some());
2987        assert!(stmt.trailing_comment.unwrap().text.contains("Trailing"));
2988    }
2989
2990    /// Test that Standard mode has zero performance overhead (no comment parsing)
2991    #[test]
2992    fn test_parser_mode_standard_ignores_comments() {
2993        let sql = "-- Comment 1\n/* Comment 2 */\nSELECT * FROM users -- Comment 3";
2994        let mut parser = Parser::with_mode(sql, ParserMode::Standard);
2995        let stmt = parser.parse().unwrap();
2996
2997        // Comments should be completely ignored
2998        assert!(stmt.leading_comments.is_empty());
2999        assert!(stmt.trailing_comment.is_none());
3000
3001        // But query should still parse correctly
3002        assert_eq!(stmt.select_items.len(), 1);
3003        assert_eq!(stmt.from_table, Some("users".to_string()));
3004    }
3005
3006    /// Test backward compatibility - existing code using Parser::new() unchanged
3007    #[test]
3008    fn test_parser_backward_compatibility() {
3009        let sql = "SELECT id, name FROM users WHERE active = true";
3010
3011        // Old way (still works, defaults to Standard mode)
3012        let mut parser1 = Parser::new(sql);
3013        let stmt1 = parser1.parse().unwrap();
3014
3015        // Explicit Standard mode (same behavior)
3016        let mut parser2 = Parser::with_mode(sql, ParserMode::Standard);
3017        let stmt2 = parser2.parse().unwrap();
3018
3019        // Both should produce identical ASTs (comments are empty in both)
3020        assert_eq!(stmt1.select_items.len(), stmt2.select_items.len());
3021        assert_eq!(stmt1.from_table, stmt2.from_table);
3022        assert_eq!(stmt1.where_clause.is_some(), stmt2.where_clause.is_some());
3023        assert!(stmt1.leading_comments.is_empty());
3024        assert!(stmt2.leading_comments.is_empty());
3025    }
3026
3027    /// Test PIVOT parsing - currently returns error as execution is not implemented
3028    #[test]
3029    fn test_pivot_parsing_not_yet_supported() {
3030        let sql = "SELECT * FROM food_eaten PIVOT (MAX(AmountEaten) FOR FoodName IN ('Sammich', 'Pickle', 'Apple'))";
3031        let mut parser = Parser::new(sql);
3032        let result = parser.parse();
3033
3034        // PIVOT is now fully supported! Verify parsing succeeds
3035        assert!(result.is_ok());
3036        let stmt = result.unwrap();
3037
3038        // Verify from_source contains a PIVOT
3039        assert!(stmt.from_source.is_some());
3040        if let Some(crate::sql::parser::ast::TableSource::Pivot { .. }) = stmt.from_source {
3041            // Success!
3042        } else {
3043            panic!("Expected from_source to be a Pivot variant");
3044        }
3045    }
3046
3047    /// Test PIVOT syntax with different aggregate functions
3048    #[test]
3049    fn test_pivot_aggregate_functions() {
3050        // Test with SUM - PIVOT is now fully supported!
3051        let sql = "SELECT * FROM sales PIVOT (SUM(amount) FOR month IN ('Jan', 'Feb', 'Mar'))";
3052        let mut parser = Parser::new(sql);
3053        let result = parser.parse();
3054        assert!(result.is_ok());
3055
3056        // Test with COUNT
3057        let sql2 = "SELECT * FROM sales PIVOT (COUNT(*) FOR month IN ('Jan', 'Feb'))";
3058        let mut parser2 = Parser::new(sql2);
3059        let result2 = parser2.parse();
3060        assert!(result2.is_ok());
3061
3062        // Test with AVG
3063        let sql3 = "SELECT * FROM sales PIVOT (AVG(price) FOR category IN ('A', 'B'))";
3064        let mut parser3 = Parser::new(sql3);
3065        let result3 = parser3.parse();
3066        assert!(result3.is_ok());
3067    }
3068
3069    /// Test PIVOT with subquery source
3070    #[test]
3071    fn test_pivot_with_subquery() {
3072        let sql = "SELECT * FROM (SELECT * FROM food_eaten WHERE Id > 5) AS t \
3073                   PIVOT (MAX(AmountEaten) FOR FoodName IN ('Sammich', 'Pickle'))";
3074        let mut parser = Parser::new(sql);
3075        let result = parser.parse();
3076
3077        // PIVOT with subquery is now fully supported!
3078        assert!(result.is_ok());
3079        let stmt = result.unwrap();
3080        assert!(stmt.from_source.is_some());
3081    }
3082
3083    /// Test PIVOT with alias
3084    #[test]
3085    fn test_pivot_with_alias() {
3086        let sql =
3087            "SELECT * FROM sales PIVOT (SUM(amount) FOR month IN ('Jan', 'Feb')) AS pivot_table";
3088        let mut parser = Parser::new(sql);
3089        let result = parser.parse();
3090
3091        // PIVOT with alias is now fully supported!
3092        assert!(result.is_ok());
3093        let stmt = result.unwrap();
3094        assert!(stmt.from_source.is_some());
3095    }
3096}