sql_cli/sql/
recursive_parser.rs

1// Keep chrono imports for the parser implementation
2
3// Re-exports for backward compatibility - these serve as both imports and re-exports
4pub use super::parser::ast::{
5    CTEType, Comment, Condition, DataFormat, FrameBound, FrameUnit, HttpMethod, IntoTable,
6    JoinClause, JoinCondition, JoinOperator, JoinType, LogicalOp, OrderByColumn, OrderByItem,
7    SelectItem, SelectStatement, SetOperation, SingleJoinCondition, SortDirection, SqlExpression,
8    TableFunction, TableSource, WebCTESpec, WhenBranch, WhereClause, WindowFrame, WindowSpec, CTE,
9};
10pub use super::parser::legacy::{ParseContext, ParseState, Schema, SqlParser, SqlToken, TableInfo};
11pub use super::parser::lexer::{Lexer, LexerMode, Token};
12pub use super::parser::ParserConfig;
13
14// Re-export formatting functions for backward compatibility
15pub use super::parser::formatter::{format_ast_tree, format_sql_pretty, format_sql_pretty_compact};
16
17// New AST-based formatter
18pub use super::parser::ast_formatter::{format_sql_ast, format_sql_ast_with_config, FormatConfig};
19
20// Import the new expression modules
21use super::parser::expressions::arithmetic::{
22    parse_additive as parse_additive_expr, parse_multiplicative as parse_multiplicative_expr,
23    ParseArithmetic,
24};
25use super::parser::expressions::case::{parse_case_expression as parse_case_expr, ParseCase};
26use super::parser::expressions::comparison::{
27    parse_comparison as parse_comparison_expr, parse_in_operator, ParseComparison,
28};
29use super::parser::expressions::logical::{
30    parse_logical_and as parse_logical_and_expr, parse_logical_or as parse_logical_or_expr,
31    ParseLogical,
32};
33use super::parser::expressions::primary::{
34    parse_primary as parse_primary_expr, ParsePrimary, PrimaryExpressionContext,
35};
36use super::parser::expressions::ExpressionParser;
37
38// Import function registry to check for function existence
39use crate::sql::functions::{FunctionCategory, FunctionRegistry};
40use crate::sql::generators::GeneratorRegistry;
41use std::sync::Arc;
42
43// Import Web CTE parser
44use super::parser::web_cte_parser::WebCteParser;
45
/// Parser mode - controls whether SQL comments are preserved in the AST.
///
/// `Parser::with_mode` translates each variant into the corresponding
/// `LexerMode`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParserMode {
    /// Standard parsing - comments are skipped (current behavior, backward compatible).
    Standard,
    /// Comments are preserved in the AST (opt-in, intended for formatters).
    PreserveComments,
}
54
55impl Default for ParserMode {
56    fn default() -> Self {
57        ParserMode::Standard
58    }
59}
60
/// SQL parser state.
///
/// Holds the lexer plus the token currently under inspection (`current_token`),
/// along with the bookkeeping used while parsing: parenthesis-depth tracking,
/// optional debug tracing, the registries consulted when resolving table
/// functions and generators, and the comment-preservation mode.
pub struct Parser {
    lexer: Lexer,
    pub current_token: Token,    // Made public for web_cte_parser access
    in_method_args: bool,        // Track if we're parsing method arguments
    columns: Vec<String>,        // Known column names for context-aware parsing
    paren_depth: i32,            // Track parentheses nesting depth
    paren_depth_stack: Vec<i32>, // Stack to save/restore paren depth for nested contexts
    _config: ParserConfig,       // Parser configuration including case sensitivity
    debug_trace: bool,           // Enable detailed token-by-token trace
    trace_depth: usize,          // Track recursion depth for indented trace
    function_registry: Arc<FunctionRegistry>, // Function registry for validation
    generator_registry: Arc<GeneratorRegistry>, // Generator registry for table functions
    mode: ParserMode,            // Parser mode for comment preservation
}
75
76impl Parser {
77    #[must_use]
78    pub fn new(input: &str) -> Self {
79        Self::with_mode(input, ParserMode::default())
80    }
81
82    /// Create a new parser with explicit mode for comment preservation
83    #[must_use]
84    pub fn with_mode(input: &str, mode: ParserMode) -> Self {
85        // Choose lexer mode based on parser mode
86        let lexer_mode = match mode {
87            ParserMode::Standard => LexerMode::SkipComments,
88            ParserMode::PreserveComments => LexerMode::PreserveComments,
89        };
90
91        let mut lexer = Lexer::with_mode(input, lexer_mode);
92        let current_token = lexer.next_token();
93        Self {
94            lexer,
95            current_token,
96            in_method_args: false,
97            columns: Vec::new(),
98            paren_depth: 0,
99            paren_depth_stack: Vec::new(),
100            _config: ParserConfig::default(),
101            debug_trace: false,
102            trace_depth: 0,
103            function_registry: Arc::new(FunctionRegistry::new()),
104            generator_registry: Arc::new(GeneratorRegistry::new()),
105            mode,
106        }
107    }
108
109    #[must_use]
110    pub fn with_config(input: &str, config: ParserConfig) -> Self {
111        let mut lexer = Lexer::new(input);
112        let current_token = lexer.next_token();
113        Self {
114            lexer,
115            current_token,
116            in_method_args: false,
117            columns: Vec::new(),
118            paren_depth: 0,
119            paren_depth_stack: Vec::new(),
120            _config: config,
121            debug_trace: false,
122            trace_depth: 0,
123            function_registry: Arc::new(FunctionRegistry::new()),
124            generator_registry: Arc::new(GeneratorRegistry::new()),
125            mode: ParserMode::default(),
126        }
127    }
128
129    #[must_use]
130    pub fn with_columns(mut self, columns: Vec<String>) -> Self {
131        self.columns = columns;
132        self
133    }
134
135    #[must_use]
136    pub fn with_debug_trace(mut self, enabled: bool) -> Self {
137        self.debug_trace = enabled;
138        self
139    }
140
141    #[must_use]
142    pub fn with_function_registry(mut self, registry: Arc<FunctionRegistry>) -> Self {
143        self.function_registry = registry;
144        self
145    }
146
147    #[must_use]
148    pub fn with_generator_registry(mut self, registry: Arc<GeneratorRegistry>) -> Self {
149        self.generator_registry = registry;
150        self
151    }
152
153    fn trace_enter(&mut self, context: &str) {
154        if self.debug_trace {
155            let indent = "  ".repeat(self.trace_depth);
156            eprintln!("{}→ {} | Token: {:?}", indent, context, self.current_token);
157            self.trace_depth += 1;
158        }
159    }
160
161    fn trace_exit(&mut self, context: &str, result: &Result<impl std::fmt::Debug, String>) {
162        if self.debug_trace {
163            self.trace_depth = self.trace_depth.saturating_sub(1);
164            let indent = "  ".repeat(self.trace_depth);
165            match result {
166                Ok(val) => eprintln!("{}← {} ✓ | Result: {:?}", indent, context, val),
167                Err(e) => eprintln!("{}← {} ✗ | Error: {}", indent, context, e),
168            }
169        }
170    }
171
172    fn trace_token(&self, action: &str) {
173        if self.debug_trace {
174            let indent = "  ".repeat(self.trace_depth);
175            eprintln!("{}  {} | Token: {:?}", indent, action, self.current_token);
176        }
177    }
178
179    #[allow(dead_code)]
180    fn peek_token(&self) -> Option<Token> {
181        // Alternative peek that returns owned token
182        let mut temp_lexer = self.lexer.clone();
183        let next_token = temp_lexer.next_token();
184        if matches!(next_token, Token::Eof) {
185            None
186        } else {
187            Some(next_token)
188        }
189    }
190
191    /// Check if current token is one of the reserved keywords that should stop parsing
192    /// Check if an identifier string is a reserved keyword (for backward compatibility)
193    /// This is used when the lexer hasn't properly tokenized keywords and they come through
194    /// as Token::Identifier instead of their proper token types
195    fn is_identifier_reserved(id: &str) -> bool {
196        let id_upper = id.to_uppercase();
197        matches!(
198            id_upper.as_str(),
199            "ORDER" | "HAVING" | "LIMIT" | "OFFSET" | "UNION" | "INTERSECT" | "EXCEPT"
200        )
201    }
202
    /// String forms of the six comparison operators, each padded with a single
    /// space on both sides (noted in the original comment as being for
    /// autocomplete context).
    const COMPARISON_OPERATORS: [&'static str; 6] = [" > ", " < ", " >= ", " <= ", " = ", " != "];
205
206    pub fn consume(&mut self, expected: Token) -> Result<(), String> {
207        self.trace_token(&format!("Consuming expected {:?}", expected));
208        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
209            // Track parentheses depth
210            self.update_paren_depth(&expected)?;
211
212            self.current_token = self.lexer.next_token();
213            Ok(())
214        } else {
215            // Provide better error messages for common cases
216            let error_msg = match (&expected, &self.current_token) {
217                (Token::RightParen, Token::Eof) if self.paren_depth > 0 => {
218                    format!(
219                        "Unclosed parenthesis - missing {} closing parenthes{}",
220                        self.paren_depth,
221                        if self.paren_depth == 1 { "is" } else { "es" }
222                    )
223                }
224                (Token::RightParen, _) if self.paren_depth > 0 => {
225                    format!(
226                        "Expected closing parenthesis but found {:?} (currently {} unclosed parenthes{})",
227                        self.current_token,
228                        self.paren_depth,
229                        if self.paren_depth == 1 { "is" } else { "es" }
230                    )
231                }
232                _ => format!("Expected {:?}, found {:?}", expected, self.current_token),
233            };
234            Err(error_msg)
235        }
236    }
237
238    pub fn advance(&mut self) {
239        // Track parentheses depth when advancing
240        match &self.current_token {
241            Token::LeftParen => self.paren_depth += 1,
242            Token::RightParen => {
243                self.paren_depth -= 1;
244                // Note: We don't check for < 0 here because advance() is used
245                // in contexts where we're not necessarily expecting a right paren
246            }
247            _ => {}
248        }
249        let old_token = self.current_token.clone();
250        self.current_token = self.lexer.next_token();
251        if self.debug_trace {
252            let indent = "  ".repeat(self.trace_depth);
253            eprintln!(
254                "{}  Advanced: {:?} → {:?}",
255                indent, old_token, self.current_token
256            );
257        }
258    }
259
260    /// Collect all leading comments before a SQL construct
261    /// This consumes comment tokens and returns them as a Vec<Comment>
262    fn collect_leading_comments(&mut self) -> Vec<Comment> {
263        let mut comments = Vec::new();
264        loop {
265            match &self.current_token {
266                Token::LineComment(text) => {
267                    comments.push(Comment::line(text.clone()));
268                    self.advance();
269                }
270                Token::BlockComment(text) => {
271                    comments.push(Comment::block(text.clone()));
272                    self.advance();
273                }
274                _ => break,
275            }
276        }
277        comments
278    }
279
280    /// Collect a trailing inline comment (on the same line)
281    /// This consumes a single comment token if present
282    fn collect_trailing_comment(&mut self) -> Option<Comment> {
283        match &self.current_token {
284            Token::LineComment(text) => {
285                let comment = Some(Comment::line(text.clone()));
286                self.advance();
287                comment
288            }
289            Token::BlockComment(text) => {
290                let comment = Some(Comment::block(text.clone()));
291                self.advance();
292                comment
293            }
294            _ => None,
295        }
296    }
297
298    fn push_paren_depth(&mut self) {
299        self.paren_depth_stack.push(self.paren_depth);
300        self.paren_depth = 0;
301    }
302
303    fn pop_paren_depth(&mut self) {
304        if let Some(depth) = self.paren_depth_stack.pop() {
305            // Ignore the internal depth - just restore the saved value
306            self.paren_depth = depth;
307        }
308    }
309
310    pub fn parse(&mut self) -> Result<SelectStatement, String> {
311        self.trace_enter("parse");
312
313        // Collect leading comments FIRST (before checking for WITH or SELECT)
314        // This allows comments before WITH clauses to be preserved
315        let leading_comments = if self.mode == ParserMode::PreserveComments {
316            self.collect_leading_comments()
317        } else {
318            vec![]
319        };
320
321        // Now check for WITH clause (after consuming comments)
322        let result = if matches!(self.current_token, Token::With) {
323            let mut stmt = self.parse_with_clause()?;
324            // Attach the leading comments we collected
325            stmt.leading_comments = leading_comments;
326            stmt
327        } else {
328            // For SELECT without WITH, pass comments to inner parser
329            let stmt = self.parse_select_statement_with_comments_public(leading_comments)?;
330            self.check_balanced_parentheses()?;
331            stmt
332        };
333
334        self.trace_exit("parse", &Ok(&result));
335        Ok(result)
336    }
337
    /// Thin wrapper that forwards pre-collected comments to
    /// `parse_select_statement_with_comments`.
    ///
    /// NOTE(review): despite the name, this method is private and performs no
    /// parenthesis check itself; `parse` calls `check_balanced_parentheses`
    /// right after invoking it.
    fn parse_select_statement_with_comments_public(
        &mut self,
        comments: Vec<Comment>,
    ) -> Result<SelectStatement, String> {
        self.parse_select_statement_with_comments(comments)
    }
345
346    fn parse_with_clause(&mut self) -> Result<SelectStatement, String> {
347        self.consume(Token::With)?;
348        let ctes = self.parse_cte_list()?;
349
350        // Parse the main SELECT statement - use inner version since we're already tracking parens
351        let mut main_query = self.parse_select_statement_inner_no_comments()?;
352        main_query.ctes = ctes;
353
354        // Check for balanced parentheses at the end of parsing
355        self.check_balanced_parentheses()?;
356
357        Ok(main_query)
358    }
359
360    fn parse_with_clause_inner(&mut self) -> Result<SelectStatement, String> {
361        self.consume(Token::With)?;
362        let ctes = self.parse_cte_list()?;
363
364        // Parse the main SELECT statement (without parenthesis checking for subqueries)
365        let mut main_query = self.parse_select_statement_inner()?;
366        main_query.ctes = ctes;
367
368        Ok(main_query)
369    }
370
371    // Helper function to parse CTE list - eliminates duplication
372    fn parse_cte_list(&mut self) -> Result<Vec<CTE>, String> {
373        let mut ctes = Vec::new();
374
375        // Parse CTEs
376        loop {
377            // Check for WEB keyword for each CTE (can be different for each one)
378            let is_web = if matches!(&self.current_token, Token::Web) {
379                self.trace_token("Found WEB keyword for CTE");
380                self.advance();
381                true
382            } else {
383                false
384            };
385
386            // Parse CTE name
387            let name = match &self.current_token {
388                Token::Identifier(name) => name.clone(),
389                _ => {
390                    return Err(format!(
391                        "Expected CTE name after {}",
392                        if is_web { "WEB" } else { "WITH or comma" }
393                    ))
394                }
395            };
396            self.advance();
397
398            // Optional column list: WITH t(col1, col2) AS ...
399            let column_list = if matches!(self.current_token, Token::LeftParen) {
400                self.advance();
401                let cols = self.parse_identifier_list()?;
402                self.consume(Token::RightParen)?;
403                Some(cols)
404            } else {
405                None
406            };
407
408            // Expect AS
409            self.consume(Token::As)?;
410
411            let cte_type = if is_web {
412                // Expect opening parenthesis for WEB CTE
413                self.consume(Token::LeftParen)?;
414                // Parse WEB CTE specification using dedicated parser
415                let web_spec = WebCteParser::parse(self)?;
416                // Consume closing parenthesis for WEB CTE
417                self.consume(Token::RightParen)?;
418                CTEType::Web(web_spec)
419            } else {
420                // For standard CTEs, push depth BEFORE consuming opening paren
421                // This ensures the paren is counted in the inner context
422                self.push_paren_depth();
423                // Now consume opening parenthesis
424                self.consume(Token::LeftParen)?;
425                let query = self.parse_select_statement_inner()?;
426                // Expect closing parenthesis while still in CTE context
427                self.consume(Token::RightParen)?;
428                // Now pop to restore outer depth after consuming both parens
429                self.pop_paren_depth();
430                CTEType::Standard(query)
431            };
432
433            ctes.push(CTE {
434                name,
435                column_list,
436                cte_type,
437            });
438
439            // Check for more CTEs
440            if !matches!(self.current_token, Token::Comma) {
441                break;
442            }
443            self.advance();
444        }
445
446        Ok(ctes)
447    }
448
449    /// Helper function to parse an optional table alias (with or without AS keyword)
450    fn parse_optional_alias(&mut self) -> Result<Option<String>, String> {
451        if matches!(self.current_token, Token::As) {
452            self.advance();
453            match &self.current_token {
454                Token::Identifier(name) => {
455                    let alias = name.clone();
456                    self.advance();
457                    Ok(Some(alias))
458                }
459                token => {
460                    // Check if it's a reserved keyword - provide helpful error
461                    if let Some(keyword) = token.as_keyword_str() {
462                        Err(format!(
463                            "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
464                            keyword,
465                            keyword.to_lowercase()
466                        ))
467                    } else {
468                        Err("Expected alias name after AS".to_string())
469                    }
470                }
471            }
472        } else if let Token::Identifier(name) = &self.current_token {
473            // AS is optional for table aliases
474            let alias = name.clone();
475            self.advance();
476            Ok(Some(alias))
477        } else {
478            Ok(None)
479        }
480    }
481
482    /// Helper function to check if an identifier is valid (quoted or regular)
483    fn is_valid_identifier(name: &str) -> bool {
484        if name.starts_with('"') && name.ends_with('"') {
485            // Quoted identifier - always valid
486            true
487        } else {
488            // Regular identifier - check if it's alphanumeric or underscore
489            name.chars().all(|c| c.is_alphanumeric() || c == '_')
490        }
491    }
492
493    /// Helper function to update parentheses depth tracking
494    fn update_paren_depth(&mut self, token: &Token) -> Result<(), String> {
495        match token {
496            Token::LeftParen => self.paren_depth += 1,
497            Token::RightParen => {
498                self.paren_depth -= 1;
499                // Check for extra closing parenthesis
500                if self.paren_depth < 0 {
501                    return Err(
502                        "Unexpected closing parenthesis - no matching opening parenthesis"
503                            .to_string(),
504                    );
505                }
506            }
507            _ => {}
508        }
509        Ok(())
510    }
511
512    /// Helper function to parse comma-separated argument list
513    fn parse_argument_list(&mut self) -> Result<Vec<SqlExpression>, String> {
514        let mut args = Vec::new();
515
516        if !matches!(self.current_token, Token::RightParen) {
517            loop {
518                args.push(self.parse_expression()?);
519
520                if matches!(self.current_token, Token::Comma) {
521                    self.advance();
522                } else {
523                    break;
524                }
525            }
526        }
527
528        Ok(args)
529    }
530
531    /// Helper function to check for balanced parentheses at the end of parsing
532    fn check_balanced_parentheses(&self) -> Result<(), String> {
533        if self.paren_depth > 0 {
534            Err(format!(
535                "Unclosed parenthesis - missing {} closing parenthes{}",
536                self.paren_depth,
537                if self.paren_depth == 1 { "is" } else { "es" }
538            ))
539        } else if self.paren_depth < 0 {
540            Err("Extra closing parenthesis found - no matching opening parenthesis".to_string())
541        } else {
542            Ok(())
543        }
544    }
545
546    /// Check if an expression contains aggregate functions (COUNT, SUM, AVG, etc.)
547    /// This is used to detect unsupported patterns in HAVING clause
548    fn contains_aggregate_function(expr: &SqlExpression) -> bool {
549        match expr {
550            SqlExpression::FunctionCall { name, args, .. } => {
551                // Check if this is an aggregate function
552                let upper_name = name.to_uppercase();
553                let is_aggregate = matches!(
554                    upper_name.as_str(),
555                    "COUNT" | "SUM" | "AVG" | "MIN" | "MAX" | "GROUP_CONCAT" | "STRING_AGG"
556                );
557
558                // If this is an aggregate, return true
559                // Otherwise, recursively check arguments
560                is_aggregate || args.iter().any(Self::contains_aggregate_function)
561            }
562            // Recursively check nested expressions
563            SqlExpression::BinaryOp { left, right, .. } => {
564                Self::contains_aggregate_function(left) || Self::contains_aggregate_function(right)
565            }
566            SqlExpression::Not { expr } => Self::contains_aggregate_function(expr),
567            SqlExpression::MethodCall { args, .. } => {
568                args.iter().any(Self::contains_aggregate_function)
569            }
570            SqlExpression::ChainedMethodCall { base, args, .. } => {
571                Self::contains_aggregate_function(base)
572                    || args.iter().any(Self::contains_aggregate_function)
573            }
574            SqlExpression::CaseExpression {
575                when_branches,
576                else_branch,
577            } => {
578                when_branches.iter().any(|branch| {
579                    Self::contains_aggregate_function(&branch.condition)
580                        || Self::contains_aggregate_function(&branch.result)
581                }) || else_branch
582                    .as_ref()
583                    .map_or(false, |e| Self::contains_aggregate_function(e))
584            }
585            SqlExpression::SimpleCaseExpression {
586                expr,
587                when_branches,
588                else_branch,
589            } => {
590                Self::contains_aggregate_function(expr)
591                    || when_branches.iter().any(|branch| {
592                        Self::contains_aggregate_function(&branch.value)
593                            || Self::contains_aggregate_function(&branch.result)
594                    })
595                    || else_branch
596                        .as_ref()
597                        .map_or(false, |e| Self::contains_aggregate_function(e))
598            }
599            SqlExpression::ScalarSubquery { query } => {
600                // Subqueries can have their own aggregates, but that's fine
601                // We're only checking the outer HAVING clause
602                query
603                    .having
604                    .as_ref()
605                    .map_or(false, |h| Self::contains_aggregate_function(h))
606            }
607            // Leaf nodes - no aggregates
608            SqlExpression::Column(_)
609            | SqlExpression::StringLiteral(_)
610            | SqlExpression::NumberLiteral(_)
611            | SqlExpression::BooleanLiteral(_)
612            | SqlExpression::Null
613            | SqlExpression::DateTimeConstructor { .. }
614            | SqlExpression::DateTimeToday { .. } => false,
615
616            // Window functions contain aggregates by definition
617            SqlExpression::WindowFunction { .. } => true,
618
619            // Between has three parts to check
620            SqlExpression::Between { expr, lower, upper } => {
621                Self::contains_aggregate_function(expr)
622                    || Self::contains_aggregate_function(lower)
623                    || Self::contains_aggregate_function(upper)
624            }
625
626            // IN list - check expr and all values
627            SqlExpression::InList { expr, values } | SqlExpression::NotInList { expr, values } => {
628                Self::contains_aggregate_function(expr)
629                    || values.iter().any(Self::contains_aggregate_function)
630            }
631
632            // IN subquery - check expr and subquery
633            SqlExpression::InSubquery { expr, subquery }
634            | SqlExpression::NotInSubquery { expr, subquery } => {
635                Self::contains_aggregate_function(expr)
636                    || subquery
637                        .having
638                        .as_ref()
639                        .map_or(false, |h| Self::contains_aggregate_function(h))
640            }
641
642            // UNNEST - check column expression
643            SqlExpression::Unnest { column, .. } => Self::contains_aggregate_function(column),
644        }
645    }
646
647    fn parse_select_statement(&mut self) -> Result<SelectStatement, String> {
648        self.trace_enter("parse_select_statement");
649        let result = self.parse_select_statement_inner()?;
650
651        // Check for balanced parentheses at the end of parsing
652        self.check_balanced_parentheses()?;
653
654        Ok(result)
655    }
656
657    fn parse_select_statement_inner(&mut self) -> Result<SelectStatement, String> {
658        // Collect leading comments ONLY in PreserveComments mode
659        let leading_comments = if self.mode == ParserMode::PreserveComments {
660            self.collect_leading_comments()
661        } else {
662            vec![]
663        };
664
665        self.parse_select_statement_with_comments(leading_comments)
666    }
667
668    /// Parse SELECT statement without collecting leading comments
669    /// Used when comments were already collected (e.g., before WITH clause)
670    fn parse_select_statement_inner_no_comments(&mut self) -> Result<SelectStatement, String> {
671        self.parse_select_statement_with_comments(vec![])
672    }
673
674    /// Core SELECT parsing logic - takes pre-collected comments
675    fn parse_select_statement_with_comments(
676        &mut self,
677        leading_comments: Vec<Comment>,
678    ) -> Result<SelectStatement, String> {
679        self.consume(Token::Select)?;
680
681        // Check for DISTINCT keyword
682        let distinct = if matches!(self.current_token, Token::Distinct) {
683            self.advance();
684            true
685        } else {
686            false
687        };
688
689        // Parse SELECT items (supports computed expressions)
690        let select_items = self.parse_select_items()?;
691
692        // Create legacy columns vector for backward compatibility
693        let columns = select_items
694            .iter()
695            .map(|item| match item {
696                SelectItem::Star { .. } => "*".to_string(),
697                SelectItem::StarExclude { .. } => "*".to_string(), // Treated as * in legacy columns
698                SelectItem::Column {
699                    column: col_ref, ..
700                } => col_ref.name.clone(),
701                SelectItem::Expression { alias, .. } => alias.clone(),
702            })
703            .collect();
704
705        // Parse INTO clause (for temporary tables) - comes immediately after SELECT items
706        let into_table = if matches!(self.current_token, Token::Into) {
707            self.advance();
708            Some(self.parse_into_clause()?)
709        } else {
710            None
711        };
712
713        // Parse FROM clause - can be a table name, subquery, or table function
714        let (from_table, from_subquery, from_function, from_alias) = if matches!(
715            self.current_token,
716            Token::From
717        ) {
718            self.advance();
719
720            // Check for table function like RANGE()
721            if let Token::Identifier(name) = &self.current_token.clone() {
722                // Check if this is a table function by consulting the registry
723                // We need to lookahead to see if there's a parenthesis to distinguish
724                // between a function call and a table with the same name
725                let has_paren = self.peek_token() == Some(Token::LeftParen);
726                if self.debug_trace {
727                    eprintln!(
728                        "  Checking {} for table function, has_paren={}",
729                        name, has_paren
730                    );
731                }
732
733                // Check if it's a known table function or generator
734                // In FROM clause context, prioritize generators over scalar functions
735                let is_table_function = if has_paren {
736                    // First check generator registry (for FROM clause context)
737                    if self.debug_trace {
738                        eprintln!("  Checking generator registry for {}", name.to_uppercase());
739                    }
740                    if let Some(_gen) = self.generator_registry.get(&name.to_uppercase()) {
741                        if self.debug_trace {
742                            eprintln!("  Found {} in generator registry", name);
743                        }
744                        self.trace_token(&format!("Found generator: {}", name));
745                        true
746                    } else {
747                        // Then check if it's a table function in the function registry
748                        if let Some(func) = self.function_registry.get(&name.to_uppercase()) {
749                            let sig = func.signature();
750                            let is_table_fn = sig.category == FunctionCategory::TableFunction;
751                            if self.debug_trace {
752                                eprintln!(
753                                    "  Found {} in function registry, is_table_function={}",
754                                    name, is_table_fn
755                                );
756                            }
757                            if is_table_fn {
758                                self.trace_token(&format!(
759                                    "Found table function in function registry: {}",
760                                    name
761                                ));
762                            }
763                            is_table_fn
764                        } else {
765                            if self.debug_trace {
766                                eprintln!("  {} not found in either registry", name);
767                                self.trace_token(&format!(
768                                    "Not found as generator or table function: {}",
769                                    name
770                                ));
771                            }
772                            false
773                        }
774                    }
775                } else {
776                    if self.debug_trace {
777                        eprintln!("  No parenthesis after {}, treating as table", name);
778                    }
779                    false
780                };
781
782                if is_table_function {
783                    // Parse table function
784                    let function_name = name.clone();
785                    self.advance(); // Skip function name
786
787                    // Parse arguments
788                    self.consume(Token::LeftParen)?;
789                    let args = self.parse_argument_list()?;
790                    self.consume(Token::RightParen)?;
791
792                    // Optional alias
793                    let alias = if matches!(self.current_token, Token::As) {
794                        self.advance();
795                        match &self.current_token {
796                            Token::Identifier(name) => {
797                                let alias = name.clone();
798                                self.advance();
799                                Some(alias)
800                            }
801                            token => {
802                                if let Some(keyword) = token.as_keyword_str() {
803                                    return Err(format!(
804                                            "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
805                                            keyword,
806                                            keyword.to_lowercase()
807                                        ));
808                                } else {
809                                    return Err("Expected alias name after AS".to_string());
810                                }
811                            }
812                        }
813                    } else if let Token::Identifier(name) = &self.current_token {
814                        let alias = name.clone();
815                        self.advance();
816                        Some(alias)
817                    } else {
818                        None
819                    };
820
821                    (
822                        None,
823                        None,
824                        Some(TableFunction::Generator {
825                            name: function_name,
826                            args,
827                        }),
828                        alias,
829                    )
830                } else {
831                    // Not a RANGE, SPLIT, or generator function, so it's a regular table name
832                    let table_name = name.clone();
833                    self.advance();
834
835                    // Check for optional alias
836                    let alias = self.parse_optional_alias()?;
837
838                    (Some(table_name), None, None, alias)
839                }
840            } else if matches!(self.current_token, Token::LeftParen) {
841                // Check for subquery: FROM (SELECT ...) or FROM (WITH ... SELECT ...)
842                self.advance();
843
844                // Parse the subquery - it might start with WITH
845                let subquery = if matches!(self.current_token, Token::With) {
846                    self.parse_with_clause_inner()?
847                } else {
848                    self.parse_select_statement_inner()?
849                };
850
851                self.consume(Token::RightParen)?;
852
853                // Subqueries must have an alias
854                let alias = if matches!(self.current_token, Token::As) {
855                    self.advance();
856                    match &self.current_token {
857                        Token::Identifier(name) => {
858                            let alias = name.clone();
859                            self.advance();
860                            alias
861                        }
862                        token => {
863                            if let Some(keyword) = token.as_keyword_str() {
864                                return Err(format!(
865                                        "Reserved keyword '{}' cannot be used as subquery alias. Use a different name or quote it with double quotes: \"{}\"",
866                                        keyword,
867                                        keyword.to_lowercase()
868                                    ));
869                            } else {
870                                return Err("Expected alias name after AS".to_string());
871                            }
872                        }
873                    }
874                } else {
875                    // AS is optional, but alias is required
876                    match &self.current_token {
877                        Token::Identifier(name) => {
878                            let alias = name.clone();
879                            self.advance();
880                            alias
881                        }
882                        _ => {
883                            return Err(
884                                "Subquery in FROM must have an alias (e.g., AS t)".to_string()
885                            )
886                        }
887                    }
888                };
889
890                (None, Some(Box::new(subquery)), None, Some(alias))
891            } else {
892                // Regular table name
893                match &self.current_token {
894                    Token::Identifier(table) => {
895                        let table_name = table.clone();
896                        self.advance();
897
898                        // Check for optional alias
899                        let alias = self.parse_optional_alias()?;
900
901                        (Some(table_name), None, None, alias)
902                    }
903                    Token::QuotedIdentifier(table) => {
904                        // Handle quoted table names
905                        let table_name = table.clone();
906                        self.advance();
907
908                        // Check for optional alias
909                        let alias = self.parse_optional_alias()?;
910
911                        (Some(table_name), None, None, alias)
912                    }
913                    _ => return Err("Expected table name or subquery after FROM".to_string()),
914                }
915            }
916        } else {
917            (None, None, None, None)
918        };
919
920        // Parse JOIN clauses
921        let mut joins = Vec::new();
922        while self.is_join_token() {
923            joins.push(self.parse_join_clause()?);
924        }
925
926        let where_clause = if matches!(self.current_token, Token::Where) {
927            self.advance();
928            Some(self.parse_where_clause()?)
929        } else {
930            None
931        };
932
933        let group_by = if matches!(self.current_token, Token::GroupBy) {
934            self.advance();
935            // Parse expressions instead of just identifiers for GROUP BY
936            // This allows GROUP BY TIME_BUCKET(...), CASE ..., etc.
937            Some(self.parse_expression_list()?)
938        } else {
939            None
940        };
941
942        // Parse HAVING clause (must come after GROUP BY)
943        let having = if matches!(self.current_token, Token::Having) {
944            if group_by.is_none() {
945                return Err("HAVING clause requires GROUP BY".to_string());
946            }
947            self.advance();
948            let having_expr = self.parse_expression()?;
949
950            // Note: Aggregate functions in HAVING are now supported via the
951            // HavingAliasTransformer preprocessing step, which automatically
952            // adds aliases and rewrites the HAVING clause to use them.
953
954            Some(having_expr)
955        } else {
956            None
957        };
958
959        // Parse QUALIFY clause (Snowflake-style window function filtering)
960        // QUALIFY filters on window function results without needing a subquery
961        // Example: SELECT *, ROW_NUMBER() OVER (...) AS rn FROM t QUALIFY rn <= 3
962        let qualify = if matches!(self.current_token, Token::Qualify) {
963            self.advance();
964            let qualify_expr = self.parse_expression()?;
965
966            // Note: QUALIFY is handled by the QualifyToWhereTransformer preprocessing step
967            // which converts it to WHERE after window functions are lifted to CTEs
968
969            Some(qualify_expr)
970        } else {
971            None
972        };
973
974        // Parse ORDER BY clause (comes after GROUP BY, HAVING, and QUALIFY)
975        let order_by = if matches!(self.current_token, Token::OrderBy) {
976            self.trace_token("Found OrderBy token");
977            self.advance();
978            Some(self.parse_order_by_list()?)
979        } else if let Token::Identifier(s) = &self.current_token {
980            // This shouldn't happen if the lexer properly tokenizes ORDER BY
981            // But keeping as fallback for compatibility
982            if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
983                self.trace_token("Warning: ORDER as identifier instead of OrderBy token");
984                self.advance(); // consume ORDER
985                if matches!(&self.current_token, Token::By) {
986                    self.advance(); // consume BY
987                    Some(self.parse_order_by_list()?)
988                } else {
989                    return Err("Expected BY after ORDER".to_string());
990                }
991            } else {
992                None
993            }
994        } else {
995            None
996        };
997
998        // Parse LIMIT clause
999        let limit = if matches!(self.current_token, Token::Limit) {
1000            self.advance();
1001            match &self.current_token {
1002                Token::NumberLiteral(num) => {
1003                    let limit_val = num
1004                        .parse::<usize>()
1005                        .map_err(|_| format!("Invalid LIMIT value: {num}"))?;
1006                    self.advance();
1007                    Some(limit_val)
1008                }
1009                _ => return Err("Expected number after LIMIT".to_string()),
1010            }
1011        } else {
1012            None
1013        };
1014
1015        // Parse OFFSET clause
1016        let offset = if matches!(self.current_token, Token::Offset) {
1017            self.advance();
1018            match &self.current_token {
1019                Token::NumberLiteral(num) => {
1020                    let offset_val = num
1021                        .parse::<usize>()
1022                        .map_err(|_| format!("Invalid OFFSET value: {num}"))?;
1023                    self.advance();
1024                    Some(offset_val)
1025                }
1026                _ => return Err("Expected number after OFFSET".to_string()),
1027            }
1028        } else {
1029            None
1030        };
1031
1032        // Parse INTO clause (alternative position - SQL Server also supports INTO after all clauses)
1033        // This handles: SELECT * FROM table WHERE x > 5 INTO #temp
1034        // If INTO was already parsed after SELECT, this will be None (can't have two INTOs)
1035        let into_table = if into_table.is_none() && matches!(self.current_token, Token::Into) {
1036            self.advance();
1037            Some(self.parse_into_clause()?)
1038        } else {
1039            into_table // Keep the one from after SELECT if it exists
1040        };
1041
1042        // Parse UNION/INTERSECT/EXCEPT operations
1043        let set_operations = self.parse_set_operations()?;
1044
1045        // Collect trailing comment ONLY in PreserveComments mode
1046        let trailing_comment = if self.mode == ParserMode::PreserveComments {
1047            self.collect_trailing_comment()
1048        } else {
1049            None
1050        };
1051
1052        Ok(SelectStatement {
1053            distinct,
1054            columns,
1055            select_items,
1056            from_table,
1057            from_subquery,
1058            from_function,
1059            from_alias,
1060            joins,
1061            where_clause,
1062            order_by,
1063            group_by,
1064            having,
1065            qualify,
1066            limit,
1067            offset,
1068            ctes: Vec::new(), // Will be populated by WITH clause parser
1069            into_table,
1070            set_operations,
1071            leading_comments,
1072            trailing_comment,
1073        })
1074    }
1075
1076    /// Parse UNION/INTERSECT/EXCEPT operations
1077    /// Returns a vector of (operation, select_statement) pairs
1078    fn parse_set_operations(
1079        &mut self,
1080    ) -> Result<Vec<(SetOperation, Box<SelectStatement>)>, String> {
1081        let mut operations = Vec::new();
1082
1083        while matches!(
1084            self.current_token,
1085            Token::Union | Token::Intersect | Token::Except
1086        ) {
1087            // Determine the operation type
1088            let operation = match &self.current_token {
1089                Token::Union => {
1090                    self.advance();
1091                    // Check for ALL keyword
1092                    if let Token::Identifier(id) = &self.current_token {
1093                        if id.to_uppercase() == "ALL" {
1094                            self.advance();
1095                            SetOperation::UnionAll
1096                        } else {
1097                            SetOperation::Union
1098                        }
1099                    } else {
1100                        SetOperation::Union
1101                    }
1102                }
1103                Token::Intersect => {
1104                    self.advance();
1105                    SetOperation::Intersect
1106                }
1107                Token::Except => {
1108                    self.advance();
1109                    SetOperation::Except
1110                }
1111                _ => unreachable!(),
1112            };
1113
1114            // Parse the next SELECT statement
1115            let next_select = self.parse_select_statement_inner()?;
1116
1117            operations.push((operation, Box::new(next_select)));
1118        }
1119
1120        Ok(operations)
1121    }
1122
1123    /// Parse SELECT items that support computed expressions with aliases
1124    fn parse_select_items(&mut self) -> Result<Vec<SelectItem>, String> {
1125        let mut items = Vec::new();
1126
1127        loop {
1128            // Check for qualified star (table.*) or unqualified star (*)
1129            // First check if we have identifier.* pattern
1130            if let Token::Identifier(name) = &self.current_token.clone() {
1131                // Peek ahead to check for .* pattern
1132                let saved_pos = self.lexer.clone();
1133                let saved_token = self.current_token.clone();
1134                let table_name = name.clone();
1135
1136                self.advance();
1137
1138                if matches!(self.current_token, Token::Dot) {
1139                    self.advance();
1140                    if matches!(self.current_token, Token::Star) {
1141                        // This is table.* pattern
1142                        items.push(SelectItem::Star {
1143                            table_prefix: Some(table_name),
1144                            leading_comments: vec![],
1145                            trailing_comment: None,
1146                        });
1147                        self.advance();
1148
1149                        // Continue to next item or end
1150                        if matches!(self.current_token, Token::Comma) {
1151                            self.advance();
1152                            continue;
1153                        } else {
1154                            break;
1155                        }
1156                    }
1157                }
1158
1159                // Not table.*, restore position and continue with normal parsing
1160                self.lexer = saved_pos;
1161                self.current_token = saved_token;
1162            }
1163
1164            // Check for unqualified *
1165            if matches!(self.current_token, Token::Star) {
1166                self.advance(); // consume *
1167
1168                // Check for EXCLUDE clause
1169                if matches!(self.current_token, Token::Exclude) {
1170                    self.advance(); // consume EXCLUDE
1171
1172                    // Expect opening paren
1173                    if !matches!(self.current_token, Token::LeftParen) {
1174                        return Err("Expected '(' after EXCLUDE".to_string());
1175                    }
1176                    self.advance(); // consume (
1177
1178                    // Parse column list
1179                    let mut excluded_columns = Vec::new();
1180                    loop {
1181                        match &self.current_token {
1182                            Token::Identifier(col_name) | Token::QuotedIdentifier(col_name) => {
1183                                excluded_columns.push(col_name.clone());
1184                                self.advance();
1185                            }
1186                            _ => return Err("Expected column name in EXCLUDE list".to_string()),
1187                        }
1188
1189                        // Check for comma or closing paren
1190                        if matches!(self.current_token, Token::Comma) {
1191                            self.advance();
1192                        } else if matches!(self.current_token, Token::RightParen) {
1193                            self.advance(); // consume )
1194                            break;
1195                        } else {
1196                            return Err("Expected ',' or ')' in EXCLUDE list".to_string());
1197                        }
1198                    }
1199
1200                    if excluded_columns.is_empty() {
1201                        return Err("EXCLUDE list cannot be empty".to_string());
1202                    }
1203
1204                    items.push(SelectItem::StarExclude {
1205                        table_prefix: None,
1206                        excluded_columns,
1207                        leading_comments: vec![],
1208                        trailing_comment: None,
1209                    });
1210                } else {
1211                    // Regular * without EXCLUDE
1212                    items.push(SelectItem::Star {
1213                        table_prefix: None,
1214                        leading_comments: vec![],
1215                        trailing_comment: None,
1216                    });
1217                }
1218            } else {
1219                // Parse expression or column
1220                let expr = self.parse_comparison()?; // Use comparison to support IS NULL and other comparisons
1221
1222                // Check for AS alias
1223                let alias = if matches!(self.current_token, Token::As) {
1224                    self.advance();
1225                    match &self.current_token {
1226                        Token::Identifier(alias_name) => {
1227                            let alias = alias_name.clone();
1228                            self.advance();
1229                            alias
1230                        }
1231                        Token::QuotedIdentifier(alias_name) => {
1232                            let alias = alias_name.clone();
1233                            self.advance();
1234                            alias
1235                        }
1236                        token => {
1237                            if let Some(keyword) = token.as_keyword_str() {
1238                                return Err(format!(
1239                                    "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
1240                                    keyword,
1241                                    keyword.to_lowercase()
1242                                ));
1243                            } else {
1244                                return Err("Expected alias name after AS".to_string());
1245                            }
1246                        }
1247                    }
1248                } else {
1249                    // Generate default alias based on expression
1250                    match &expr {
1251                        SqlExpression::Column(col_ref) => col_ref.name.clone(),
1252                        _ => format!("expr_{}", items.len() + 1), // Default alias for computed expressions
1253                    }
1254                };
1255
1256                // Create SelectItem based on expression type
1257                let item = match expr {
1258                    SqlExpression::Column(col_ref) if alias == col_ref.name => {
1259                        // Simple column reference without alias
1260                        SelectItem::Column {
1261                            column: col_ref,
1262                            leading_comments: vec![],
1263                            trailing_comment: None,
1264                        }
1265                    }
1266                    _ => {
1267                        // Computed expression or column with different alias
1268                        SelectItem::Expression {
1269                            expr,
1270                            alias,
1271                            leading_comments: vec![],
1272                            trailing_comment: None,
1273                        }
1274                    }
1275                };
1276
1277                items.push(item);
1278            }
1279
1280            // Check for comma to continue
1281            if matches!(self.current_token, Token::Comma) {
1282                self.advance();
1283            } else {
1284                break;
1285            }
1286        }
1287
1288        Ok(items)
1289    }
1290
1291    fn parse_identifier_list(&mut self) -> Result<Vec<String>, String> {
1292        let mut identifiers = Vec::new();
1293
1294        loop {
1295            match &self.current_token {
1296                Token::Identifier(id) => {
1297                    // Check if this is a reserved keyword that should stop identifier parsing
1298                    if Self::is_identifier_reserved(id) {
1299                        // Stop parsing identifiers if we hit a reserved keyword
1300                        break;
1301                    }
1302                    identifiers.push(id.clone());
1303                    self.advance();
1304                }
1305                Token::QuotedIdentifier(id) => {
1306                    // Handle quoted identifiers like "Customer Id"
1307                    identifiers.push(id.clone());
1308                    self.advance();
1309                }
1310                _ => {
1311                    // Stop parsing if we hit any other token type
1312                    break;
1313                }
1314            }
1315
1316            if matches!(self.current_token, Token::Comma) {
1317                self.advance();
1318            } else {
1319                break;
1320            }
1321        }
1322
1323        if identifiers.is_empty() {
1324            return Err("Expected at least one identifier".to_string());
1325        }
1326
1327        Ok(identifiers)
1328    }
1329
1330    fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
1331        let mut partition_by = Vec::new();
1332        let mut order_by = Vec::new();
1333
1334        // Check for PARTITION BY
1335        if matches!(self.current_token, Token::Partition) {
1336            self.advance(); // consume PARTITION
1337            if !matches!(self.current_token, Token::By) {
1338                return Err("Expected BY after PARTITION".to_string());
1339            }
1340            self.advance(); // consume BY
1341
1342            // Parse partition columns
1343            partition_by = self.parse_identifier_list()?;
1344        }
1345
1346        // Check for ORDER BY
1347        if matches!(self.current_token, Token::OrderBy) {
1348            self.advance(); // consume ORDER BY (as single token)
1349            order_by = self.parse_order_by_list()?;
1350        } else if let Token::Identifier(s) = &self.current_token {
1351            if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
1352                // Handle ORDER BY as two tokens
1353                self.advance(); // consume ORDER
1354                if !matches!(self.current_token, Token::By) {
1355                    return Err("Expected BY after ORDER".to_string());
1356                }
1357                self.advance(); // consume BY
1358                order_by = self.parse_order_by_list()?;
1359            }
1360        }
1361
1362        // Parse optional window frame (ROWS/RANGE BETWEEN ... AND ...)
1363        let frame = self.parse_window_frame()?;
1364
1365        Ok(WindowSpec {
1366            partition_by,
1367            order_by,
1368            frame,
1369        })
1370    }
1371
1372    fn parse_order_by_list(&mut self) -> Result<Vec<OrderByItem>, String> {
1373        let mut order_items = Vec::new();
1374
1375        loop {
1376            // Parse ANY expression (not just column names)
1377            // This supports:
1378            // - Simple columns: region
1379            // - Qualified columns: table.column
1380            // - Aggregate functions: SUM(sales_amount)
1381            // - Arithmetic: sales_amount * 1.1
1382            // - CASE expressions: CASE WHEN ... END
1383            let expr = self.parse_expression()?;
1384
1385            // Check for ASC/DESC
1386            let direction = match &self.current_token {
1387                Token::Asc => {
1388                    self.advance();
1389                    SortDirection::Asc
1390                }
1391                Token::Desc => {
1392                    self.advance();
1393                    SortDirection::Desc
1394                }
1395                _ => SortDirection::Asc, // Default to ASC if not specified
1396            };
1397
1398            order_items.push(OrderByItem { expr, direction });
1399
1400            if matches!(self.current_token, Token::Comma) {
1401                self.advance();
1402            } else {
1403                break;
1404            }
1405        }
1406
1407        Ok(order_items)
1408    }
1409
1410    /// Parse INTO clause for temporary tables
1411    /// Syntax: INTO #table_name
1412    fn parse_into_clause(&mut self) -> Result<IntoTable, String> {
1413        // Expect an identifier starting with #
1414        let name = match &self.current_token {
1415            Token::Identifier(id) if id.starts_with('#') => {
1416                let table_name = id.clone();
1417                self.advance();
1418                table_name
1419            }
1420            Token::Identifier(id) => {
1421                return Err(format!(
1422                    "Temporary table name must start with #, got: {}",
1423                    id
1424                ));
1425            }
1426            _ => {
1427                return Err(
1428                    "Expected temporary table name (starting with #) after INTO".to_string()
1429                );
1430            }
1431        };
1432
1433        Ok(IntoTable { name })
1434    }
1435
1436    fn parse_window_frame(&mut self) -> Result<Option<WindowFrame>, String> {
1437        // Check for ROWS or RANGE keyword
1438        let unit = match &self.current_token {
1439            Token::Rows => {
1440                self.advance();
1441                FrameUnit::Rows
1442            }
1443            Token::Identifier(id) if id.to_uppercase() == "RANGE" => {
1444                // RANGE as window frame unit
1445                self.advance();
1446                FrameUnit::Range
1447            }
1448            _ => return Ok(None), // No window frame specified
1449        };
1450
1451        // Check for BETWEEN or just a single bound
1452        let (start, end) = if let Token::Between = &self.current_token {
1453            self.advance(); // consume BETWEEN
1454                            // Parse start bound
1455            let start = self.parse_frame_bound()?;
1456
1457            // Expect AND
1458            if !matches!(&self.current_token, Token::And) {
1459                return Err("Expected AND after window frame start bound".to_string());
1460            }
1461            self.advance();
1462
1463            // Parse end bound
1464            let end = self.parse_frame_bound()?;
1465            (start, Some(end))
1466        } else {
1467            // Single bound (e.g., "ROWS 5 PRECEDING")
1468            let bound = self.parse_frame_bound()?;
1469            (bound, None)
1470        };
1471
1472        Ok(Some(WindowFrame { unit, start, end }))
1473    }
1474
1475    fn parse_frame_bound(&mut self) -> Result<FrameBound, String> {
1476        match &self.current_token {
1477            Token::Unbounded => {
1478                self.advance();
1479                match &self.current_token {
1480                    Token::Preceding => {
1481                        self.advance();
1482                        Ok(FrameBound::UnboundedPreceding)
1483                    }
1484                    Token::Following => {
1485                        self.advance();
1486                        Ok(FrameBound::UnboundedFollowing)
1487                    }
1488                    _ => Err("Expected PRECEDING or FOLLOWING after UNBOUNDED".to_string()),
1489                }
1490            }
1491            Token::Current => {
1492                self.advance();
1493                if matches!(&self.current_token, Token::Row) {
1494                    self.advance();
1495                    return Ok(FrameBound::CurrentRow);
1496                }
1497                Err("Expected ROW after CURRENT".to_string())
1498            }
1499            Token::NumberLiteral(num) => {
1500                let n: i64 = num
1501                    .parse()
1502                    .map_err(|_| "Invalid number in window frame".to_string())?;
1503                self.advance();
1504                match &self.current_token {
1505                    Token::Preceding => {
1506                        self.advance();
1507                        Ok(FrameBound::Preceding(n))
1508                    }
1509                    Token::Following => {
1510                        self.advance();
1511                        Ok(FrameBound::Following(n))
1512                    }
1513                    _ => Err("Expected PRECEDING or FOLLOWING after number".to_string()),
1514                }
1515            }
1516            _ => Err("Invalid window frame bound".to_string()),
1517        }
1518    }
1519
1520    fn parse_where_clause(&mut self) -> Result<WhereClause, String> {
1521        // Parse the entire WHERE clause as a single expression tree
1522        // The logical operators (AND/OR) are now handled within parse_expression
1523        let expr = self.parse_expression()?;
1524
1525        // Check for unexpected closing parenthesis
1526        if matches!(self.current_token, Token::RightParen) && self.paren_depth <= 0 {
1527            return Err(
1528                "Unexpected closing parenthesis - no matching opening parenthesis".to_string(),
1529            );
1530        }
1531
1532        // Create a single condition with the entire expression
1533        let conditions = vec![Condition {
1534            expr,
1535            connector: None,
1536        }];
1537
1538        Ok(WhereClause { conditions })
1539    }
1540
1541    fn parse_expression(&mut self) -> Result<SqlExpression, String> {
1542        self.trace_enter("parse_expression");
1543        // Start with logical OR as the lowest precedence operator
1544        // The hierarchy is: OR -> AND -> comparison -> additive -> multiplicative -> primary
1545        let mut left = self.parse_logical_or()?;
1546
1547        // Handle IN operator (not preceded by NOT)
1548        // This uses the modular comparison module
1549        left = parse_in_operator(self, left)?;
1550
1551        let result = Ok(left);
1552        self.trace_exit("parse_expression", &result);
1553        result
1554    }
1555
    /// Parses a comparison-level expression.
    ///
    /// Thin wrapper that delegates to the modular comparison parser
    /// (`expressions::comparison::parse_comparison`, imported here as
    /// `parse_comparison_expr`).
    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
        parse_comparison_expr(self)
    }
1560
    /// Parses an additive-level expression.
    ///
    /// Thin wrapper that delegates to the modular arithmetic parser
    /// (`expressions::arithmetic::parse_additive`, imported here as
    /// `parse_additive_expr`).
    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
        parse_additive_expr(self)
    }
1565
    /// Parses a multiplicative-level expression.
    ///
    /// Thin wrapper that delegates to the modular arithmetic parser
    /// (`expressions::arithmetic::parse_multiplicative`, imported here as
    /// `parse_multiplicative_expr`).
    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
        parse_multiplicative_expr(self)
    }
1570
    /// Parses a logical-OR expression, the lowest-precedence level used by
    /// `parse_expression`.
    ///
    /// Thin wrapper that delegates to the modular logical parser
    /// (`expressions::logical::parse_logical_or`, imported here as
    /// `parse_logical_or_expr`).
    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
        parse_logical_or_expr(self)
    }
1575
    /// Parses a logical-AND expression.
    ///
    /// Thin wrapper that delegates to the modular logical parser
    /// (`expressions::logical::parse_logical_and`, imported here as
    /// `parse_logical_and_expr`).
    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
        parse_logical_and_expr(self)
    }
1580
    /// Parses a `CASE` expression.
    ///
    /// Thin wrapper that delegates to the modular CASE parser
    /// (`expressions::case::parse_case_expression`, imported here as
    /// `parse_case_expr`).
    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
        parse_case_expr(self)
    }
1585
1586    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
1587        // Use the new modular primary expression parser
1588        // Clone the necessary data to avoid borrowing issues
1589        let columns = self.columns.clone();
1590        let in_method_args = self.in_method_args;
1591        let ctx = PrimaryExpressionContext {
1592            columns: &columns,
1593            in_method_args,
1594        };
1595        parse_primary_expr(self, &ctx)
1596    }
1597
1598    // Keep the old implementation temporarily for reference (will be removed)
1599    fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
1600        // Set flag to indicate we're parsing method arguments
1601        self.in_method_args = true;
1602
1603        let args = self.parse_argument_list()?;
1604
1605        // Clear the flag
1606        self.in_method_args = false;
1607
1608        Ok(args)
1609    }
1610
1611    fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
1612        let mut args = Vec::new();
1613        let mut has_distinct = false;
1614
1615        if !matches!(self.current_token, Token::RightParen) {
1616            // Check if first argument starts with DISTINCT
1617            if matches!(self.current_token, Token::Distinct) {
1618                self.advance(); // consume DISTINCT
1619                has_distinct = true;
1620            }
1621
1622            // Parse the expression (either after DISTINCT or directly)
1623            args.push(self.parse_additive()?);
1624
1625            // Parse any remaining arguments (DISTINCT only applies to first arg for aggregates)
1626            while matches!(self.current_token, Token::Comma) {
1627                self.advance();
1628                args.push(self.parse_additive()?);
1629            }
1630        }
1631
1632        Ok((args, has_distinct))
1633    }
1634
1635    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
1636        let mut expressions = Vec::new();
1637
1638        loop {
1639            expressions.push(self.parse_expression()?);
1640
1641            if matches!(self.current_token, Token::Comma) {
1642                self.advance();
1643            } else {
1644                break;
1645            }
1646        }
1647
1648        Ok(expressions)
1649    }
1650
    /// Returns the current position of the underlying lexer, as reported by
    /// `Lexer::get_position`.
    // NOTE(review): presumably a byte offset into the input - confirm against the lexer.
    #[must_use]
    pub fn get_position(&self) -> usize {
        self.lexer.get_position()
    }
1655
    /// Returns `true` when the current token can start a JOIN clause:
    /// `JOIN`, `INNER`, `LEFT`, `RIGHT`, `FULL` or `CROSS`.
    fn is_join_token(&self) -> bool {
        matches!(
            self.current_token,
            Token::Join | Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
        )
    }
1663
1664    // Parse a JOIN clause
1665    fn parse_join_clause(&mut self) -> Result<JoinClause, String> {
1666        // Determine join type
1667        let join_type = match &self.current_token {
1668            Token::Join => {
1669                self.advance();
1670                JoinType::Inner // Default JOIN is INNER JOIN
1671            }
1672            Token::Inner => {
1673                self.advance();
1674                if !matches!(self.current_token, Token::Join) {
1675                    return Err("Expected JOIN after INNER".to_string());
1676                }
1677                self.advance();
1678                JoinType::Inner
1679            }
1680            Token::Left => {
1681                self.advance();
1682                // Handle optional OUTER keyword
1683                if matches!(self.current_token, Token::Outer) {
1684                    self.advance();
1685                }
1686                if !matches!(self.current_token, Token::Join) {
1687                    return Err("Expected JOIN after LEFT".to_string());
1688                }
1689                self.advance();
1690                JoinType::Left
1691            }
1692            Token::Right => {
1693                self.advance();
1694                // Handle optional OUTER keyword
1695                if matches!(self.current_token, Token::Outer) {
1696                    self.advance();
1697                }
1698                if !matches!(self.current_token, Token::Join) {
1699                    return Err("Expected JOIN after RIGHT".to_string());
1700                }
1701                self.advance();
1702                JoinType::Right
1703            }
1704            Token::Full => {
1705                self.advance();
1706                // Handle optional OUTER keyword
1707                if matches!(self.current_token, Token::Outer) {
1708                    self.advance();
1709                }
1710                if !matches!(self.current_token, Token::Join) {
1711                    return Err("Expected JOIN after FULL".to_string());
1712                }
1713                self.advance();
1714                JoinType::Full
1715            }
1716            Token::Cross => {
1717                self.advance();
1718                if !matches!(self.current_token, Token::Join) {
1719                    return Err("Expected JOIN after CROSS".to_string());
1720                }
1721                self.advance();
1722                JoinType::Cross
1723            }
1724            _ => return Err("Expected JOIN keyword".to_string()),
1725        };
1726
1727        // Parse the table being joined
1728        let (table, alias) = self.parse_join_table_source()?;
1729
1730        // Parse ON condition (required for all joins except CROSS JOIN)
1731        let condition = if join_type == JoinType::Cross {
1732            // CROSS JOIN doesn't have ON condition - create empty condition
1733            JoinCondition { conditions: vec![] }
1734        } else {
1735            if !matches!(self.current_token, Token::On) {
1736                return Err("Expected ON keyword after JOIN table".to_string());
1737            }
1738            self.advance();
1739            self.parse_join_condition()?
1740        };
1741
1742        Ok(JoinClause {
1743            join_type,
1744            table,
1745            alias,
1746            condition,
1747        })
1748    }
1749
1750    fn parse_join_table_source(&mut self) -> Result<(TableSource, Option<String>), String> {
1751        let table = match &self.current_token {
1752            Token::Identifier(name) => {
1753                let table_name = name.clone();
1754                self.advance();
1755                TableSource::Table(table_name)
1756            }
1757            Token::LeftParen => {
1758                // Subquery as table source
1759                self.advance();
1760                let subquery = self.parse_select_statement_inner()?;
1761                if !matches!(self.current_token, Token::RightParen) {
1762                    return Err("Expected ')' after subquery".to_string());
1763                }
1764                self.advance();
1765
1766                // Subqueries must have an alias
1767                let alias = match &self.current_token {
1768                    Token::Identifier(alias_name) => {
1769                        let alias = alias_name.clone();
1770                        self.advance();
1771                        alias
1772                    }
1773                    Token::As => {
1774                        self.advance();
1775                        match &self.current_token {
1776                            Token::Identifier(alias_name) => {
1777                                let alias = alias_name.clone();
1778                                self.advance();
1779                                alias
1780                            }
1781                            _ => return Err("Expected alias after AS keyword".to_string()),
1782                        }
1783                    }
1784                    _ => return Err("Subqueries must have an alias".to_string()),
1785                };
1786
1787                return Ok((
1788                    TableSource::DerivedTable {
1789                        query: Box::new(subquery),
1790                        alias: alias.clone(),
1791                    },
1792                    Some(alias),
1793                ));
1794            }
1795            _ => return Err("Expected table name or subquery in JOIN clause".to_string()),
1796        };
1797
1798        // Check for optional alias
1799        let alias = match &self.current_token {
1800            Token::Identifier(alias_name) => {
1801                let alias = alias_name.clone();
1802                self.advance();
1803                Some(alias)
1804            }
1805            Token::As => {
1806                self.advance();
1807                match &self.current_token {
1808                    Token::Identifier(alias_name) => {
1809                        let alias = alias_name.clone();
1810                        self.advance();
1811                        Some(alias)
1812                    }
1813                    _ => return Err("Expected alias after AS keyword".to_string()),
1814                }
1815            }
1816            _ => None,
1817        };
1818
1819        Ok((table, alias))
1820    }
1821
1822    fn parse_join_condition(&mut self) -> Result<JoinCondition, String> {
1823        let mut conditions = Vec::new();
1824
1825        // Parse first condition
1826        conditions.push(self.parse_single_join_condition()?);
1827
1828        // Parse additional conditions connected by AND
1829        while matches!(self.current_token, Token::And) {
1830            self.advance(); // consume AND
1831            conditions.push(self.parse_single_join_condition()?);
1832        }
1833
1834        Ok(JoinCondition { conditions })
1835    }
1836
1837    fn parse_single_join_condition(&mut self) -> Result<SingleJoinCondition, String> {
1838        // Parse left side as additive expression (stops before comparison operators)
1839        // This allows the comparison operator to be explicitly parsed by this function
1840        let left_expr = self.parse_additive()?;
1841
1842        // Parse operator
1843        let operator = match &self.current_token {
1844            Token::Equal => JoinOperator::Equal,
1845            Token::NotEqual => JoinOperator::NotEqual,
1846            Token::LessThan => JoinOperator::LessThan,
1847            Token::LessThanOrEqual => JoinOperator::LessThanOrEqual,
1848            Token::GreaterThan => JoinOperator::GreaterThan,
1849            Token::GreaterThanOrEqual => JoinOperator::GreaterThanOrEqual,
1850            _ => return Err("Expected comparison operator in JOIN condition".to_string()),
1851        };
1852        self.advance();
1853
1854        // Parse right side as additive expression (stops before comparison operators)
1855        let right_expr = self.parse_additive()?;
1856
1857        Ok(SingleJoinCondition {
1858            left_expr,
1859            operator,
1860            right_expr,
1861        })
1862    }
1863
1864    fn parse_column_reference(&mut self) -> Result<String, String> {
1865        match &self.current_token {
1866            Token::Identifier(name) => {
1867                let mut column_ref = name.clone();
1868                self.advance();
1869
1870                // Check for table.column notation
1871                if matches!(self.current_token, Token::Dot) {
1872                    self.advance();
1873                    match &self.current_token {
1874                        Token::Identifier(col_name) => {
1875                            column_ref.push('.');
1876                            column_ref.push_str(col_name);
1877                            self.advance();
1878                        }
1879                        _ => return Err("Expected column name after '.'".to_string()),
1880                    }
1881                }
1882
1883                Ok(column_ref)
1884            }
1885            _ => Err("Expected column reference".to_string()),
1886        }
1887    }
1888}
1889
/// Where the cursor sits inside a (possibly incomplete) query, as reported by
/// `detect_cursor_context`.
#[derive(Debug, Clone)]
pub enum CursorContext {
    /// In the SELECT list (e.g. columns were parsed but there is no FROM table yet).
    SelectClause,
    /// In the FROM clause (e.g. a FROM table exists but no WHERE or ORDER BY).
    FromClause,
    /// Inside a WHERE clause, not directly after a logical operator.
    WhereClause,
    /// In or directly after `ORDER BY`.
    OrderByClause,
    /// Directly after `column.`; holds the column name with surrounding
    /// double quotes removed.
    AfterColumn(String),
    /// After an `AND` / `OR`.
    AfterLogicalOp(LogicalOp),
    /// After a comparison operator.
    AfterComparisonOp(String, String), // column_name, operator
    /// Inside a method call.
    InMethodCall(String, String),      // object, method
    /// Inside an expression.
    // NOTE(review): not produced by the helpers visible next to this definition - confirm where it is used.
    InExpression,
    /// No context could be determined.
    Unknown,
}
1904
/// Returns the prefix of `s` that ends at byte offset `pos`, without ever
/// splitting a UTF-8 character.
///
/// If `pos` is at or beyond the end of `s`, the whole string is returned. If
/// `pos` falls inside a multi-byte character, the cut moves back to the start
/// of that character.
fn safe_slice_to(s: &str, pos: usize) -> &str {
    if pos >= s.len() {
        return s;
    }

    // Offset 0 is always a boundary, so the search cannot come up empty.
    let boundary = (0..=pos)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);

    &s[..boundary]
}
1919
/// Returns the suffix of `s` that starts at byte offset `pos`, without ever
/// splitting a UTF-8 character.
///
/// If `pos` is at or beyond the end of `s`, an empty string is returned. If
/// `pos` falls inside a multi-byte character, the start moves forward to the
/// next character.
fn safe_slice_from(s: &str, pos: usize) -> &str {
    if pos >= s.len() {
        return "";
    }

    // When no boundary is found before the end, the suffix is empty.
    let boundary = (pos..s.len())
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(s.len());

    &s[boundary..]
}
1934
1935#[must_use]
1936pub fn detect_cursor_context(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
1937    let truncated = safe_slice_to(query, cursor_pos);
1938    let mut parser = Parser::new(truncated);
1939
1940    // Try to parse as much as possible
1941    if let Ok(stmt) = parser.parse() {
1942        let (ctx, partial) = analyze_statement(&stmt, truncated, cursor_pos);
1943        #[cfg(test)]
1944        println!("analyze_statement returned: {ctx:?}, {partial:?} for query: '{truncated}'");
1945        (ctx, partial)
1946    } else {
1947        // Partial parse - analyze what we have
1948        let (ctx, partial) = analyze_partial(truncated, cursor_pos);
1949        #[cfg(test)]
1950        println!("analyze_partial returned: {ctx:?}, {partial:?} for query: '{truncated}'");
1951        (ctx, partial)
1952    }
1953}
1954
1955#[must_use]
1956pub fn tokenize_query(query: &str) -> Vec<String> {
1957    let mut lexer = Lexer::new(query);
1958    let tokens = lexer.tokenize_all();
1959    tokens.iter().map(|t| format!("{t:?}")).collect()
1960}
1961
/// Finds the opening double quote of a quoted identifier by searching
/// backwards from its closing quote.
///
/// `pos` is the index of the closing quote; the search covers the bytes
/// strictly before it. A quote preceded by a backslash counts as escaped and
/// is skipped. A quote at index 0 has nothing before it and always matches.
///
/// Returns the index of the opening quote, or `None` when there is none.
/// A `pos` beyond the end of `bytes` is clamped to the length, so the
/// function never indexes out of bounds, including for empty input.
#[must_use]
fn find_quote_start(bytes: &[u8], pos: usize) -> Option<usize> {
    let search_end = pos.min(bytes.len());

    (0..search_end)
        .rev()
        .find(|&idx| bytes[idx] == b'"' && (idx == 0 || bytes[idx - 1] != b'\\'))
}
1984
1985/// Helper function to handle method call context after validation
1986fn handle_method_call_context(col_name: &str, after_dot: &str) -> (CursorContext, Option<String>) {
1987    // Check if there's a partial method name after the dot
1988    let partial_method = if after_dot.is_empty() {
1989        None
1990    } else if after_dot.chars().all(|c| c.is_alphanumeric() || c == '_') {
1991        Some(after_dot.to_string())
1992    } else {
1993        None
1994    };
1995
1996    // For AfterColumn context, strip quotes if present for consistency
1997    let col_name_for_context =
1998        if col_name.starts_with('"') && col_name.ends_with('"') && col_name.len() > 2 {
1999            col_name[1..col_name.len() - 1].to_string()
2000        } else {
2001            col_name.to_string()
2002        };
2003
2004    (
2005        CursorContext::AfterColumn(col_name_for_context),
2006        partial_method,
2007    )
2008}
2009
2010/// Helper function to check if we're after a comparison operator
2011fn check_after_comparison_operator(query: &str) -> Option<(CursorContext, Option<String>)> {
2012    for op in &Parser::COMPARISON_OPERATORS {
2013        if let Some(op_pos) = query.rfind(op) {
2014            let before_op = safe_slice_to(query, op_pos);
2015            let after_op_start = op_pos + op.len();
2016            let after_op = if after_op_start < query.len() {
2017                &query[after_op_start..]
2018            } else {
2019                ""
2020            };
2021
2022            // Check if we have a column name before the operator
2023            if let Some(col_name) = before_op.split_whitespace().last() {
2024                if col_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
2025                    // Check if we're at or near the end of the query
2026                    let after_op_trimmed = after_op.trim();
2027                    if after_op_trimmed.is_empty()
2028                        || (after_op_trimmed
2029                            .chars()
2030                            .all(|c| c.is_alphanumeric() || c == '_')
2031                            && !after_op_trimmed.contains('('))
2032                    {
2033                        let partial = if after_op_trimmed.is_empty() {
2034                            None
2035                        } else {
2036                            Some(after_op_trimmed.to_string())
2037                        };
2038                        return Some((
2039                            CursorContext::AfterComparisonOp(
2040                                col_name.to_string(),
2041                                op.trim().to_string(),
2042                            ),
2043                            partial,
2044                        ));
2045                    }
2046                }
2047            }
2048        }
2049    }
2050    None
2051}
2052
2053fn analyze_statement(
2054    stmt: &SelectStatement,
2055    query: &str,
2056    _cursor_pos: usize,
2057) -> (CursorContext, Option<String>) {
2058    // First check for method call context (e.g., "columnName." or "columnName.Con")
2059    let trimmed = query.trim();
2060
2061    // Check if we're after a comparison operator (e.g., "createdDate > ")
2062    if let Some(result) = check_after_comparison_operator(query) {
2063        return result;
2064    }
2065
2066    // First check if we're after AND/OR - this takes precedence
2067    // Helper function to check if string ends with a logical operator
2068    let ends_with_logical_op = |s: &str| -> bool {
2069        let s_upper = s.to_uppercase();
2070        s_upper.ends_with(" AND") || s_upper.ends_with(" OR")
2071    };
2072
2073    if ends_with_logical_op(trimmed) {
2074        // Don't check for method context if we're clearly after a logical operator
2075    } else {
2076        // Look for the last dot in the query
2077        if let Some(dot_pos) = trimmed.rfind('.') {
2078            // Check if we're after a column name and dot
2079            let before_dot = safe_slice_to(trimmed, dot_pos);
2080            let after_dot_start = dot_pos + 1;
2081            let after_dot = if after_dot_start < trimmed.len() {
2082                &trimmed[after_dot_start..]
2083            } else {
2084                ""
2085            };
2086
2087            // Check if the part after dot looks like an incomplete method call
2088            // (not a complete method call like "Contains(...)")
2089            if !after_dot.contains('(') {
2090                // Try to extract the column name - could be quoted or regular
2091                let col_name = if before_dot.ends_with('"') {
2092                    // Handle quoted identifier - search backwards for matching opening quote
2093                    let bytes = before_dot.as_bytes();
2094                    let pos = before_dot.len() - 1; // Position of closing quote
2095
2096                    find_quote_start(bytes, pos).map(|start| safe_slice_from(before_dot, start))
2097                } else {
2098                    // Regular identifier - get the last word, handling parentheses
2099                    // Strip all leading parentheses
2100                    before_dot
2101                        .split_whitespace()
2102                        .last()
2103                        .map(|word| word.trim_start_matches('('))
2104                };
2105
2106                if let Some(col_name) = col_name {
2107                    // For quoted identifiers, keep the quotes, for regular identifiers check validity
2108                    let is_valid = Parser::is_valid_identifier(col_name);
2109
2110                    if is_valid {
2111                        return handle_method_call_context(col_name, after_dot);
2112                    }
2113                }
2114            }
2115        }
2116    }
2117
2118    // Check if we're in WHERE clause
2119    if let Some(where_clause) = &stmt.where_clause {
2120        // Check if query ends with AND/OR (with or without trailing space/partial)
2121        let trimmed_upper = trimmed.to_uppercase();
2122        if trimmed_upper.ends_with(" AND") || trimmed_upper.ends_with(" OR") {
2123            let op = if trimmed_upper.ends_with(" AND") {
2124                LogicalOp::And
2125            } else {
2126                LogicalOp::Or
2127            };
2128            return (CursorContext::AfterLogicalOp(op), None);
2129        }
2130
2131        // Check if we have AND/OR followed by a partial word
2132        let query_upper = query.to_uppercase();
2133        if let Some(and_pos) = query_upper.rfind(" AND ") {
2134            let after_and = safe_slice_from(query, and_pos + 5);
2135            let partial = extract_partial_at_end(after_and);
2136            if partial.is_some() {
2137                return (CursorContext::AfterLogicalOp(LogicalOp::And), partial);
2138            }
2139        }
2140
2141        if let Some(or_pos) = query_upper.rfind(" OR ") {
2142            let after_or = safe_slice_from(query, or_pos + 4);
2143            let partial = extract_partial_at_end(after_or);
2144            if partial.is_some() {
2145                return (CursorContext::AfterLogicalOp(LogicalOp::Or), partial);
2146            }
2147        }
2148
2149        if let Some(last_condition) = where_clause.conditions.last() {
2150            if let Some(connector) = &last_condition.connector {
2151                // We're after AND/OR
2152                return (
2153                    CursorContext::AfterLogicalOp(connector.clone()),
2154                    extract_partial_at_end(query),
2155                );
2156            }
2157        }
2158        // We're in WHERE clause but not after AND/OR
2159        return (CursorContext::WhereClause, extract_partial_at_end(query));
2160    }
2161
2162    // Check if we're after ORDER BY
2163    let query_upper = query.to_uppercase();
2164    if query_upper.ends_with(" ORDER BY") {
2165        return (CursorContext::OrderByClause, None);
2166    }
2167
2168    // Check other contexts based on what's in the statement
2169    if stmt.order_by.is_some() {
2170        return (CursorContext::OrderByClause, extract_partial_at_end(query));
2171    }
2172
2173    if stmt.from_table.is_some() && stmt.where_clause.is_none() && stmt.order_by.is_none() {
2174        return (CursorContext::FromClause, extract_partial_at_end(query));
2175    }
2176
2177    if !stmt.columns.is_empty() && stmt.from_table.is_none() {
2178        return (CursorContext::SelectClause, extract_partial_at_end(query));
2179    }
2180
2181    (CursorContext::Unknown, None)
2182}
2183
2184/// Helper function to find the last occurrence of a token type in the token stream
2185fn find_last_token(tokens: &[(usize, usize, Token)], target: &Token) -> Option<usize> {
2186    tokens
2187        .iter()
2188        .rposition(|(_, _, t)| t == target)
2189        .map(|idx| tokens[idx].0)
2190}
2191
2192/// Helper function to find the last occurrence of any matching token
2193fn find_last_matching_token<F>(
2194    tokens: &[(usize, usize, Token)],
2195    predicate: F,
2196) -> Option<(usize, &Token)>
2197where
2198    F: Fn(&Token) -> bool,
2199{
2200    tokens
2201        .iter()
2202        .rposition(|(_, _, t)| predicate(t))
2203        .map(|idx| (tokens[idx].0, &tokens[idx].2))
2204}
2205
2206/// Helper function to check if we're in a specific clause based on tokens
2207fn is_in_clause(
2208    tokens: &[(usize, usize, Token)],
2209    clause_token: Token,
2210    exclude_tokens: &[Token],
2211) -> bool {
2212    // Find the last occurrence of the clause token
2213    if let Some(clause_pos) = find_last_token(tokens, &clause_token) {
2214        // Check if any exclude tokens appear after it
2215        for (pos, _, token) in tokens.iter() {
2216            if *pos > clause_pos && exclude_tokens.contains(token) {
2217                return false;
2218            }
2219        }
2220        return true;
2221    }
2222    false
2223}
2224
2225fn analyze_partial(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
2226    // Tokenize the query up to cursor position
2227    let mut lexer = Lexer::new(query);
2228    let tokens = lexer.tokenize_all_with_positions();
2229
2230    let trimmed = query.trim();
2231
2232    #[cfg(test)]
2233    {
2234        if trimmed.contains("\"Last Name\"") {
2235            eprintln!("DEBUG analyze_partial: query='{query}', trimmed='{trimmed}'");
2236        }
2237    }
2238
2239    // Check if we're after a comparison operator (e.g., "createdDate > ")
2240    if let Some(result) = check_after_comparison_operator(query) {
2241        return result;
2242    }
2243
2244    // Look for the last dot in the query (method call context) - check this FIRST
2245    // before AND/OR detection to properly handle cases like "AND (Country."
2246    if let Some(dot_pos) = trimmed.rfind('.') {
2247        #[cfg(test)]
2248        {
2249            if trimmed.contains("\"Last Name\"") {
2250                eprintln!("DEBUG: Found dot at position {dot_pos}");
2251            }
2252        }
2253        // Check if we're after a column name and dot
2254        let before_dot = &trimmed[..dot_pos];
2255        let after_dot = &trimmed[dot_pos + 1..];
2256
2257        // Check if the part after dot looks like an incomplete method call
2258        // (not a complete method call like "Contains(...)")
2259        if !after_dot.contains('(') {
2260            // Try to extract the column name before the dot
2261            // It could be a quoted identifier like "Last Name" or a regular identifier
2262            let col_name = if before_dot.ends_with('"') {
2263                // Handle quoted identifier - search backwards for matching opening quote
2264                let bytes = before_dot.as_bytes();
2265                let pos = before_dot.len() - 1; // Position of closing quote
2266
2267                #[cfg(test)]
2268                {
2269                    if trimmed.contains("\"Last Name\"") {
2270                        eprintln!("DEBUG: before_dot='{before_dot}', looking for opening quote");
2271                    }
2272                }
2273
2274                let found_start = find_quote_start(bytes, pos);
2275
2276                if let Some(start) = found_start {
2277                    // Extract the full quoted identifier including quotes
2278                    let result = safe_slice_from(before_dot, start);
2279                    #[cfg(test)]
2280                    {
2281                        if trimmed.contains("\"Last Name\"") {
2282                            eprintln!("DEBUG: Extracted quoted identifier: '{result}'");
2283                        }
2284                    }
2285                    Some(result)
2286                } else {
2287                    #[cfg(test)]
2288                    {
2289                        if trimmed.contains("\"Last Name\"") {
2290                            eprintln!("DEBUG: No opening quote found!");
2291                        }
2292                    }
2293                    None
2294                }
2295            } else {
2296                // Regular identifier - get the last word, handling parentheses
2297                // Strip all leading parentheses
2298                before_dot
2299                    .split_whitespace()
2300                    .last()
2301                    .map(|word| word.trim_start_matches('('))
2302            };
2303
2304            if let Some(col_name) = col_name {
2305                #[cfg(test)]
2306                {
2307                    if trimmed.contains("\"Last Name\"") {
2308                        eprintln!("DEBUG: col_name = '{col_name}'");
2309                    }
2310                }
2311
2312                // For quoted identifiers, keep the quotes, for regular identifiers check validity
2313                let is_valid = Parser::is_valid_identifier(col_name);
2314
2315                #[cfg(test)]
2316                {
2317                    if trimmed.contains("\"Last Name\"") {
2318                        eprintln!("DEBUG: is_valid = {is_valid}");
2319                    }
2320                }
2321
2322                if is_valid {
2323                    return handle_method_call_context(col_name, after_dot);
2324                }
2325            }
2326        }
2327    }
2328
2329    // Check if we're after AND/OR using tokens - but only after checking for method calls
2330    if let Some((pos, token)) =
2331        find_last_matching_token(&tokens, |t| matches!(t, Token::And | Token::Or))
2332    {
2333        // Check if cursor is after the logical operator
2334        let token_end_pos = if matches!(token, Token::And) {
2335            pos + 3 // "AND" is 3 characters
2336        } else {
2337            pos + 2 // "OR" is 2 characters
2338        };
2339
2340        if cursor_pos > token_end_pos {
2341            // Extract any partial word after the operator
2342            let after_op = safe_slice_from(query, token_end_pos + 1); // +1 for the space
2343            let partial = extract_partial_at_end(after_op);
2344            let op = if matches!(token, Token::And) {
2345                LogicalOp::And
2346            } else {
2347                LogicalOp::Or
2348            };
2349            return (CursorContext::AfterLogicalOp(op), partial);
2350        }
2351    }
2352
2353    // Check if the last token is AND or OR (handles case where it's at the very end)
2354    if let Some((_, _, last_token)) = tokens.last() {
2355        if matches!(last_token, Token::And | Token::Or) {
2356            let op = if matches!(last_token, Token::And) {
2357                LogicalOp::And
2358            } else {
2359                LogicalOp::Or
2360            };
2361            return (CursorContext::AfterLogicalOp(op), None);
2362        }
2363    }
2364
2365    // Check if we're in ORDER BY clause using tokens
2366    if let Some(order_pos) = find_last_token(&tokens, &Token::OrderBy) {
2367        // Check if there's a BY token after ORDER
2368        let has_by = tokens
2369            .iter()
2370            .any(|(pos, _, t)| *pos > order_pos && matches!(t, Token::By));
2371        if has_by
2372            || tokens
2373                .last()
2374                .map_or(false, |(_, _, t)| matches!(t, Token::OrderBy))
2375        {
2376            return (CursorContext::OrderByClause, extract_partial_at_end(query));
2377        }
2378    }
2379
2380    // Check if we're in WHERE clause using tokens
2381    if is_in_clause(&tokens, Token::Where, &[Token::OrderBy, Token::GroupBy]) {
2382        return (CursorContext::WhereClause, extract_partial_at_end(query));
2383    }
2384
2385    // Check if we're in FROM clause using tokens
2386    if is_in_clause(
2387        &tokens,
2388        Token::From,
2389        &[Token::Where, Token::OrderBy, Token::GroupBy],
2390    ) {
2391        return (CursorContext::FromClause, extract_partial_at_end(query));
2392    }
2393
2394    // Check if we're in SELECT clause using tokens
2395    if find_last_token(&tokens, &Token::Select).is_some()
2396        && find_last_token(&tokens, &Token::From).is_none()
2397    {
2398        return (CursorContext::SelectClause, extract_partial_at_end(query));
2399    }
2400
2401    (CursorContext::Unknown, None)
2402}
2403
2404fn extract_partial_at_end(query: &str) -> Option<String> {
2405    let trimmed = query.trim();
2406
2407    // First check if the last word itself starts with a quote (unclosed quoted identifier being typed)
2408    if let Some(last_word) = trimmed.split_whitespace().last() {
2409        if last_word.starts_with('"') && !last_word.ends_with('"') {
2410            // This is an unclosed quoted identifier like "Cust
2411            return Some(last_word.to_string());
2412        }
2413    }
2414
2415    // Regular identifier extraction
2416    let last_word = trimmed.split_whitespace().last()?;
2417
2418    // Check if it's a partial identifier (not a keyword or operator)
2419    // First check if it's alphanumeric (potential identifier)
2420    if last_word.chars().all(|c| c.is_alphanumeric() || c == '_') {
2421        // Use lexer to determine if it's a keyword or identifier
2422        if !is_sql_keyword(last_word) {
2423            Some(last_word.to_string())
2424        } else {
2425            None
2426        }
2427    } else {
2428        None
2429    }
2430}
2431
2432// Implement the ParsePrimary trait for Parser to use the modular expression parsing
2433impl ParsePrimary for Parser {
2434    fn current_token(&self) -> &Token {
2435        &self.current_token
2436    }
2437
2438    fn advance(&mut self) {
2439        self.advance();
2440    }
2441
2442    fn consume(&mut self, expected: Token) -> Result<(), String> {
2443        self.consume(expected)
2444    }
2445
2446    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
2447        self.parse_case_expression()
2448    }
2449
2450    fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
2451        self.parse_function_args()
2452    }
2453
2454    fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
2455        self.parse_window_spec()
2456    }
2457
2458    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
2459        self.parse_logical_or()
2460    }
2461
2462    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2463        self.parse_comparison()
2464    }
2465
2466    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2467        self.parse_expression_list()
2468    }
2469
2470    fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2471        // Parse subquery without parenthesis balance validation
2472        if matches!(self.current_token, Token::With) {
2473            self.parse_with_clause_inner()
2474        } else {
2475            self.parse_select_statement_inner()
2476        }
2477    }
2478}
2479
2480// Implement the ExpressionParser trait for Parser to use the modular expression parsing
2481impl ExpressionParser for Parser {
2482    fn current_token(&self) -> &Token {
2483        &self.current_token
2484    }
2485
2486    fn advance(&mut self) {
2487        // Call the main advance method directly to avoid recursion
2488        match &self.current_token {
2489            Token::LeftParen => self.paren_depth += 1,
2490            Token::RightParen => {
2491                self.paren_depth -= 1;
2492            }
2493            _ => {}
2494        }
2495        self.current_token = self.lexer.next_token();
2496    }
2497
2498    fn peek(&self) -> Option<&Token> {
2499        // We can't return a reference to a token from a temporary lexer,
2500        // so we need a different approach. For now, let's use a workaround
2501        // that checks the next token type without consuming it.
2502        // This is a limitation of the current design.
2503        // A proper fix would be to store the peeked token in the Parser struct.
2504        None // TODO: Implement proper lookahead
2505    }
2506
2507    fn is_at_end(&self) -> bool {
2508        matches!(self.current_token, Token::Eof)
2509    }
2510
2511    fn consume(&mut self, expected: Token) -> Result<(), String> {
2512        // Call the main consume method to avoid recursion
2513        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
2514            self.update_paren_depth(&expected)?;
2515            self.current_token = self.lexer.next_token();
2516            Ok(())
2517        } else {
2518            Err(format!(
2519                "Expected {:?}, found {:?}",
2520                expected, self.current_token
2521            ))
2522        }
2523    }
2524
2525    fn parse_identifier(&mut self) -> Result<String, String> {
2526        if let Token::Identifier(id) = &self.current_token {
2527            let id = id.clone();
2528            self.advance();
2529            Ok(id)
2530        } else {
2531            Err(format!(
2532                "Expected identifier, found {:?}",
2533                self.current_token
2534            ))
2535        }
2536    }
2537}
2538
2539// Implement the ParseArithmetic trait for Parser to use the modular arithmetic parsing
2540impl ParseArithmetic for Parser {
2541    fn current_token(&self) -> &Token {
2542        &self.current_token
2543    }
2544
2545    fn advance(&mut self) {
2546        self.advance();
2547    }
2548
2549    fn consume(&mut self, expected: Token) -> Result<(), String> {
2550        self.consume(expected)
2551    }
2552
2553    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2554        self.parse_primary()
2555    }
2556
2557    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
2558        self.parse_multiplicative()
2559    }
2560
2561    fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
2562        self.parse_method_args()
2563    }
2564}
2565
2566// Implement the ParseComparison trait for Parser to use the modular comparison parsing
2567impl ParseComparison for Parser {
2568    fn current_token(&self) -> &Token {
2569        &self.current_token
2570    }
2571
2572    fn advance(&mut self) {
2573        self.advance();
2574    }
2575
2576    fn consume(&mut self, expected: Token) -> Result<(), String> {
2577        self.consume(expected)
2578    }
2579
2580    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2581        self.parse_primary()
2582    }
2583
2584    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
2585        self.parse_additive()
2586    }
2587
2588    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2589        self.parse_expression_list()
2590    }
2591
2592    fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2593        // Parse subquery without parenthesis balance validation
2594        if matches!(self.current_token, Token::With) {
2595            self.parse_with_clause_inner()
2596        } else {
2597            self.parse_select_statement_inner()
2598        }
2599    }
2600}
2601
2602// Implement the ParseLogical trait for Parser to use the modular logical parsing
2603impl ParseLogical for Parser {
2604    fn current_token(&self) -> &Token {
2605        &self.current_token
2606    }
2607
2608    fn advance(&mut self) {
2609        self.advance();
2610    }
2611
2612    fn consume(&mut self, expected: Token) -> Result<(), String> {
2613        self.consume(expected)
2614    }
2615
2616    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
2617        self.parse_logical_and()
2618    }
2619
2620    fn parse_base_logical_expression(&mut self) -> Result<SqlExpression, String> {
2621        // This is the base for logical AND - it should parse comparison expressions
2622        // to avoid infinite recursion with parse_expression
2623        self.parse_comparison()
2624    }
2625
2626    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2627        self.parse_comparison()
2628    }
2629
2630    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2631        self.parse_expression_list()
2632    }
2633}
2634
2635// Implement the ParseCase trait for Parser to use the modular CASE parsing
2636impl ParseCase for Parser {
2637    fn current_token(&self) -> &Token {
2638        &self.current_token
2639    }
2640
2641    fn advance(&mut self) {
2642        self.advance();
2643    }
2644
2645    fn consume(&mut self, expected: Token) -> Result<(), String> {
2646        self.consume(expected)
2647    }
2648
2649    fn parse_expression(&mut self) -> Result<SqlExpression, String> {
2650        self.parse_expression()
2651    }
2652}
2653
2654fn is_sql_keyword(word: &str) -> bool {
2655    // Use the lexer to check if this word produces a keyword token
2656    let mut lexer = Lexer::new(word);
2657    let token = lexer.next_token();
2658
2659    // Check if it's a keyword token (not an identifier)
2660    !matches!(token, Token::Identifier(_) | Token::Eof)
2661}
2662
#[cfg(test)]
mod tests {
    use super::*;

    /// `Parser::new()` collects no comments, i.e. it behaves like Standard mode.
    #[test]
    fn test_parser_mode_default_is_standard() {
        let query = "-- Leading comment\nSELECT * FROM users";
        let mut default_parser = Parser::new(query);
        let parsed = default_parser.parse().unwrap();

        // Nothing is captured when no mode is requested explicitly.
        assert!(parsed.leading_comments.is_empty());
        assert!(parsed.trailing_comment.is_none());
    }

    /// PreserveComments mode keeps every leading line comment, in source order.
    #[test]
    fn test_parser_mode_preserve_leading_comments() {
        let query = "-- Important query\n-- Author: Alice\nSELECT id, name FROM users";
        let mut preserving = Parser::with_mode(query, ParserMode::PreserveComments);
        let parsed = preserving.parse().unwrap();

        // Two comment lines precede the SELECT.
        let leading = &parsed.leading_comments;
        assert_eq!(leading.len(), 2);
        assert!(leading[0].is_line_comment);
        assert!(leading[0].text.contains("Important query"));
        assert!(leading[1].text.contains("Author: Alice"));
    }

    /// PreserveComments mode keeps a line comment that follows the statement.
    #[test]
    fn test_parser_mode_preserve_trailing_comment() {
        let query = "SELECT * FROM users -- Fetch all users";
        let mut preserving = Parser::with_mode(query, ParserMode::PreserveComments);
        let parsed = preserving.parse().unwrap();

        // The comment after the statement must be captured as a line comment.
        let trailing = parsed.trailing_comment;
        assert!(trailing.is_some());
        let trailing = trailing.unwrap();
        assert!(trailing.is_line_comment);
        assert!(trailing.text.contains("Fetch all users"));
    }

    /// PreserveComments mode keeps a leading `/* ... */` comment and marks it as a block.
    #[test]
    fn test_parser_mode_preserve_block_comments() {
        let query = "/* Query explanation */\nSELECT * FROM users";
        let mut preserving = Parser::with_mode(query, ParserMode::PreserveComments);
        let parsed = preserving.parse().unwrap();

        // Exactly one leading comment, flagged as a block (not line) comment.
        let leading = &parsed.leading_comments;
        assert_eq!(leading.len(), 1);
        assert!(!leading[0].is_line_comment);
        assert!(leading[0].text.contains("Query explanation"));
    }

    /// PreserveComments mode keeps leading and trailing comments of the same statement.
    #[test]
    fn test_parser_mode_preserve_both_comments() {
        let query = "-- Leading\nSELECT * FROM users -- Trailing";
        let mut preserving = Parser::with_mode(query, ParserMode::PreserveComments);
        let parsed = preserving.parse().unwrap();

        // Leading side.
        assert_eq!(parsed.leading_comments.len(), 1);
        assert!(parsed.leading_comments[0].text.contains("Leading"));

        // Trailing side.
        let trailing = parsed.trailing_comment;
        assert!(trailing.is_some());
        assert!(trailing.unwrap().text.contains("Trailing"));
    }

    /// Explicit Standard mode drops line and block comments but still parses the query.
    #[test]
    fn test_parser_mode_standard_ignores_comments() {
        let query = "-- Comment 1\n/* Comment 2 */\nSELECT * FROM users -- Comment 3";
        let mut standard = Parser::with_mode(query, ParserMode::Standard);
        let parsed = standard.parse().unwrap();

        // No comment of either style is recorded.
        assert!(parsed.leading_comments.is_empty());
        assert!(parsed.trailing_comment.is_none());

        // The statement itself is unaffected by the surrounding comments.
        assert_eq!(parsed.select_items.len(), 1);
        assert_eq!(parsed.from_table, Some(String::from("users")));
    }

    /// `Parser::new()` and an explicit Standard-mode parser agree on the same query.
    #[test]
    fn test_parser_backward_compatibility() {
        let query = "SELECT id, name FROM users WHERE active = true";

        // Legacy constructor, no mode given.
        let mut implicit_parser = Parser::new(query);
        let implicit = implicit_parser.parse().unwrap();

        // Same query with Standard mode requested explicitly.
        let mut explicit_parser = Parser::with_mode(query, ParserMode::Standard);
        let explicit = explicit_parser.parse().unwrap();

        // Both statements must have the same shape, and neither may carry comments.
        assert_eq!(implicit.select_items.len(), explicit.select_items.len());
        assert_eq!(implicit.from_table, explicit.from_table);
        assert_eq!(
            implicit.where_clause.is_some(),
            explicit.where_clause.is_some()
        );
        assert!(implicit.leading_comments.is_empty());
        assert!(explicit.leading_comments.is_empty());
    }
}