sql_cli/sql/
recursive_parser.rs

1// Keep chrono imports for the parser implementation
2
3// Re-exports for backward compatibility - these serve as both imports and re-exports
4pub use super::parser::ast::{
5    CTEType, Comment, Condition, DataFormat, FrameBound, FrameUnit, HttpMethod, IntoTable,
6    JoinClause, JoinCondition, JoinOperator, JoinType, LogicalOp, OrderByColumn, OrderByItem,
7    SelectItem, SelectStatement, SetOperation, SingleJoinCondition, SortDirection, SqlExpression,
8    TableFunction, TableSource, WebCTESpec, WhenBranch, WhereClause, WindowFrame, WindowSpec, CTE,
9};
10pub use super::parser::legacy::{ParseContext, ParseState, Schema, SqlParser, SqlToken, TableInfo};
11pub use super::parser::lexer::{Lexer, LexerMode, Token};
12pub use super::parser::ParserConfig;
13
14// Re-export formatting functions for backward compatibility
15pub use super::parser::formatter::{format_ast_tree, format_sql_pretty, format_sql_pretty_compact};
16
17// New AST-based formatter
18pub use super::parser::ast_formatter::{format_sql_ast, format_sql_ast_with_config, FormatConfig};
19
20// Import the new expression modules
21use super::parser::expressions::arithmetic::{
22    parse_additive as parse_additive_expr, parse_multiplicative as parse_multiplicative_expr,
23    ParseArithmetic,
24};
25use super::parser::expressions::case::{parse_case_expression as parse_case_expr, ParseCase};
26use super::parser::expressions::comparison::{
27    parse_comparison as parse_comparison_expr, parse_in_operator, ParseComparison,
28};
29use super::parser::expressions::logical::{
30    parse_logical_and as parse_logical_and_expr, parse_logical_or as parse_logical_or_expr,
31    ParseLogical,
32};
33use super::parser::expressions::primary::{
34    parse_primary as parse_primary_expr, ParsePrimary, PrimaryExpressionContext,
35};
36use super::parser::expressions::ExpressionParser;
37
38// Import function registry to check for function existence
39use crate::sql::functions::{FunctionCategory, FunctionRegistry};
40use crate::sql::generators::GeneratorRegistry;
41use std::sync::Arc;
42
43// Import Web CTE parser
44use super::parser::web_cte_parser::WebCteParser;
45
/// Parser mode - controls whether comments are preserved in the AST.
///
/// The mode is fixed when the parser is constructed; `Parser::with_mode`
/// also uses it to pick the matching lexer mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParserMode {
    /// Standard parsing - comments are skipped (current behavior, backward compatible).
    Standard,
    /// Comments are preserved in the AST (opt-in, intended for formatters).
    PreserveComments,
}
54
55impl Default for ParserMode {
56    fn default() -> Self {
57        ParserMode::Standard
58    }
59}
60
61pub struct Parser {
62    lexer: Lexer,
63    pub current_token: Token,    // Made public for web_cte_parser access
64    in_method_args: bool,        // Track if we're parsing method arguments
65    columns: Vec<String>,        // Known column names for context-aware parsing
66    paren_depth: i32,            // Track parentheses nesting depth
67    paren_depth_stack: Vec<i32>, // Stack to save/restore paren depth for nested contexts
68    _config: ParserConfig,       // Parser configuration including case sensitivity
69    debug_trace: bool,           // Enable detailed token-by-token trace
70    trace_depth: usize,          // Track recursion depth for indented trace
71    function_registry: Arc<FunctionRegistry>, // Function registry for validation
72    generator_registry: Arc<GeneratorRegistry>, // Generator registry for table functions
73    mode: ParserMode,            // Parser mode for comment preservation
74}
75
76impl Parser {
77    #[must_use]
78    pub fn new(input: &str) -> Self {
79        Self::with_mode(input, ParserMode::default())
80    }
81
82    /// Create a new parser with explicit mode for comment preservation
83    #[must_use]
84    pub fn with_mode(input: &str, mode: ParserMode) -> Self {
85        // Choose lexer mode based on parser mode
86        let lexer_mode = match mode {
87            ParserMode::Standard => LexerMode::SkipComments,
88            ParserMode::PreserveComments => LexerMode::PreserveComments,
89        };
90
91        let mut lexer = Lexer::with_mode(input, lexer_mode);
92        let current_token = lexer.next_token();
93        Self {
94            lexer,
95            current_token,
96            in_method_args: false,
97            columns: Vec::new(),
98            paren_depth: 0,
99            paren_depth_stack: Vec::new(),
100            _config: ParserConfig::default(),
101            debug_trace: false,
102            trace_depth: 0,
103            function_registry: Arc::new(FunctionRegistry::new()),
104            generator_registry: Arc::new(GeneratorRegistry::new()),
105            mode,
106        }
107    }
108
109    #[must_use]
110    pub fn with_config(input: &str, config: ParserConfig) -> Self {
111        let mut lexer = Lexer::new(input);
112        let current_token = lexer.next_token();
113        Self {
114            lexer,
115            current_token,
116            in_method_args: false,
117            columns: Vec::new(),
118            paren_depth: 0,
119            paren_depth_stack: Vec::new(),
120            _config: config,
121            debug_trace: false,
122            trace_depth: 0,
123            function_registry: Arc::new(FunctionRegistry::new()),
124            generator_registry: Arc::new(GeneratorRegistry::new()),
125            mode: ParserMode::default(),
126        }
127    }
128
129    #[must_use]
130    pub fn with_columns(mut self, columns: Vec<String>) -> Self {
131        self.columns = columns;
132        self
133    }
134
135    #[must_use]
136    pub fn with_debug_trace(mut self, enabled: bool) -> Self {
137        self.debug_trace = enabled;
138        self
139    }
140
141    #[must_use]
142    pub fn with_function_registry(mut self, registry: Arc<FunctionRegistry>) -> Self {
143        self.function_registry = registry;
144        self
145    }
146
147    #[must_use]
148    pub fn with_generator_registry(mut self, registry: Arc<GeneratorRegistry>) -> Self {
149        self.generator_registry = registry;
150        self
151    }
152
153    fn trace_enter(&mut self, context: &str) {
154        if self.debug_trace {
155            let indent = "  ".repeat(self.trace_depth);
156            eprintln!("{}→ {} | Token: {:?}", indent, context, self.current_token);
157            self.trace_depth += 1;
158        }
159    }
160
161    fn trace_exit(&mut self, context: &str, result: &Result<impl std::fmt::Debug, String>) {
162        if self.debug_trace {
163            self.trace_depth = self.trace_depth.saturating_sub(1);
164            let indent = "  ".repeat(self.trace_depth);
165            match result {
166                Ok(val) => eprintln!("{}← {} ✓ | Result: {:?}", indent, context, val),
167                Err(e) => eprintln!("{}← {} ✗ | Error: {}", indent, context, e),
168            }
169        }
170    }
171
172    fn trace_token(&self, action: &str) {
173        if self.debug_trace {
174            let indent = "  ".repeat(self.trace_depth);
175            eprintln!("{}  {} | Token: {:?}", indent, action, self.current_token);
176        }
177    }
178
179    #[allow(dead_code)]
180    fn peek_token(&self) -> Option<Token> {
181        // Alternative peek that returns owned token
182        let mut temp_lexer = self.lexer.clone();
183        let next_token = temp_lexer.next_token();
184        if matches!(next_token, Token::Eof) {
185            None
186        } else {
187            Some(next_token)
188        }
189    }
190
191    /// Check if current token is one of the reserved keywords that should stop parsing
192    /// Check if an identifier string is a reserved keyword (for backward compatibility)
193    /// This is used when the lexer hasn't properly tokenized keywords and they come through
194    /// as Token::Identifier instead of their proper token types
195    fn is_identifier_reserved(id: &str) -> bool {
196        let id_upper = id.to_uppercase();
197        matches!(
198            id_upper.as_str(),
199            "ORDER" | "HAVING" | "LIMIT" | "OFFSET" | "UNION" | "INTERSECT" | "EXCEPT"
200        )
201    }
202
203    /// Get comparison operator string representation (for autocomplete context)
204    const COMPARISON_OPERATORS: [&'static str; 6] = [" > ", " < ", " >= ", " <= ", " = ", " != "];
205
206    pub fn consume(&mut self, expected: Token) -> Result<(), String> {
207        self.trace_token(&format!("Consuming expected {:?}", expected));
208        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
209            // Track parentheses depth
210            self.update_paren_depth(&expected)?;
211
212            self.current_token = self.lexer.next_token();
213            Ok(())
214        } else {
215            // Provide better error messages for common cases
216            let error_msg = match (&expected, &self.current_token) {
217                (Token::RightParen, Token::Eof) if self.paren_depth > 0 => {
218                    format!(
219                        "Unclosed parenthesis - missing {} closing parenthes{}",
220                        self.paren_depth,
221                        if self.paren_depth == 1 { "is" } else { "es" }
222                    )
223                }
224                (Token::RightParen, _) if self.paren_depth > 0 => {
225                    format!(
226                        "Expected closing parenthesis but found {:?} (currently {} unclosed parenthes{})",
227                        self.current_token,
228                        self.paren_depth,
229                        if self.paren_depth == 1 { "is" } else { "es" }
230                    )
231                }
232                _ => format!("Expected {:?}, found {:?}", expected, self.current_token),
233            };
234            Err(error_msg)
235        }
236    }
237
238    pub fn advance(&mut self) {
239        // Track parentheses depth when advancing
240        match &self.current_token {
241            Token::LeftParen => self.paren_depth += 1,
242            Token::RightParen => {
243                self.paren_depth -= 1;
244                // Note: We don't check for < 0 here because advance() is used
245                // in contexts where we're not necessarily expecting a right paren
246            }
247            _ => {}
248        }
249        let old_token = self.current_token.clone();
250        self.current_token = self.lexer.next_token();
251        if self.debug_trace {
252            let indent = "  ".repeat(self.trace_depth);
253            eprintln!(
254                "{}  Advanced: {:?} → {:?}",
255                indent, old_token, self.current_token
256            );
257        }
258    }
259
260    /// Collect all leading comments before a SQL construct
261    /// This consumes comment tokens and returns them as a Vec<Comment>
262    fn collect_leading_comments(&mut self) -> Vec<Comment> {
263        let mut comments = Vec::new();
264        loop {
265            match &self.current_token {
266                Token::LineComment(text) => {
267                    comments.push(Comment::line(text.clone()));
268                    self.advance();
269                }
270                Token::BlockComment(text) => {
271                    comments.push(Comment::block(text.clone()));
272                    self.advance();
273                }
274                _ => break,
275            }
276        }
277        comments
278    }
279
280    /// Collect a trailing inline comment (on the same line)
281    /// This consumes a single comment token if present
282    fn collect_trailing_comment(&mut self) -> Option<Comment> {
283        match &self.current_token {
284            Token::LineComment(text) => {
285                let comment = Some(Comment::line(text.clone()));
286                self.advance();
287                comment
288            }
289            Token::BlockComment(text) => {
290                let comment = Some(Comment::block(text.clone()));
291                self.advance();
292                comment
293            }
294            _ => None,
295        }
296    }
297
298    fn push_paren_depth(&mut self) {
299        self.paren_depth_stack.push(self.paren_depth);
300        self.paren_depth = 0;
301    }
302
303    fn pop_paren_depth(&mut self) {
304        if let Some(depth) = self.paren_depth_stack.pop() {
305            // Ignore the internal depth - just restore the saved value
306            self.paren_depth = depth;
307        }
308    }
309
310    pub fn parse(&mut self) -> Result<SelectStatement, String> {
311        self.trace_enter("parse");
312
313        // Collect leading comments FIRST (before checking for WITH or SELECT)
314        // This allows comments before WITH clauses to be preserved
315        let leading_comments = if self.mode == ParserMode::PreserveComments {
316            self.collect_leading_comments()
317        } else {
318            vec![]
319        };
320
321        // Now check for WITH clause (after consuming comments)
322        let result = if matches!(self.current_token, Token::With) {
323            let mut stmt = self.parse_with_clause()?;
324            // Attach the leading comments we collected
325            stmt.leading_comments = leading_comments;
326            stmt
327        } else {
328            // For SELECT without WITH, pass comments to inner parser
329            let stmt = self.parse_select_statement_with_comments_public(leading_comments)?;
330            self.check_balanced_parentheses()?;
331            stmt
332        };
333
334        self.trace_exit("parse", &Ok(&result));
335        Ok(result)
336    }
337
338    /// Public wrapper that accepts pre-collected comments and checks parens
339    fn parse_select_statement_with_comments_public(
340        &mut self,
341        comments: Vec<Comment>,
342    ) -> Result<SelectStatement, String> {
343        self.parse_select_statement_with_comments(comments)
344    }
345
346    fn parse_with_clause(&mut self) -> Result<SelectStatement, String> {
347        self.consume(Token::With)?;
348        let ctes = self.parse_cte_list()?;
349
350        // Parse the main SELECT statement - use inner version since we're already tracking parens
351        let mut main_query = self.parse_select_statement_inner_no_comments()?;
352        main_query.ctes = ctes;
353
354        // Check for balanced parentheses at the end of parsing
355        self.check_balanced_parentheses()?;
356
357        Ok(main_query)
358    }
359
360    fn parse_with_clause_inner(&mut self) -> Result<SelectStatement, String> {
361        self.consume(Token::With)?;
362        let ctes = self.parse_cte_list()?;
363
364        // Parse the main SELECT statement (without parenthesis checking for subqueries)
365        let mut main_query = self.parse_select_statement_inner()?;
366        main_query.ctes = ctes;
367
368        Ok(main_query)
369    }
370
371    // Helper function to parse CTE list - eliminates duplication
372    fn parse_cte_list(&mut self) -> Result<Vec<CTE>, String> {
373        let mut ctes = Vec::new();
374
375        // Parse CTEs
376        loop {
377            // Check for WEB keyword for each CTE (can be different for each one)
378            let is_web = if matches!(&self.current_token, Token::Web) {
379                self.trace_token("Found WEB keyword for CTE");
380                self.advance();
381                true
382            } else {
383                false
384            };
385
386            // Parse CTE name
387            let name = match &self.current_token {
388                Token::Identifier(name) => name.clone(),
389                _ => {
390                    return Err(format!(
391                        "Expected CTE name after {}",
392                        if is_web { "WEB" } else { "WITH or comma" }
393                    ))
394                }
395            };
396            self.advance();
397
398            // Optional column list: WITH t(col1, col2) AS ...
399            let column_list = if matches!(self.current_token, Token::LeftParen) {
400                self.advance();
401                let cols = self.parse_identifier_list()?;
402                self.consume(Token::RightParen)?;
403                Some(cols)
404            } else {
405                None
406            };
407
408            // Expect AS
409            self.consume(Token::As)?;
410
411            let cte_type = if is_web {
412                // Expect opening parenthesis for WEB CTE
413                self.consume(Token::LeftParen)?;
414                // Parse WEB CTE specification using dedicated parser
415                let web_spec = WebCteParser::parse(self)?;
416                // Consume closing parenthesis for WEB CTE
417                self.consume(Token::RightParen)?;
418                CTEType::Web(web_spec)
419            } else {
420                // For standard CTEs, push depth BEFORE consuming opening paren
421                // This ensures the paren is counted in the inner context
422                self.push_paren_depth();
423                // Now consume opening parenthesis
424                self.consume(Token::LeftParen)?;
425                let query = self.parse_select_statement_inner()?;
426                // Expect closing parenthesis while still in CTE context
427                self.consume(Token::RightParen)?;
428                // Now pop to restore outer depth after consuming both parens
429                self.pop_paren_depth();
430                CTEType::Standard(query)
431            };
432
433            ctes.push(CTE {
434                name,
435                column_list,
436                cte_type,
437            });
438
439            // Check for more CTEs
440            if !matches!(self.current_token, Token::Comma) {
441                break;
442            }
443            self.advance();
444        }
445
446        Ok(ctes)
447    }
448
449    /// Helper function to parse an optional table alias (with or without AS keyword)
450    fn parse_optional_alias(&mut self) -> Result<Option<String>, String> {
451        if matches!(self.current_token, Token::As) {
452            self.advance();
453            match &self.current_token {
454                Token::Identifier(name) => {
455                    let alias = name.clone();
456                    self.advance();
457                    Ok(Some(alias))
458                }
459                token => {
460                    // Check if it's a reserved keyword - provide helpful error
461                    if let Some(keyword) = token.as_keyword_str() {
462                        Err(format!(
463                            "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
464                            keyword,
465                            keyword.to_lowercase()
466                        ))
467                    } else {
468                        Err("Expected alias name after AS".to_string())
469                    }
470                }
471            }
472        } else if let Token::Identifier(name) = &self.current_token {
473            // AS is optional for table aliases
474            let alias = name.clone();
475            self.advance();
476            Ok(Some(alias))
477        } else {
478            Ok(None)
479        }
480    }
481
482    /// Helper function to check if an identifier is valid (quoted or regular)
483    fn is_valid_identifier(name: &str) -> bool {
484        if name.starts_with('"') && name.ends_with('"') {
485            // Quoted identifier - always valid
486            true
487        } else {
488            // Regular identifier - check if it's alphanumeric or underscore
489            name.chars().all(|c| c.is_alphanumeric() || c == '_')
490        }
491    }
492
493    /// Helper function to update parentheses depth tracking
494    fn update_paren_depth(&mut self, token: &Token) -> Result<(), String> {
495        match token {
496            Token::LeftParen => self.paren_depth += 1,
497            Token::RightParen => {
498                self.paren_depth -= 1;
499                // Check for extra closing parenthesis
500                if self.paren_depth < 0 {
501                    return Err(
502                        "Unexpected closing parenthesis - no matching opening parenthesis"
503                            .to_string(),
504                    );
505                }
506            }
507            _ => {}
508        }
509        Ok(())
510    }
511
512    /// Helper function to parse comma-separated argument list
513    fn parse_argument_list(&mut self) -> Result<Vec<SqlExpression>, String> {
514        let mut args = Vec::new();
515
516        if !matches!(self.current_token, Token::RightParen) {
517            loop {
518                args.push(self.parse_expression()?);
519
520                if matches!(self.current_token, Token::Comma) {
521                    self.advance();
522                } else {
523                    break;
524                }
525            }
526        }
527
528        Ok(args)
529    }
530
531    /// Helper function to check for balanced parentheses at the end of parsing
532    fn check_balanced_parentheses(&self) -> Result<(), String> {
533        if self.paren_depth > 0 {
534            Err(format!(
535                "Unclosed parenthesis - missing {} closing parenthes{}",
536                self.paren_depth,
537                if self.paren_depth == 1 { "is" } else { "es" }
538            ))
539        } else if self.paren_depth < 0 {
540            Err("Extra closing parenthesis found - no matching opening parenthesis".to_string())
541        } else {
542            Ok(())
543        }
544    }
545
546    /// Check if an expression contains aggregate functions (COUNT, SUM, AVG, etc.)
547    /// This is used to detect unsupported patterns in HAVING clause
548    fn contains_aggregate_function(expr: &SqlExpression) -> bool {
549        match expr {
550            SqlExpression::FunctionCall { name, args, .. } => {
551                // Check if this is an aggregate function
552                let upper_name = name.to_uppercase();
553                let is_aggregate = matches!(
554                    upper_name.as_str(),
555                    "COUNT" | "SUM" | "AVG" | "MIN" | "MAX" | "GROUP_CONCAT" | "STRING_AGG"
556                );
557
558                // If this is an aggregate, return true
559                // Otherwise, recursively check arguments
560                is_aggregate || args.iter().any(Self::contains_aggregate_function)
561            }
562            // Recursively check nested expressions
563            SqlExpression::BinaryOp { left, right, .. } => {
564                Self::contains_aggregate_function(left) || Self::contains_aggregate_function(right)
565            }
566            SqlExpression::Not { expr } => Self::contains_aggregate_function(expr),
567            SqlExpression::MethodCall { args, .. } => {
568                args.iter().any(Self::contains_aggregate_function)
569            }
570            SqlExpression::ChainedMethodCall { base, args, .. } => {
571                Self::contains_aggregate_function(base)
572                    || args.iter().any(Self::contains_aggregate_function)
573            }
574            SqlExpression::CaseExpression {
575                when_branches,
576                else_branch,
577            } => {
578                when_branches.iter().any(|branch| {
579                    Self::contains_aggregate_function(&branch.condition)
580                        || Self::contains_aggregate_function(&branch.result)
581                }) || else_branch
582                    .as_ref()
583                    .map_or(false, |e| Self::contains_aggregate_function(e))
584            }
585            SqlExpression::SimpleCaseExpression {
586                expr,
587                when_branches,
588                else_branch,
589            } => {
590                Self::contains_aggregate_function(expr)
591                    || when_branches.iter().any(|branch| {
592                        Self::contains_aggregate_function(&branch.value)
593                            || Self::contains_aggregate_function(&branch.result)
594                    })
595                    || else_branch
596                        .as_ref()
597                        .map_or(false, |e| Self::contains_aggregate_function(e))
598            }
599            SqlExpression::ScalarSubquery { query } => {
600                // Subqueries can have their own aggregates, but that's fine
601                // We're only checking the outer HAVING clause
602                query
603                    .having
604                    .as_ref()
605                    .map_or(false, |h| Self::contains_aggregate_function(h))
606            }
607            // Leaf nodes - no aggregates
608            SqlExpression::Column(_)
609            | SqlExpression::StringLiteral(_)
610            | SqlExpression::NumberLiteral(_)
611            | SqlExpression::BooleanLiteral(_)
612            | SqlExpression::Null
613            | SqlExpression::DateTimeConstructor { .. }
614            | SqlExpression::DateTimeToday { .. } => false,
615
616            // Window functions contain aggregates by definition
617            SqlExpression::WindowFunction { .. } => true,
618
619            // Between has three parts to check
620            SqlExpression::Between { expr, lower, upper } => {
621                Self::contains_aggregate_function(expr)
622                    || Self::contains_aggregate_function(lower)
623                    || Self::contains_aggregate_function(upper)
624            }
625
626            // IN list - check expr and all values
627            SqlExpression::InList { expr, values } | SqlExpression::NotInList { expr, values } => {
628                Self::contains_aggregate_function(expr)
629                    || values.iter().any(Self::contains_aggregate_function)
630            }
631
632            // IN subquery - check expr and subquery
633            SqlExpression::InSubquery { expr, subquery }
634            | SqlExpression::NotInSubquery { expr, subquery } => {
635                Self::contains_aggregate_function(expr)
636                    || subquery
637                        .having
638                        .as_ref()
639                        .map_or(false, |h| Self::contains_aggregate_function(h))
640            }
641
642            // UNNEST - check column expression
643            SqlExpression::Unnest { column, .. } => Self::contains_aggregate_function(column),
644        }
645    }
646
647    fn parse_select_statement(&mut self) -> Result<SelectStatement, String> {
648        self.trace_enter("parse_select_statement");
649        let result = self.parse_select_statement_inner()?;
650
651        // Check for balanced parentheses at the end of parsing
652        self.check_balanced_parentheses()?;
653
654        Ok(result)
655    }
656
657    fn parse_select_statement_inner(&mut self) -> Result<SelectStatement, String> {
658        // Collect leading comments ONLY in PreserveComments mode
659        let leading_comments = if self.mode == ParserMode::PreserveComments {
660            self.collect_leading_comments()
661        } else {
662            vec![]
663        };
664
665        self.parse_select_statement_with_comments(leading_comments)
666    }
667
668    /// Parse SELECT statement without collecting leading comments
669    /// Used when comments were already collected (e.g., before WITH clause)
670    fn parse_select_statement_inner_no_comments(&mut self) -> Result<SelectStatement, String> {
671        self.parse_select_statement_with_comments(vec![])
672    }
673
674    /// Core SELECT parsing logic - takes pre-collected comments
675    fn parse_select_statement_with_comments(
676        &mut self,
677        leading_comments: Vec<Comment>,
678    ) -> Result<SelectStatement, String> {
679        self.consume(Token::Select)?;
680
681        // Check for DISTINCT keyword
682        let distinct = if matches!(self.current_token, Token::Distinct) {
683            self.advance();
684            true
685        } else {
686            false
687        };
688
689        // Parse SELECT items (supports computed expressions)
690        let select_items = self.parse_select_items()?;
691
692        // Create legacy columns vector for backward compatibility
693        let columns = select_items
694            .iter()
695            .map(|item| match item {
696                SelectItem::Star { .. } => "*".to_string(),
697                SelectItem::Column {
698                    column: col_ref, ..
699                } => col_ref.name.clone(),
700                SelectItem::Expression { alias, .. } => alias.clone(),
701            })
702            .collect();
703
704        // Parse INTO clause (for temporary tables) - comes immediately after SELECT items
705        let into_table = if matches!(self.current_token, Token::Into) {
706            self.advance();
707            Some(self.parse_into_clause()?)
708        } else {
709            None
710        };
711
712        // Parse FROM clause - can be a table name, subquery, or table function
713        let (from_table, from_subquery, from_function, from_alias) = if matches!(
714            self.current_token,
715            Token::From
716        ) {
717            self.advance();
718
719            // Check for table function like RANGE()
720            if let Token::Identifier(name) = &self.current_token.clone() {
721                // Check if this is a table function by consulting the registry
722                // We need to lookahead to see if there's a parenthesis to distinguish
723                // between a function call and a table with the same name
724                let has_paren = self.peek_token() == Some(Token::LeftParen);
725                if self.debug_trace {
726                    eprintln!(
727                        "  Checking {} for table function, has_paren={}",
728                        name, has_paren
729                    );
730                }
731
732                // Check if it's a known table function or generator
733                // In FROM clause context, prioritize generators over scalar functions
734                let is_table_function = if has_paren {
735                    // First check generator registry (for FROM clause context)
736                    if self.debug_trace {
737                        eprintln!("  Checking generator registry for {}", name.to_uppercase());
738                    }
739                    if let Some(_gen) = self.generator_registry.get(&name.to_uppercase()) {
740                        if self.debug_trace {
741                            eprintln!("  Found {} in generator registry", name);
742                        }
743                        self.trace_token(&format!("Found generator: {}", name));
744                        true
745                    } else {
746                        // Then check if it's a table function in the function registry
747                        if let Some(func) = self.function_registry.get(&name.to_uppercase()) {
748                            let sig = func.signature();
749                            let is_table_fn = sig.category == FunctionCategory::TableFunction;
750                            if self.debug_trace {
751                                eprintln!(
752                                    "  Found {} in function registry, is_table_function={}",
753                                    name, is_table_fn
754                                );
755                            }
756                            if is_table_fn {
757                                self.trace_token(&format!(
758                                    "Found table function in function registry: {}",
759                                    name
760                                ));
761                            }
762                            is_table_fn
763                        } else {
764                            if self.debug_trace {
765                                eprintln!("  {} not found in either registry", name);
766                                self.trace_token(&format!(
767                                    "Not found as generator or table function: {}",
768                                    name
769                                ));
770                            }
771                            false
772                        }
773                    }
774                } else {
775                    if self.debug_trace {
776                        eprintln!("  No parenthesis after {}, treating as table", name);
777                    }
778                    false
779                };
780
781                if is_table_function {
782                    // Parse table function
783                    let function_name = name.clone();
784                    self.advance(); // Skip function name
785
786                    // Parse arguments
787                    self.consume(Token::LeftParen)?;
788                    let args = self.parse_argument_list()?;
789                    self.consume(Token::RightParen)?;
790
791                    // Optional alias
792                    let alias = if matches!(self.current_token, Token::As) {
793                        self.advance();
794                        match &self.current_token {
795                            Token::Identifier(name) => {
796                                let alias = name.clone();
797                                self.advance();
798                                Some(alias)
799                            }
800                            token => {
801                                if let Some(keyword) = token.as_keyword_str() {
802                                    return Err(format!(
803                                            "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
804                                            keyword,
805                                            keyword.to_lowercase()
806                                        ));
807                                } else {
808                                    return Err("Expected alias name after AS".to_string());
809                                }
810                            }
811                        }
812                    } else if let Token::Identifier(name) = &self.current_token {
813                        let alias = name.clone();
814                        self.advance();
815                        Some(alias)
816                    } else {
817                        None
818                    };
819
820                    (
821                        None,
822                        None,
823                        Some(TableFunction::Generator {
824                            name: function_name,
825                            args,
826                        }),
827                        alias,
828                    )
829                } else {
830                    // Not a RANGE, SPLIT, or generator function, so it's a regular table name
831                    let table_name = name.clone();
832                    self.advance();
833
834                    // Check for optional alias
835                    let alias = self.parse_optional_alias()?;
836
837                    (Some(table_name), None, None, alias)
838                }
839            } else if matches!(self.current_token, Token::LeftParen) {
840                // Check for subquery: FROM (SELECT ...) or FROM (WITH ... SELECT ...)
841                self.advance();
842
843                // Parse the subquery - it might start with WITH
844                let subquery = if matches!(self.current_token, Token::With) {
845                    self.parse_with_clause_inner()?
846                } else {
847                    self.parse_select_statement_inner()?
848                };
849
850                self.consume(Token::RightParen)?;
851
852                // Subqueries must have an alias
853                let alias = if matches!(self.current_token, Token::As) {
854                    self.advance();
855                    match &self.current_token {
856                        Token::Identifier(name) => {
857                            let alias = name.clone();
858                            self.advance();
859                            alias
860                        }
861                        token => {
862                            if let Some(keyword) = token.as_keyword_str() {
863                                return Err(format!(
864                                        "Reserved keyword '{}' cannot be used as subquery alias. Use a different name or quote it with double quotes: \"{}\"",
865                                        keyword,
866                                        keyword.to_lowercase()
867                                    ));
868                            } else {
869                                return Err("Expected alias name after AS".to_string());
870                            }
871                        }
872                    }
873                } else {
874                    // AS is optional, but alias is required
875                    match &self.current_token {
876                        Token::Identifier(name) => {
877                            let alias = name.clone();
878                            self.advance();
879                            alias
880                        }
881                        _ => {
882                            return Err(
883                                "Subquery in FROM must have an alias (e.g., AS t)".to_string()
884                            )
885                        }
886                    }
887                };
888
889                (None, Some(Box::new(subquery)), None, Some(alias))
890            } else {
891                // Regular table name
892                match &self.current_token {
893                    Token::Identifier(table) => {
894                        let table_name = table.clone();
895                        self.advance();
896
897                        // Check for optional alias
898                        let alias = self.parse_optional_alias()?;
899
900                        (Some(table_name), None, None, alias)
901                    }
902                    Token::QuotedIdentifier(table) => {
903                        // Handle quoted table names
904                        let table_name = table.clone();
905                        self.advance();
906
907                        // Check for optional alias
908                        let alias = self.parse_optional_alias()?;
909
910                        (Some(table_name), None, None, alias)
911                    }
912                    _ => return Err("Expected table name or subquery after FROM".to_string()),
913                }
914            }
915        } else {
916            (None, None, None, None)
917        };
918
919        // Parse JOIN clauses
920        let mut joins = Vec::new();
921        while self.is_join_token() {
922            joins.push(self.parse_join_clause()?);
923        }
924
925        let where_clause = if matches!(self.current_token, Token::Where) {
926            self.advance();
927            Some(self.parse_where_clause()?)
928        } else {
929            None
930        };
931
932        let group_by = if matches!(self.current_token, Token::GroupBy) {
933            self.advance();
934            // Parse expressions instead of just identifiers for GROUP BY
935            // This allows GROUP BY TIME_BUCKET(...), CASE ..., etc.
936            Some(self.parse_expression_list()?)
937        } else {
938            None
939        };
940
941        // Parse HAVING clause (must come after GROUP BY)
942        let having = if matches!(self.current_token, Token::Having) {
943            if group_by.is_none() {
944                return Err("HAVING clause requires GROUP BY".to_string());
945            }
946            self.advance();
947            let having_expr = self.parse_expression()?;
948
949            // Note: Aggregate functions in HAVING are now supported via the
950            // HavingAliasTransformer preprocessing step, which automatically
951            // adds aliases and rewrites the HAVING clause to use them.
952
953            Some(having_expr)
954        } else {
955            None
956        };
957
958        // Parse QUALIFY clause (Snowflake-style window function filtering)
959        // QUALIFY filters on window function results without needing a subquery
960        // Example: SELECT *, ROW_NUMBER() OVER (...) AS rn FROM t QUALIFY rn <= 3
961        let qualify = if matches!(self.current_token, Token::Qualify) {
962            self.advance();
963            let qualify_expr = self.parse_expression()?;
964
965            // Note: QUALIFY is handled by the QualifyToWhereTransformer preprocessing step
966            // which converts it to WHERE after window functions are lifted to CTEs
967
968            Some(qualify_expr)
969        } else {
970            None
971        };
972
973        // Parse ORDER BY clause (comes after GROUP BY, HAVING, and QUALIFY)
974        let order_by = if matches!(self.current_token, Token::OrderBy) {
975            self.trace_token("Found OrderBy token");
976            self.advance();
977            Some(self.parse_order_by_list()?)
978        } else if let Token::Identifier(s) = &self.current_token {
979            // This shouldn't happen if the lexer properly tokenizes ORDER BY
980            // But keeping as fallback for compatibility
981            if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
982                self.trace_token("Warning: ORDER as identifier instead of OrderBy token");
983                self.advance(); // consume ORDER
984                if matches!(&self.current_token, Token::By) {
985                    self.advance(); // consume BY
986                    Some(self.parse_order_by_list()?)
987                } else {
988                    return Err("Expected BY after ORDER".to_string());
989                }
990            } else {
991                None
992            }
993        } else {
994            None
995        };
996
997        // Parse LIMIT clause
998        let limit = if matches!(self.current_token, Token::Limit) {
999            self.advance();
1000            match &self.current_token {
1001                Token::NumberLiteral(num) => {
1002                    let limit_val = num
1003                        .parse::<usize>()
1004                        .map_err(|_| format!("Invalid LIMIT value: {num}"))?;
1005                    self.advance();
1006                    Some(limit_val)
1007                }
1008                _ => return Err("Expected number after LIMIT".to_string()),
1009            }
1010        } else {
1011            None
1012        };
1013
1014        // Parse OFFSET clause
1015        let offset = if matches!(self.current_token, Token::Offset) {
1016            self.advance();
1017            match &self.current_token {
1018                Token::NumberLiteral(num) => {
1019                    let offset_val = num
1020                        .parse::<usize>()
1021                        .map_err(|_| format!("Invalid OFFSET value: {num}"))?;
1022                    self.advance();
1023                    Some(offset_val)
1024                }
1025                _ => return Err("Expected number after OFFSET".to_string()),
1026            }
1027        } else {
1028            None
1029        };
1030
1031        // Parse INTO clause (alternative position - SQL Server also supports INTO after all clauses)
1032        // This handles: SELECT * FROM table WHERE x > 5 INTO #temp
1033        // If INTO was already parsed after SELECT, this will be None (can't have two INTOs)
1034        let into_table = if into_table.is_none() && matches!(self.current_token, Token::Into) {
1035            self.advance();
1036            Some(self.parse_into_clause()?)
1037        } else {
1038            into_table // Keep the one from after SELECT if it exists
1039        };
1040
1041        // Parse UNION/INTERSECT/EXCEPT operations
1042        let set_operations = self.parse_set_operations()?;
1043
1044        // Collect trailing comment ONLY in PreserveComments mode
1045        let trailing_comment = if self.mode == ParserMode::PreserveComments {
1046            self.collect_trailing_comment()
1047        } else {
1048            None
1049        };
1050
1051        Ok(SelectStatement {
1052            distinct,
1053            columns,
1054            select_items,
1055            from_table,
1056            from_subquery,
1057            from_function,
1058            from_alias,
1059            joins,
1060            where_clause,
1061            order_by,
1062            group_by,
1063            having,
1064            qualify,
1065            limit,
1066            offset,
1067            ctes: Vec::new(), // Will be populated by WITH clause parser
1068            into_table,
1069            set_operations,
1070            leading_comments,
1071            trailing_comment,
1072        })
1073    }
1074
1075    /// Parse UNION/INTERSECT/EXCEPT operations
1076    /// Returns a vector of (operation, select_statement) pairs
1077    fn parse_set_operations(
1078        &mut self,
1079    ) -> Result<Vec<(SetOperation, Box<SelectStatement>)>, String> {
1080        let mut operations = Vec::new();
1081
1082        while matches!(
1083            self.current_token,
1084            Token::Union | Token::Intersect | Token::Except
1085        ) {
1086            // Determine the operation type
1087            let operation = match &self.current_token {
1088                Token::Union => {
1089                    self.advance();
1090                    // Check for ALL keyword
1091                    if let Token::Identifier(id) = &self.current_token {
1092                        if id.to_uppercase() == "ALL" {
1093                            self.advance();
1094                            SetOperation::UnionAll
1095                        } else {
1096                            SetOperation::Union
1097                        }
1098                    } else {
1099                        SetOperation::Union
1100                    }
1101                }
1102                Token::Intersect => {
1103                    self.advance();
1104                    SetOperation::Intersect
1105                }
1106                Token::Except => {
1107                    self.advance();
1108                    SetOperation::Except
1109                }
1110                _ => unreachable!(),
1111            };
1112
1113            // Parse the next SELECT statement
1114            let next_select = self.parse_select_statement_inner()?;
1115
1116            operations.push((operation, Box::new(next_select)));
1117        }
1118
1119        Ok(operations)
1120    }
1121
1122    /// Parse SELECT items that support computed expressions with aliases
1123    fn parse_select_items(&mut self) -> Result<Vec<SelectItem>, String> {
1124        let mut items = Vec::new();
1125
1126        loop {
1127            // Check for qualified star (table.*) or unqualified star (*)
1128            // First check if we have identifier.* pattern
1129            if let Token::Identifier(name) = &self.current_token.clone() {
1130                // Peek ahead to check for .* pattern
1131                let saved_pos = self.lexer.clone();
1132                let saved_token = self.current_token.clone();
1133                let table_name = name.clone();
1134
1135                self.advance();
1136
1137                if matches!(self.current_token, Token::Dot) {
1138                    self.advance();
1139                    if matches!(self.current_token, Token::Star) {
1140                        // This is table.* pattern
1141                        items.push(SelectItem::Star {
1142                            table_prefix: Some(table_name),
1143                            leading_comments: vec![],
1144                            trailing_comment: None,
1145                        });
1146                        self.advance();
1147
1148                        // Continue to next item or end
1149                        if matches!(self.current_token, Token::Comma) {
1150                            self.advance();
1151                            continue;
1152                        } else {
1153                            break;
1154                        }
1155                    }
1156                }
1157
1158                // Not table.*, restore position and continue with normal parsing
1159                self.lexer = saved_pos;
1160                self.current_token = saved_token;
1161            }
1162
1163            // Check for unqualified *
1164            if matches!(self.current_token, Token::Star) {
1165                items.push(SelectItem::Star {
1166                    table_prefix: None,
1167                    leading_comments: vec![],
1168                    trailing_comment: None,
1169                });
1170                self.advance();
1171            } else {
1172                // Parse expression or column
1173                let expr = self.parse_comparison()?; // Use comparison to support IS NULL and other comparisons
1174
1175                // Check for AS alias
1176                let alias = if matches!(self.current_token, Token::As) {
1177                    self.advance();
1178                    match &self.current_token {
1179                        Token::Identifier(alias_name) => {
1180                            let alias = alias_name.clone();
1181                            self.advance();
1182                            alias
1183                        }
1184                        Token::QuotedIdentifier(alias_name) => {
1185                            let alias = alias_name.clone();
1186                            self.advance();
1187                            alias
1188                        }
1189                        token => {
1190                            if let Some(keyword) = token.as_keyword_str() {
1191                                return Err(format!(
1192                                    "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
1193                                    keyword,
1194                                    keyword.to_lowercase()
1195                                ));
1196                            } else {
1197                                return Err("Expected alias name after AS".to_string());
1198                            }
1199                        }
1200                    }
1201                } else {
1202                    // Generate default alias based on expression
1203                    match &expr {
1204                        SqlExpression::Column(col_ref) => col_ref.name.clone(),
1205                        _ => format!("expr_{}", items.len() + 1), // Default alias for computed expressions
1206                    }
1207                };
1208
1209                // Create SelectItem based on expression type
1210                let item = match expr {
1211                    SqlExpression::Column(col_ref) if alias == col_ref.name => {
1212                        // Simple column reference without alias
1213                        SelectItem::Column {
1214                            column: col_ref,
1215                            leading_comments: vec![],
1216                            trailing_comment: None,
1217                        }
1218                    }
1219                    _ => {
1220                        // Computed expression or column with different alias
1221                        SelectItem::Expression {
1222                            expr,
1223                            alias,
1224                            leading_comments: vec![],
1225                            trailing_comment: None,
1226                        }
1227                    }
1228                };
1229
1230                items.push(item);
1231            }
1232
1233            // Check for comma to continue
1234            if matches!(self.current_token, Token::Comma) {
1235                self.advance();
1236            } else {
1237                break;
1238            }
1239        }
1240
1241        Ok(items)
1242    }
1243
1244    fn parse_identifier_list(&mut self) -> Result<Vec<String>, String> {
1245        let mut identifiers = Vec::new();
1246
1247        loop {
1248            match &self.current_token {
1249                Token::Identifier(id) => {
1250                    // Check if this is a reserved keyword that should stop identifier parsing
1251                    if Self::is_identifier_reserved(id) {
1252                        // Stop parsing identifiers if we hit a reserved keyword
1253                        break;
1254                    }
1255                    identifiers.push(id.clone());
1256                    self.advance();
1257                }
1258                Token::QuotedIdentifier(id) => {
1259                    // Handle quoted identifiers like "Customer Id"
1260                    identifiers.push(id.clone());
1261                    self.advance();
1262                }
1263                _ => {
1264                    // Stop parsing if we hit any other token type
1265                    break;
1266                }
1267            }
1268
1269            if matches!(self.current_token, Token::Comma) {
1270                self.advance();
1271            } else {
1272                break;
1273            }
1274        }
1275
1276        if identifiers.is_empty() {
1277            return Err("Expected at least one identifier".to_string());
1278        }
1279
1280        Ok(identifiers)
1281    }
1282
1283    fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
1284        let mut partition_by = Vec::new();
1285        let mut order_by = Vec::new();
1286
1287        // Check for PARTITION BY
1288        if matches!(self.current_token, Token::Partition) {
1289            self.advance(); // consume PARTITION
1290            if !matches!(self.current_token, Token::By) {
1291                return Err("Expected BY after PARTITION".to_string());
1292            }
1293            self.advance(); // consume BY
1294
1295            // Parse partition columns
1296            partition_by = self.parse_identifier_list()?;
1297        }
1298
1299        // Check for ORDER BY
1300        if matches!(self.current_token, Token::OrderBy) {
1301            self.advance(); // consume ORDER BY (as single token)
1302            order_by = self.parse_order_by_list()?;
1303        } else if let Token::Identifier(s) = &self.current_token {
1304            if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
1305                // Handle ORDER BY as two tokens
1306                self.advance(); // consume ORDER
1307                if !matches!(self.current_token, Token::By) {
1308                    return Err("Expected BY after ORDER".to_string());
1309                }
1310                self.advance(); // consume BY
1311                order_by = self.parse_order_by_list()?;
1312            }
1313        }
1314
1315        // Parse optional window frame (ROWS/RANGE BETWEEN ... AND ...)
1316        let frame = self.parse_window_frame()?;
1317
1318        Ok(WindowSpec {
1319            partition_by,
1320            order_by,
1321            frame,
1322        })
1323    }
1324
1325    fn parse_order_by_list(&mut self) -> Result<Vec<OrderByItem>, String> {
1326        let mut order_items = Vec::new();
1327
1328        loop {
1329            // Parse ANY expression (not just column names)
1330            // This supports:
1331            // - Simple columns: region
1332            // - Qualified columns: table.column
1333            // - Aggregate functions: SUM(sales_amount)
1334            // - Arithmetic: sales_amount * 1.1
1335            // - CASE expressions: CASE WHEN ... END
1336            let expr = self.parse_expression()?;
1337
1338            // Check for ASC/DESC
1339            let direction = match &self.current_token {
1340                Token::Asc => {
1341                    self.advance();
1342                    SortDirection::Asc
1343                }
1344                Token::Desc => {
1345                    self.advance();
1346                    SortDirection::Desc
1347                }
1348                _ => SortDirection::Asc, // Default to ASC if not specified
1349            };
1350
1351            order_items.push(OrderByItem { expr, direction });
1352
1353            if matches!(self.current_token, Token::Comma) {
1354                self.advance();
1355            } else {
1356                break;
1357            }
1358        }
1359
1360        Ok(order_items)
1361    }
1362
1363    /// Parse INTO clause for temporary tables
1364    /// Syntax: INTO #table_name
1365    fn parse_into_clause(&mut self) -> Result<IntoTable, String> {
1366        // Expect an identifier starting with #
1367        let name = match &self.current_token {
1368            Token::Identifier(id) if id.starts_with('#') => {
1369                let table_name = id.clone();
1370                self.advance();
1371                table_name
1372            }
1373            Token::Identifier(id) => {
1374                return Err(format!(
1375                    "Temporary table name must start with #, got: {}",
1376                    id
1377                ));
1378            }
1379            _ => {
1380                return Err(
1381                    "Expected temporary table name (starting with #) after INTO".to_string()
1382                );
1383            }
1384        };
1385
1386        Ok(IntoTable { name })
1387    }
1388
1389    fn parse_window_frame(&mut self) -> Result<Option<WindowFrame>, String> {
1390        // Check for ROWS or RANGE keyword
1391        let unit = match &self.current_token {
1392            Token::Rows => {
1393                self.advance();
1394                FrameUnit::Rows
1395            }
1396            Token::Identifier(id) if id.to_uppercase() == "RANGE" => {
1397                // RANGE as window frame unit
1398                self.advance();
1399                FrameUnit::Range
1400            }
1401            _ => return Ok(None), // No window frame specified
1402        };
1403
1404        // Check for BETWEEN or just a single bound
1405        let (start, end) = if let Token::Between = &self.current_token {
1406            self.advance(); // consume BETWEEN
1407                            // Parse start bound
1408            let start = self.parse_frame_bound()?;
1409
1410            // Expect AND
1411            if !matches!(&self.current_token, Token::And) {
1412                return Err("Expected AND after window frame start bound".to_string());
1413            }
1414            self.advance();
1415
1416            // Parse end bound
1417            let end = self.parse_frame_bound()?;
1418            (start, Some(end))
1419        } else {
1420            // Single bound (e.g., "ROWS 5 PRECEDING")
1421            let bound = self.parse_frame_bound()?;
1422            (bound, None)
1423        };
1424
1425        Ok(Some(WindowFrame { unit, start, end }))
1426    }
1427
1428    fn parse_frame_bound(&mut self) -> Result<FrameBound, String> {
1429        match &self.current_token {
1430            Token::Unbounded => {
1431                self.advance();
1432                match &self.current_token {
1433                    Token::Preceding => {
1434                        self.advance();
1435                        Ok(FrameBound::UnboundedPreceding)
1436                    }
1437                    Token::Following => {
1438                        self.advance();
1439                        Ok(FrameBound::UnboundedFollowing)
1440                    }
1441                    _ => Err("Expected PRECEDING or FOLLOWING after UNBOUNDED".to_string()),
1442                }
1443            }
1444            Token::Current => {
1445                self.advance();
1446                if matches!(&self.current_token, Token::Row) {
1447                    self.advance();
1448                    return Ok(FrameBound::CurrentRow);
1449                }
1450                Err("Expected ROW after CURRENT".to_string())
1451            }
1452            Token::NumberLiteral(num) => {
1453                let n: i64 = num
1454                    .parse()
1455                    .map_err(|_| "Invalid number in window frame".to_string())?;
1456                self.advance();
1457                match &self.current_token {
1458                    Token::Preceding => {
1459                        self.advance();
1460                        Ok(FrameBound::Preceding(n))
1461                    }
1462                    Token::Following => {
1463                        self.advance();
1464                        Ok(FrameBound::Following(n))
1465                    }
1466                    _ => Err("Expected PRECEDING or FOLLOWING after number".to_string()),
1467                }
1468            }
1469            _ => Err("Invalid window frame bound".to_string()),
1470        }
1471    }
1472
1473    fn parse_where_clause(&mut self) -> Result<WhereClause, String> {
1474        // Parse the entire WHERE clause as a single expression tree
1475        // The logical operators (AND/OR) are now handled within parse_expression
1476        let expr = self.parse_expression()?;
1477
1478        // Check for unexpected closing parenthesis
1479        if matches!(self.current_token, Token::RightParen) && self.paren_depth <= 0 {
1480            return Err(
1481                "Unexpected closing parenthesis - no matching opening parenthesis".to_string(),
1482            );
1483        }
1484
1485        // Create a single condition with the entire expression
1486        let conditions = vec![Condition {
1487            expr,
1488            connector: None,
1489        }];
1490
1491        Ok(WhereClause { conditions })
1492    }
1493
1494    fn parse_expression(&mut self) -> Result<SqlExpression, String> {
1495        self.trace_enter("parse_expression");
1496        // Start with logical OR as the lowest precedence operator
1497        // The hierarchy is: OR -> AND -> comparison -> additive -> multiplicative -> primary
1498        let mut left = self.parse_logical_or()?;
1499
1500        // Handle IN operator (not preceded by NOT)
1501        // This uses the modular comparison module
1502        left = parse_in_operator(self, left)?;
1503
1504        let result = Ok(left);
1505        self.trace_exit("parse_expression", &result);
1506        result
1507    }
1508
1509    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
1510        // Use the new modular comparison expression parser
1511        parse_comparison_expr(self)
1512    }
1513
1514    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
1515        // Use the new modular arithmetic expression parser
1516        parse_additive_expr(self)
1517    }
1518
1519    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
1520        // Use the new modular arithmetic expression parser
1521        parse_multiplicative_expr(self)
1522    }
1523
    /// Parses a logical-OR-level expression, the level `parse_expression` starts from.
    ///
    /// Thin wrapper that delegates to the modular logical parser
    /// (`parse_logical_or` from `expressions::logical`, imported in this file
    /// as `parse_logical_or_expr`).
    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
        // Use the new modular logical expression parser
        parse_logical_or_expr(self)
    }
1528
    /// Parses a logical-AND-level expression.
    ///
    /// Thin wrapper that delegates to the modular logical parser
    /// (`parse_logical_and` from `expressions::logical`, imported in this file
    /// as `parse_logical_and_expr`).
    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
        // Use the new modular logical expression parser
        parse_logical_and_expr(self)
    }
1533
    /// Parses a CASE expression.
    ///
    /// Thin wrapper that delegates to the modular CASE parser
    /// (`parse_case_expression` from `expressions::case`, imported in this file
    /// as `parse_case_expr`).
    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
        // Use the new modular CASE expression parser
        parse_case_expr(self)
    }
1538
1539    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
1540        // Use the new modular primary expression parser
1541        // Clone the necessary data to avoid borrowing issues
1542        let columns = self.columns.clone();
1543        let in_method_args = self.in_method_args;
1544        let ctx = PrimaryExpressionContext {
1545            columns: &columns,
1546            in_method_args,
1547        };
1548        parse_primary_expr(self, &ctx)
1549    }
1550
1551    // Keep the old implementation temporarily for reference (will be removed)
1552    fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
1553        // Set flag to indicate we're parsing method arguments
1554        self.in_method_args = true;
1555
1556        let args = self.parse_argument_list()?;
1557
1558        // Clear the flag
1559        self.in_method_args = false;
1560
1561        Ok(args)
1562    }
1563
1564    fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
1565        let mut args = Vec::new();
1566        let mut has_distinct = false;
1567
1568        if !matches!(self.current_token, Token::RightParen) {
1569            // Check if first argument starts with DISTINCT
1570            if matches!(self.current_token, Token::Distinct) {
1571                self.advance(); // consume DISTINCT
1572                has_distinct = true;
1573            }
1574
1575            // Parse the expression (either after DISTINCT or directly)
1576            args.push(self.parse_additive()?);
1577
1578            // Parse any remaining arguments (DISTINCT only applies to first arg for aggregates)
1579            while matches!(self.current_token, Token::Comma) {
1580                self.advance();
1581                args.push(self.parse_additive()?);
1582            }
1583        }
1584
1585        Ok((args, has_distinct))
1586    }
1587
1588    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
1589        let mut expressions = Vec::new();
1590
1591        loop {
1592            expressions.push(self.parse_expression()?);
1593
1594            if matches!(self.current_token, Token::Comma) {
1595                self.advance();
1596            } else {
1597                break;
1598            }
1599        }
1600
1601        Ok(expressions)
1602    }
1603
    /// Returns the position reported by the underlying lexer
    /// (`self.lexer.get_position()`).
    #[must_use]
    pub fn get_position(&self) -> usize {
        self.lexer.get_position()
    }
1608
    /// Checks whether the current token can start a JOIN clause:
    /// `JOIN`, `INNER`, `LEFT`, `RIGHT`, `FULL` or `CROSS`.
    fn is_join_token(&self) -> bool {
        matches!(
            self.current_token,
            Token::Join | Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
        )
    }
1616
1617    // Parse a JOIN clause
1618    fn parse_join_clause(&mut self) -> Result<JoinClause, String> {
1619        // Determine join type
1620        let join_type = match &self.current_token {
1621            Token::Join => {
1622                self.advance();
1623                JoinType::Inner // Default JOIN is INNER JOIN
1624            }
1625            Token::Inner => {
1626                self.advance();
1627                if !matches!(self.current_token, Token::Join) {
1628                    return Err("Expected JOIN after INNER".to_string());
1629                }
1630                self.advance();
1631                JoinType::Inner
1632            }
1633            Token::Left => {
1634                self.advance();
1635                // Handle optional OUTER keyword
1636                if matches!(self.current_token, Token::Outer) {
1637                    self.advance();
1638                }
1639                if !matches!(self.current_token, Token::Join) {
1640                    return Err("Expected JOIN after LEFT".to_string());
1641                }
1642                self.advance();
1643                JoinType::Left
1644            }
1645            Token::Right => {
1646                self.advance();
1647                // Handle optional OUTER keyword
1648                if matches!(self.current_token, Token::Outer) {
1649                    self.advance();
1650                }
1651                if !matches!(self.current_token, Token::Join) {
1652                    return Err("Expected JOIN after RIGHT".to_string());
1653                }
1654                self.advance();
1655                JoinType::Right
1656            }
1657            Token::Full => {
1658                self.advance();
1659                // Handle optional OUTER keyword
1660                if matches!(self.current_token, Token::Outer) {
1661                    self.advance();
1662                }
1663                if !matches!(self.current_token, Token::Join) {
1664                    return Err("Expected JOIN after FULL".to_string());
1665                }
1666                self.advance();
1667                JoinType::Full
1668            }
1669            Token::Cross => {
1670                self.advance();
1671                if !matches!(self.current_token, Token::Join) {
1672                    return Err("Expected JOIN after CROSS".to_string());
1673                }
1674                self.advance();
1675                JoinType::Cross
1676            }
1677            _ => return Err("Expected JOIN keyword".to_string()),
1678        };
1679
1680        // Parse the table being joined
1681        let (table, alias) = self.parse_join_table_source()?;
1682
1683        // Parse ON condition (required for all joins except CROSS JOIN)
1684        let condition = if join_type == JoinType::Cross {
1685            // CROSS JOIN doesn't have ON condition - create empty condition
1686            JoinCondition { conditions: vec![] }
1687        } else {
1688            if !matches!(self.current_token, Token::On) {
1689                return Err("Expected ON keyword after JOIN table".to_string());
1690            }
1691            self.advance();
1692            self.parse_join_condition()?
1693        };
1694
1695        Ok(JoinClause {
1696            join_type,
1697            table,
1698            alias,
1699            condition,
1700        })
1701    }
1702
1703    fn parse_join_table_source(&mut self) -> Result<(TableSource, Option<String>), String> {
1704        let table = match &self.current_token {
1705            Token::Identifier(name) => {
1706                let table_name = name.clone();
1707                self.advance();
1708                TableSource::Table(table_name)
1709            }
1710            Token::LeftParen => {
1711                // Subquery as table source
1712                self.advance();
1713                let subquery = self.parse_select_statement_inner()?;
1714                if !matches!(self.current_token, Token::RightParen) {
1715                    return Err("Expected ')' after subquery".to_string());
1716                }
1717                self.advance();
1718
1719                // Subqueries must have an alias
1720                let alias = match &self.current_token {
1721                    Token::Identifier(alias_name) => {
1722                        let alias = alias_name.clone();
1723                        self.advance();
1724                        alias
1725                    }
1726                    Token::As => {
1727                        self.advance();
1728                        match &self.current_token {
1729                            Token::Identifier(alias_name) => {
1730                                let alias = alias_name.clone();
1731                                self.advance();
1732                                alias
1733                            }
1734                            _ => return Err("Expected alias after AS keyword".to_string()),
1735                        }
1736                    }
1737                    _ => return Err("Subqueries must have an alias".to_string()),
1738                };
1739
1740                return Ok((
1741                    TableSource::DerivedTable {
1742                        query: Box::new(subquery),
1743                        alias: alias.clone(),
1744                    },
1745                    Some(alias),
1746                ));
1747            }
1748            _ => return Err("Expected table name or subquery in JOIN clause".to_string()),
1749        };
1750
1751        // Check for optional alias
1752        let alias = match &self.current_token {
1753            Token::Identifier(alias_name) => {
1754                let alias = alias_name.clone();
1755                self.advance();
1756                Some(alias)
1757            }
1758            Token::As => {
1759                self.advance();
1760                match &self.current_token {
1761                    Token::Identifier(alias_name) => {
1762                        let alias = alias_name.clone();
1763                        self.advance();
1764                        Some(alias)
1765                    }
1766                    _ => return Err("Expected alias after AS keyword".to_string()),
1767                }
1768            }
1769            _ => None,
1770        };
1771
1772        Ok((table, alias))
1773    }
1774
1775    fn parse_join_condition(&mut self) -> Result<JoinCondition, String> {
1776        let mut conditions = Vec::new();
1777
1778        // Parse first condition
1779        conditions.push(self.parse_single_join_condition()?);
1780
1781        // Parse additional conditions connected by AND
1782        while matches!(self.current_token, Token::And) {
1783            self.advance(); // consume AND
1784            conditions.push(self.parse_single_join_condition()?);
1785        }
1786
1787        Ok(JoinCondition { conditions })
1788    }
1789
1790    fn parse_single_join_condition(&mut self) -> Result<SingleJoinCondition, String> {
1791        // Parse left side as additive expression (stops before comparison operators)
1792        // This allows the comparison operator to be explicitly parsed by this function
1793        let left_expr = self.parse_additive()?;
1794
1795        // Parse operator
1796        let operator = match &self.current_token {
1797            Token::Equal => JoinOperator::Equal,
1798            Token::NotEqual => JoinOperator::NotEqual,
1799            Token::LessThan => JoinOperator::LessThan,
1800            Token::LessThanOrEqual => JoinOperator::LessThanOrEqual,
1801            Token::GreaterThan => JoinOperator::GreaterThan,
1802            Token::GreaterThanOrEqual => JoinOperator::GreaterThanOrEqual,
1803            _ => return Err("Expected comparison operator in JOIN condition".to_string()),
1804        };
1805        self.advance();
1806
1807        // Parse right side as additive expression (stops before comparison operators)
1808        let right_expr = self.parse_additive()?;
1809
1810        Ok(SingleJoinCondition {
1811            left_expr,
1812            operator,
1813            right_expr,
1814        })
1815    }
1816
1817    fn parse_column_reference(&mut self) -> Result<String, String> {
1818        match &self.current_token {
1819            Token::Identifier(name) => {
1820                let mut column_ref = name.clone();
1821                self.advance();
1822
1823                // Check for table.column notation
1824                if matches!(self.current_token, Token::Dot) {
1825                    self.advance();
1826                    match &self.current_token {
1827                        Token::Identifier(col_name) => {
1828                            column_ref.push('.');
1829                            column_ref.push_str(col_name);
1830                            self.advance();
1831                        }
1832                        _ => return Err("Expected column name after '.'".to_string()),
1833                    }
1834                }
1835
1836                Ok(column_ref)
1837            }
1838            _ => Err("Expected column reference".to_string()),
1839        }
1840    }
1841}
1842
/// Context detected at the cursor position of a (possibly partial) query.
///
/// Returned by `detect_cursor_context` together with the partial word being
/// typed, if any.
#[derive(Debug, Clone)]
pub enum CursorContext {
    /// Cursor is in the SELECT list.
    SelectClause,
    /// Cursor is in the FROM clause.
    FromClause,
    /// Cursor is in the WHERE clause (and not directly after AND/OR).
    WhereClause,
    /// Cursor is in the ORDER BY clause.
    OrderByClause,
    /// Cursor follows `column.`; holds the column name with surrounding
    /// double quotes stripped.
    AfterColumn(String),
    /// Cursor follows a logical operator (AND/OR).
    AfterLogicalOp(LogicalOp),
    /// Cursor follows a comparison operator.
    AfterComparisonOp(String, String), // column_name, operator
    /// Cursor is inside a method call.
    /// NOTE(review): not constructed in this part of the file - confirm usage.
    InMethodCall(String, String),      // object, method
    /// Cursor is inside an expression.
    /// NOTE(review): not constructed in this part of the file - confirm usage.
    InExpression,
    /// No context could be determined.
    Unknown,
}
1857
/// Returns the prefix of `s` ending at byte offset `pos` without ever
/// splitting a UTF-8 character.
///
/// When `pos` falls inside a multi-byte character the cut moves back to the
/// start of that character; when `pos` is at or past the end, all of `s` is
/// returned.
fn safe_slice_to(s: &str, pos: usize) -> &str {
    if pos >= s.len() {
        return s;
    }

    // Offset 0 is always a boundary, so the backwards search cannot fail
    let cut = (0..=pos)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);

    &s[..cut]
}
1872
/// Returns the suffix of `s` starting at byte offset `pos` without ever
/// splitting a UTF-8 character.
///
/// When `pos` falls inside a multi-byte character the start moves forward to
/// the next character; when `pos` is at or past the end, `""` is returned.
fn safe_slice_from(s: &str, pos: usize) -> &str {
    if pos >= s.len() {
        return "";
    }

    // `s.len()` is always a boundary, so the forward search ends there at the latest
    (pos..=s.len())
        .find(|&idx| s.is_char_boundary(idx))
        .map_or("", |start| &s[start..])
}
1887
1888#[must_use]
1889pub fn detect_cursor_context(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
1890    let truncated = safe_slice_to(query, cursor_pos);
1891    let mut parser = Parser::new(truncated);
1892
1893    // Try to parse as much as possible
1894    if let Ok(stmt) = parser.parse() {
1895        let (ctx, partial) = analyze_statement(&stmt, truncated, cursor_pos);
1896        #[cfg(test)]
1897        println!("analyze_statement returned: {ctx:?}, {partial:?} for query: '{truncated}'");
1898        (ctx, partial)
1899    } else {
1900        // Partial parse - analyze what we have
1901        let (ctx, partial) = analyze_partial(truncated, cursor_pos);
1902        #[cfg(test)]
1903        println!("analyze_partial returned: {ctx:?}, {partial:?} for query: '{truncated}'");
1904        (ctx, partial)
1905    }
1906}
1907
1908#[must_use]
1909pub fn tokenize_query(query: &str) -> Vec<String> {
1910    let mut lexer = Lexer::new(query);
1911    let tokens = lexer.tokenize_all();
1912    tokens.iter().map(|t| format!("{t:?}")).collect()
1913}
1914
/// Helper function to find the start of a quoted string searching backwards.
///
/// `pos` is the index of the closing quote; the search covers the bytes
/// strictly before it, from right to left. A `"` directly preceded by a
/// backslash is treated as escaped and skipped.
///
/// Returns the index of the opening quote, or `None` when there is none.
/// A `pos` beyond the end of `bytes` is clamped, so the function never panics.
#[must_use]
fn find_quote_start(bytes: &[u8], pos: usize) -> Option<usize> {
    // Candidates are the indices before the closing quote, limited to the slice
    let search_end = pos.min(bytes.len());

    (0..search_end)
        .rev()
        .find(|&idx| bytes[idx] == b'"' && (idx == 0 || bytes[idx - 1] != b'\\'))
}
1937
1938/// Helper function to handle method call context after validation
1939fn handle_method_call_context(col_name: &str, after_dot: &str) -> (CursorContext, Option<String>) {
1940    // Check if there's a partial method name after the dot
1941    let partial_method = if after_dot.is_empty() {
1942        None
1943    } else if after_dot.chars().all(|c| c.is_alphanumeric() || c == '_') {
1944        Some(after_dot.to_string())
1945    } else {
1946        None
1947    };
1948
1949    // For AfterColumn context, strip quotes if present for consistency
1950    let col_name_for_context =
1951        if col_name.starts_with('"') && col_name.ends_with('"') && col_name.len() > 2 {
1952            col_name[1..col_name.len() - 1].to_string()
1953        } else {
1954            col_name.to_string()
1955        };
1956
1957    (
1958        CursorContext::AfterColumn(col_name_for_context),
1959        partial_method,
1960    )
1961}
1962
1963/// Helper function to check if we're after a comparison operator
1964fn check_after_comparison_operator(query: &str) -> Option<(CursorContext, Option<String>)> {
1965    for op in &Parser::COMPARISON_OPERATORS {
1966        if let Some(op_pos) = query.rfind(op) {
1967            let before_op = safe_slice_to(query, op_pos);
1968            let after_op_start = op_pos + op.len();
1969            let after_op = if after_op_start < query.len() {
1970                &query[after_op_start..]
1971            } else {
1972                ""
1973            };
1974
1975            // Check if we have a column name before the operator
1976            if let Some(col_name) = before_op.split_whitespace().last() {
1977                if col_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
1978                    // Check if we're at or near the end of the query
1979                    let after_op_trimmed = after_op.trim();
1980                    if after_op_trimmed.is_empty()
1981                        || (after_op_trimmed
1982                            .chars()
1983                            .all(|c| c.is_alphanumeric() || c == '_')
1984                            && !after_op_trimmed.contains('('))
1985                    {
1986                        let partial = if after_op_trimmed.is_empty() {
1987                            None
1988                        } else {
1989                            Some(after_op_trimmed.to_string())
1990                        };
1991                        return Some((
1992                            CursorContext::AfterComparisonOp(
1993                                col_name.to_string(),
1994                                op.trim().to_string(),
1995                            ),
1996                            partial,
1997                        ));
1998                    }
1999                }
2000            }
2001        }
2002    }
2003    None
2004}
2005
2006fn analyze_statement(
2007    stmt: &SelectStatement,
2008    query: &str,
2009    _cursor_pos: usize,
2010) -> (CursorContext, Option<String>) {
2011    // First check for method call context (e.g., "columnName." or "columnName.Con")
2012    let trimmed = query.trim();
2013
2014    // Check if we're after a comparison operator (e.g., "createdDate > ")
2015    if let Some(result) = check_after_comparison_operator(query) {
2016        return result;
2017    }
2018
2019    // First check if we're after AND/OR - this takes precedence
2020    // Helper function to check if string ends with a logical operator
2021    let ends_with_logical_op = |s: &str| -> bool {
2022        let s_upper = s.to_uppercase();
2023        s_upper.ends_with(" AND") || s_upper.ends_with(" OR")
2024    };
2025
2026    if ends_with_logical_op(trimmed) {
2027        // Don't check for method context if we're clearly after a logical operator
2028    } else {
2029        // Look for the last dot in the query
2030        if let Some(dot_pos) = trimmed.rfind('.') {
2031            // Check if we're after a column name and dot
2032            let before_dot = safe_slice_to(trimmed, dot_pos);
2033            let after_dot_start = dot_pos + 1;
2034            let after_dot = if after_dot_start < trimmed.len() {
2035                &trimmed[after_dot_start..]
2036            } else {
2037                ""
2038            };
2039
2040            // Check if the part after dot looks like an incomplete method call
2041            // (not a complete method call like "Contains(...)")
2042            if !after_dot.contains('(') {
2043                // Try to extract the column name - could be quoted or regular
2044                let col_name = if before_dot.ends_with('"') {
2045                    // Handle quoted identifier - search backwards for matching opening quote
2046                    let bytes = before_dot.as_bytes();
2047                    let pos = before_dot.len() - 1; // Position of closing quote
2048
2049                    find_quote_start(bytes, pos).map(|start| safe_slice_from(before_dot, start))
2050                } else {
2051                    // Regular identifier - get the last word, handling parentheses
2052                    // Strip all leading parentheses
2053                    before_dot
2054                        .split_whitespace()
2055                        .last()
2056                        .map(|word| word.trim_start_matches('('))
2057                };
2058
2059                if let Some(col_name) = col_name {
2060                    // For quoted identifiers, keep the quotes, for regular identifiers check validity
2061                    let is_valid = Parser::is_valid_identifier(col_name);
2062
2063                    if is_valid {
2064                        return handle_method_call_context(col_name, after_dot);
2065                    }
2066                }
2067            }
2068        }
2069    }
2070
2071    // Check if we're in WHERE clause
2072    if let Some(where_clause) = &stmt.where_clause {
2073        // Check if query ends with AND/OR (with or without trailing space/partial)
2074        let trimmed_upper = trimmed.to_uppercase();
2075        if trimmed_upper.ends_with(" AND") || trimmed_upper.ends_with(" OR") {
2076            let op = if trimmed_upper.ends_with(" AND") {
2077                LogicalOp::And
2078            } else {
2079                LogicalOp::Or
2080            };
2081            return (CursorContext::AfterLogicalOp(op), None);
2082        }
2083
2084        // Check if we have AND/OR followed by a partial word
2085        let query_upper = query.to_uppercase();
2086        if let Some(and_pos) = query_upper.rfind(" AND ") {
2087            let after_and = safe_slice_from(query, and_pos + 5);
2088            let partial = extract_partial_at_end(after_and);
2089            if partial.is_some() {
2090                return (CursorContext::AfterLogicalOp(LogicalOp::And), partial);
2091            }
2092        }
2093
2094        if let Some(or_pos) = query_upper.rfind(" OR ") {
2095            let after_or = safe_slice_from(query, or_pos + 4);
2096            let partial = extract_partial_at_end(after_or);
2097            if partial.is_some() {
2098                return (CursorContext::AfterLogicalOp(LogicalOp::Or), partial);
2099            }
2100        }
2101
2102        if let Some(last_condition) = where_clause.conditions.last() {
2103            if let Some(connector) = &last_condition.connector {
2104                // We're after AND/OR
2105                return (
2106                    CursorContext::AfterLogicalOp(connector.clone()),
2107                    extract_partial_at_end(query),
2108                );
2109            }
2110        }
2111        // We're in WHERE clause but not after AND/OR
2112        return (CursorContext::WhereClause, extract_partial_at_end(query));
2113    }
2114
2115    // Check if we're after ORDER BY
2116    let query_upper = query.to_uppercase();
2117    if query_upper.ends_with(" ORDER BY") {
2118        return (CursorContext::OrderByClause, None);
2119    }
2120
2121    // Check other contexts based on what's in the statement
2122    if stmt.order_by.is_some() {
2123        return (CursorContext::OrderByClause, extract_partial_at_end(query));
2124    }
2125
2126    if stmt.from_table.is_some() && stmt.where_clause.is_none() && stmt.order_by.is_none() {
2127        return (CursorContext::FromClause, extract_partial_at_end(query));
2128    }
2129
2130    if !stmt.columns.is_empty() && stmt.from_table.is_none() {
2131        return (CursorContext::SelectClause, extract_partial_at_end(query));
2132    }
2133
2134    (CursorContext::Unknown, None)
2135}
2136
2137/// Helper function to find the last occurrence of a token type in the token stream
2138fn find_last_token(tokens: &[(usize, usize, Token)], target: &Token) -> Option<usize> {
2139    tokens
2140        .iter()
2141        .rposition(|(_, _, t)| t == target)
2142        .map(|idx| tokens[idx].0)
2143}
2144
2145/// Helper function to find the last occurrence of any matching token
2146fn find_last_matching_token<F>(
2147    tokens: &[(usize, usize, Token)],
2148    predicate: F,
2149) -> Option<(usize, &Token)>
2150where
2151    F: Fn(&Token) -> bool,
2152{
2153    tokens
2154        .iter()
2155        .rposition(|(_, _, t)| predicate(t))
2156        .map(|idx| (tokens[idx].0, &tokens[idx].2))
2157}
2158
2159/// Helper function to check if we're in a specific clause based on tokens
2160fn is_in_clause(
2161    tokens: &[(usize, usize, Token)],
2162    clause_token: Token,
2163    exclude_tokens: &[Token],
2164) -> bool {
2165    // Find the last occurrence of the clause token
2166    if let Some(clause_pos) = find_last_token(tokens, &clause_token) {
2167        // Check if any exclude tokens appear after it
2168        for (pos, _, token) in tokens.iter() {
2169            if *pos > clause_pos && exclude_tokens.contains(token) {
2170                return false;
2171            }
2172        }
2173        return true;
2174    }
2175    false
2176}
2177
/// Determines the cursor context for a query prefix that did NOT parse as a
/// complete statement, using the token stream and the raw text.
///
/// Checks are applied in priority order:
/// 1. cursor after a comparison operator;
/// 2. cursor after `column.` (method call context);
/// 3. cursor after the last AND/OR token;
/// 4. last token is AND/OR;
/// 5. ORDER BY, then WHERE, then FROM, then SELECT, based on which keyword
///    tokens are present;
/// 6. otherwise `CursorContext::Unknown`.
///
/// `query` is the text to analyse (the caller in this file passes the text
/// before the cursor) and `cursor_pos` is the cursor offset as given by the
/// caller. Returns the context and the partial word being typed, if any.
fn analyze_partial(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
    // Tokenize the query up to cursor position
    // NOTE(review): the tuples are used below as (position, _, token);
    // presumably (start, end, token) - confirm against the lexer.
    let mut lexer = Lexer::new(query);
    let tokens = lexer.tokenize_all_with_positions();

    let trimmed = query.trim();

    // Test-only debug output, limited to queries containing "Last Name"
    #[cfg(test)]
    {
        if trimmed.contains("\"Last Name\"") {
            eprintln!("DEBUG analyze_partial: query='{query}', trimmed='{trimmed}'");
        }
    }

    // Check if we're after a comparison operator (e.g., "createdDate > ")
    if let Some(result) = check_after_comparison_operator(query) {
        return result;
    }

    // Look for the last dot in the query (method call context) - check this FIRST
    // before AND/OR detection to properly handle cases like "AND (Country."
    if let Some(dot_pos) = trimmed.rfind('.') {
        #[cfg(test)]
        {
            if trimmed.contains("\"Last Name\"") {
                eprintln!("DEBUG: Found dot at position {dot_pos}");
            }
        }
        // Check if we're after a column name and dot
        // ('.' is a single byte, so both slice bounds are char boundaries)
        let before_dot = &trimmed[..dot_pos];
        let after_dot = &trimmed[dot_pos + 1..];

        // Check if the part after dot looks like an incomplete method call
        // (not a complete method call like "Contains(...)")
        if !after_dot.contains('(') {
            // Try to extract the column name before the dot
            // It could be a quoted identifier like "Last Name" or a regular identifier
            let col_name = if before_dot.ends_with('"') {
                // Handle quoted identifier - search backwards for matching opening quote
                let bytes = before_dot.as_bytes();
                let pos = before_dot.len() - 1; // Position of closing quote

                #[cfg(test)]
                {
                    if trimmed.contains("\"Last Name\"") {
                        eprintln!("DEBUG: before_dot='{before_dot}', looking for opening quote");
                    }
                }

                let found_start = find_quote_start(bytes, pos);

                if let Some(start) = found_start {
                    // Extract the full quoted identifier including quotes
                    let result = safe_slice_from(before_dot, start);
                    #[cfg(test)]
                    {
                        if trimmed.contains("\"Last Name\"") {
                            eprintln!("DEBUG: Extracted quoted identifier: '{result}'");
                        }
                    }
                    Some(result)
                } else {
                    #[cfg(test)]
                    {
                        if trimmed.contains("\"Last Name\"") {
                            eprintln!("DEBUG: No opening quote found!");
                        }
                    }
                    None
                }
            } else {
                // Regular identifier - get the last word, handling parentheses
                // Strip all leading parentheses
                before_dot
                    .split_whitespace()
                    .last()
                    .map(|word| word.trim_start_matches('('))
            };

            if let Some(col_name) = col_name {
                #[cfg(test)]
                {
                    if trimmed.contains("\"Last Name\"") {
                        eprintln!("DEBUG: col_name = '{col_name}'");
                    }
                }

                // For quoted identifiers, keep the quotes, for regular identifiers check validity
                let is_valid = Parser::is_valid_identifier(col_name);

                #[cfg(test)]
                {
                    if trimmed.contains("\"Last Name\"") {
                        eprintln!("DEBUG: is_valid = {is_valid}");
                    }
                }

                if is_valid {
                    return handle_method_call_context(col_name, after_dot);
                }
            }
        }
    }

    // Check if we're after AND/OR using tokens - but only after checking for method calls
    if let Some((pos, token)) =
        find_last_matching_token(&tokens, |t| matches!(t, Token::And | Token::Or))
    {
        // Check if cursor is after the logical operator
        // NOTE(review): the end is derived from hard-coded keyword lengths
        // rather than from the token tuple - confirm this is intended.
        let token_end_pos = if matches!(token, Token::And) {
            pos + 3 // "AND" is 3 characters
        } else {
            pos + 2 // "OR" is 2 characters
        };

        if cursor_pos > token_end_pos {
            // Extract any partial word after the operator
            let after_op = safe_slice_from(query, token_end_pos + 1); // +1 for the space
            let partial = extract_partial_at_end(after_op);
            let op = if matches!(token, Token::And) {
                LogicalOp::And
            } else {
                LogicalOp::Or
            };
            return (CursorContext::AfterLogicalOp(op), partial);
        }
    }

    // Check if the last token is AND or OR (handles case where it's at the very end)
    if let Some((_, _, last_token)) = tokens.last() {
        if matches!(last_token, Token::And | Token::Or) {
            let op = if matches!(last_token, Token::And) {
                LogicalOp::And
            } else {
                LogicalOp::Or
            };
            return (CursorContext::AfterLogicalOp(op), None);
        }
    }

    // Check if we're in ORDER BY clause using tokens
    if let Some(order_pos) = find_last_token(&tokens, &Token::OrderBy) {
        // Check if there's a BY token after ORDER
        let has_by = tokens
            .iter()
            .any(|(pos, _, t)| *pos > order_pos && matches!(t, Token::By));
        // Also accept the case where the OrderBy token is the very last token
        if has_by
            || tokens
                .last()
                .map_or(false, |(_, _, t)| matches!(t, Token::OrderBy))
        {
            return (CursorContext::OrderByClause, extract_partial_at_end(query));
        }
    }

    // Check if we're in WHERE clause using tokens
    // (WHERE present, with no ORDER BY / GROUP BY after it)
    if is_in_clause(&tokens, Token::Where, &[Token::OrderBy, Token::GroupBy]) {
        return (CursorContext::WhereClause, extract_partial_at_end(query));
    }

    // Check if we're in FROM clause using tokens
    // (FROM present, with no WHERE / ORDER BY / GROUP BY after it)
    if is_in_clause(
        &tokens,
        Token::From,
        &[Token::Where, Token::OrderBy, Token::GroupBy],
    ) {
        return (CursorContext::FromClause, extract_partial_at_end(query));
    }

    // Check if we're in SELECT clause using tokens
    // (SELECT present and no FROM anywhere)
    if find_last_token(&tokens, &Token::Select).is_some()
        && find_last_token(&tokens, &Token::From).is_none()
    {
        return (CursorContext::SelectClause, extract_partial_at_end(query));
    }

    (CursorContext::Unknown, None)
}
2356
2357fn extract_partial_at_end(query: &str) -> Option<String> {
2358    let trimmed = query.trim();
2359
2360    // First check if the last word itself starts with a quote (unclosed quoted identifier being typed)
2361    if let Some(last_word) = trimmed.split_whitespace().last() {
2362        if last_word.starts_with('"') && !last_word.ends_with('"') {
2363            // This is an unclosed quoted identifier like "Cust
2364            return Some(last_word.to_string());
2365        }
2366    }
2367
2368    // Regular identifier extraction
2369    let last_word = trimmed.split_whitespace().last()?;
2370
2371    // Check if it's a partial identifier (not a keyword or operator)
2372    // First check if it's alphanumeric (potential identifier)
2373    if last_word.chars().all(|c| c.is_alphanumeric() || c == '_') {
2374        // Use lexer to determine if it's a keyword or identifier
2375        if !is_sql_keyword(last_word) {
2376            Some(last_word.to_string())
2377        } else {
2378            None
2379        }
2380    } else {
2381        None
2382    }
2383}
2384
2385// Implement the ParsePrimary trait for Parser to use the modular expression parsing
2386impl ParsePrimary for Parser {
2387    fn current_token(&self) -> &Token {
2388        &self.current_token
2389    }
2390
2391    fn advance(&mut self) {
2392        self.advance();
2393    }
2394
2395    fn consume(&mut self, expected: Token) -> Result<(), String> {
2396        self.consume(expected)
2397    }
2398
2399    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
2400        self.parse_case_expression()
2401    }
2402
2403    fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
2404        self.parse_function_args()
2405    }
2406
2407    fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
2408        self.parse_window_spec()
2409    }
2410
2411    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
2412        self.parse_logical_or()
2413    }
2414
2415    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2416        self.parse_comparison()
2417    }
2418
2419    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2420        self.parse_expression_list()
2421    }
2422
2423    fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2424        // Parse subquery without parenthesis balance validation
2425        if matches!(self.current_token, Token::With) {
2426            self.parse_with_clause_inner()
2427        } else {
2428            self.parse_select_statement_inner()
2429        }
2430    }
2431}
2432
2433// Implement the ExpressionParser trait for Parser to use the modular expression parsing
2434impl ExpressionParser for Parser {
2435    fn current_token(&self) -> &Token {
2436        &self.current_token
2437    }
2438
2439    fn advance(&mut self) {
2440        // Call the main advance method directly to avoid recursion
2441        match &self.current_token {
2442            Token::LeftParen => self.paren_depth += 1,
2443            Token::RightParen => {
2444                self.paren_depth -= 1;
2445            }
2446            _ => {}
2447        }
2448        self.current_token = self.lexer.next_token();
2449    }
2450
2451    fn peek(&self) -> Option<&Token> {
2452        // We can't return a reference to a token from a temporary lexer,
2453        // so we need a different approach. For now, let's use a workaround
2454        // that checks the next token type without consuming it.
2455        // This is a limitation of the current design.
2456        // A proper fix would be to store the peeked token in the Parser struct.
2457        None // TODO: Implement proper lookahead
2458    }
2459
2460    fn is_at_end(&self) -> bool {
2461        matches!(self.current_token, Token::Eof)
2462    }
2463
2464    fn consume(&mut self, expected: Token) -> Result<(), String> {
2465        // Call the main consume method to avoid recursion
2466        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
2467            self.update_paren_depth(&expected)?;
2468            self.current_token = self.lexer.next_token();
2469            Ok(())
2470        } else {
2471            Err(format!(
2472                "Expected {:?}, found {:?}",
2473                expected, self.current_token
2474            ))
2475        }
2476    }
2477
2478    fn parse_identifier(&mut self) -> Result<String, String> {
2479        if let Token::Identifier(id) = &self.current_token {
2480            let id = id.clone();
2481            self.advance();
2482            Ok(id)
2483        } else {
2484            Err(format!(
2485                "Expected identifier, found {:?}",
2486                self.current_token
2487            ))
2488        }
2489    }
2490}
2491
2492// Implement the ParseArithmetic trait for Parser to use the modular arithmetic parsing
2493impl ParseArithmetic for Parser {
2494    fn current_token(&self) -> &Token {
2495        &self.current_token
2496    }
2497
2498    fn advance(&mut self) {
2499        self.advance();
2500    }
2501
2502    fn consume(&mut self, expected: Token) -> Result<(), String> {
2503        self.consume(expected)
2504    }
2505
2506    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2507        self.parse_primary()
2508    }
2509
2510    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
2511        self.parse_multiplicative()
2512    }
2513
2514    fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
2515        self.parse_method_args()
2516    }
2517}
2518
2519// Implement the ParseComparison trait for Parser to use the modular comparison parsing
2520impl ParseComparison for Parser {
2521    fn current_token(&self) -> &Token {
2522        &self.current_token
2523    }
2524
2525    fn advance(&mut self) {
2526        self.advance();
2527    }
2528
2529    fn consume(&mut self, expected: Token) -> Result<(), String> {
2530        self.consume(expected)
2531    }
2532
2533    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2534        self.parse_primary()
2535    }
2536
2537    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
2538        self.parse_additive()
2539    }
2540
2541    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2542        self.parse_expression_list()
2543    }
2544
2545    fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2546        // Parse subquery without parenthesis balance validation
2547        if matches!(self.current_token, Token::With) {
2548            self.parse_with_clause_inner()
2549        } else {
2550            self.parse_select_statement_inner()
2551        }
2552    }
2553}
2554
2555// Implement the ParseLogical trait for Parser to use the modular logical parsing
2556impl ParseLogical for Parser {
2557    fn current_token(&self) -> &Token {
2558        &self.current_token
2559    }
2560
2561    fn advance(&mut self) {
2562        self.advance();
2563    }
2564
2565    fn consume(&mut self, expected: Token) -> Result<(), String> {
2566        self.consume(expected)
2567    }
2568
2569    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
2570        self.parse_logical_and()
2571    }
2572
2573    fn parse_base_logical_expression(&mut self) -> Result<SqlExpression, String> {
2574        // This is the base for logical AND - it should parse comparison expressions
2575        // to avoid infinite recursion with parse_expression
2576        self.parse_comparison()
2577    }
2578
2579    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2580        self.parse_comparison()
2581    }
2582
2583    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2584        self.parse_expression_list()
2585    }
2586}
2587
2588// Implement the ParseCase trait for Parser to use the modular CASE parsing
2589impl ParseCase for Parser {
2590    fn current_token(&self) -> &Token {
2591        &self.current_token
2592    }
2593
2594    fn advance(&mut self) {
2595        self.advance();
2596    }
2597
2598    fn consume(&mut self, expected: Token) -> Result<(), String> {
2599        self.consume(expected)
2600    }
2601
2602    fn parse_expression(&mut self) -> Result<SqlExpression, String> {
2603        self.parse_expression()
2604    }
2605}
2606
2607fn is_sql_keyword(word: &str) -> bool {
2608    // Use the lexer to check if this word produces a keyword token
2609    let mut lexer = Lexer::new(word);
2610    let token = lexer.next_token();
2611
2612    // Check if it's a keyword token (not an identifier)
2613    !matches!(token, Token::Identifier(_) | Token::Eof)
2614}
2615
#[cfg(test)]
mod tests {
    use super::*;

    /// `Parser::new()` behaves like Standard mode: no comments are collected.
    #[test]
    fn test_parser_mode_default_is_standard() {
        let parsed = Parser::new("-- Leading comment\nSELECT * FROM users")
            .parse()
            .unwrap();

        // Standard mode leaves both comment slots empty.
        assert!(parsed.leading_comments.is_empty());
        assert!(parsed.trailing_comment.is_none());
    }

    /// PreserveComments mode keeps every leading line comment, in order.
    #[test]
    fn test_parser_mode_preserve_leading_comments() {
        let query = "-- Important query\n-- Author: Alice\nSELECT id, name FROM users";
        let parsed = Parser::with_mode(query, ParserMode::PreserveComments)
            .parse()
            .unwrap();

        let leading = &parsed.leading_comments;
        assert_eq!(leading.len(), 2);
        assert!(leading[0].is_line_comment);
        assert!(leading[0].text.contains("Important query"));
        assert!(leading[1].text.contains("Author: Alice"));
    }

    /// PreserveComments mode keeps a comment trailing the statement.
    #[test]
    fn test_parser_mode_preserve_trailing_comment() {
        let query = "SELECT * FROM users -- Fetch all users";
        let parsed = Parser::with_mode(query, ParserMode::PreserveComments)
            .parse()
            .unwrap();

        assert!(parsed.trailing_comment.is_some());
        let trailing = parsed.trailing_comment.unwrap();
        assert!(trailing.is_line_comment);
        assert!(trailing.text.contains("Fetch all users"));
    }

    /// PreserveComments mode keeps block comments and marks them as non-line comments.
    #[test]
    fn test_parser_mode_preserve_block_comments() {
        let query = "/* Query explanation */\nSELECT * FROM users";
        let parsed = Parser::with_mode(query, ParserMode::PreserveComments)
            .parse()
            .unwrap();

        let leading = &parsed.leading_comments;
        assert_eq!(leading.len(), 1);
        assert!(!leading[0].is_line_comment); // block comment, not a `--` comment
        assert!(leading[0].text.contains("Query explanation"));
    }

    /// PreserveComments mode keeps leading and trailing comments on the same statement.
    #[test]
    fn test_parser_mode_preserve_both_comments() {
        let query = "-- Leading\nSELECT * FROM users -- Trailing";
        let parsed = Parser::with_mode(query, ParserMode::PreserveComments)
            .parse()
            .unwrap();

        assert_eq!(parsed.leading_comments.len(), 1);
        assert!(parsed.leading_comments[0].text.contains("Leading"));
        assert!(parsed.trailing_comment.is_some());
        assert!(parsed.trailing_comment.unwrap().text.contains("Trailing"));
    }

    /// Explicit Standard mode drops line and block comments but still parses the query.
    #[test]
    fn test_parser_mode_standard_ignores_comments() {
        let query = "-- Comment 1\n/* Comment 2 */\nSELECT * FROM users -- Comment 3";
        let parsed = Parser::with_mode(query, ParserMode::Standard)
            .parse()
            .unwrap();

        // No comment of any kind survives.
        assert!(parsed.leading_comments.is_empty());
        assert!(parsed.trailing_comment.is_none());

        // The statement itself is unaffected by the surrounding comments.
        assert_eq!(parsed.select_items.len(), 1);
        assert_eq!(parsed.from_table, Some(String::from("users")));
    }

    /// `Parser::new()` and an explicit Standard-mode parser yield matching statements.
    #[test]
    fn test_parser_backward_compatibility() {
        let query = "SELECT id, name FROM users WHERE active = true";

        // Legacy constructor (implicitly Standard mode).
        let via_new = Parser::new(query).parse().unwrap();

        // Same query with the mode spelled out.
        let via_mode = Parser::with_mode(query, ParserMode::Standard)
            .parse()
            .unwrap();

        // The two statements agree on shape and both carry no comments.
        assert_eq!(via_new.select_items.len(), via_mode.select_items.len());
        assert_eq!(via_new.from_table, via_mode.from_table);
        assert_eq!(
            via_new.where_clause.is_some(),
            via_mode.where_clause.is_some()
        );
        assert!(via_new.leading_comments.is_empty());
        assert!(via_mode.leading_comments.is_empty());
    }
}