sql_cli/sql/
recursive_parser.rs

1// Keep chrono imports for the parser implementation
2
3// Re-exports for backward compatibility - these serve as both imports and re-exports
4pub use super::parser::ast::{
5    CTEType, Comment, Condition, DataFormat, FrameBound, FrameUnit, HttpMethod, IntoTable,
6    JoinClause, JoinCondition, JoinOperator, JoinType, LogicalOp, OrderByColumn, SelectItem,
7    SelectStatement, SetOperation, SingleJoinCondition, SortDirection, SqlExpression,
8    TableFunction, TableSource, WebCTESpec, WhenBranch, WhereClause, WindowFrame, WindowSpec, CTE,
9};
10pub use super::parser::legacy::{ParseContext, ParseState, Schema, SqlParser, SqlToken, TableInfo};
11pub use super::parser::lexer::{Lexer, LexerMode, Token};
12pub use super::parser::ParserConfig;
13
14// Re-export formatting functions for backward compatibility
15pub use super::parser::formatter::{format_ast_tree, format_sql_pretty, format_sql_pretty_compact};
16
17// New AST-based formatter
18pub use super::parser::ast_formatter::{format_sql_ast, format_sql_ast_with_config, FormatConfig};
19
20// Import the new expression modules
21use super::parser::expressions::arithmetic::{
22    parse_additive as parse_additive_expr, parse_multiplicative as parse_multiplicative_expr,
23    ParseArithmetic,
24};
25use super::parser::expressions::case::{parse_case_expression as parse_case_expr, ParseCase};
26use super::parser::expressions::comparison::{
27    parse_comparison as parse_comparison_expr, parse_in_operator, ParseComparison,
28};
29use super::parser::expressions::logical::{
30    parse_logical_and as parse_logical_and_expr, parse_logical_or as parse_logical_or_expr,
31    ParseLogical,
32};
33use super::parser::expressions::primary::{
34    parse_primary as parse_primary_expr, ParsePrimary, PrimaryExpressionContext,
35};
36use super::parser::expressions::ExpressionParser;
37
38// Import function registry to check for function existence
39use crate::sql::functions::{FunctionCategory, FunctionRegistry};
40use crate::sql::generators::GeneratorRegistry;
41use std::sync::Arc;
42
43// Import Web CTE parser
44use super::parser::web_cte_parser::WebCteParser;
45
/// Parser mode - controls whether comments are preserved in the AST.
///
/// The default mode is [`ParserMode::Standard`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ParserMode {
    /// Standard parsing - skip comments (current behavior, backward compatible)
    #[default]
    Standard,
    /// Preserve comments in AST (opt-in for formatters)
    PreserveComments,
}
60
/// SQL parser state: a [`Lexer`] plus the token currently under inspection,
/// together with the bookkeeping used while parsing (parenthesis depth,
/// trace settings, registries and comment-handling mode).
pub struct Parser {
    /// Token source; `current_token` is always the most recent token read from it.
    lexer: Lexer,
    /// The token currently being examined. Made public for web_cte_parser access.
    pub current_token: Token,
    /// Track if we're parsing method arguments.
    in_method_args: bool,
    /// Known column names for context-aware parsing.
    columns: Vec<String>,
    /// Track parentheses nesting depth (signed: `advance()` may drive it below zero).
    paren_depth: i32,
    /// Stack to save/restore paren depth for nested contexts.
    paren_depth_stack: Vec<i32>,
    /// Parser configuration including case sensitivity.
    _config: ParserConfig,
    /// Enable detailed token-by-token trace (written to stderr).
    debug_trace: bool,
    /// Track recursion depth for indented trace.
    trace_depth: usize,
    /// Function registry for validation.
    function_registry: Arc<FunctionRegistry>,
    /// Generator registry for table functions.
    generator_registry: Arc<GeneratorRegistry>,
    /// Parser mode for comment preservation.
    mode: ParserMode,
}
75
76impl Parser {
77    #[must_use]
78    pub fn new(input: &str) -> Self {
79        Self::with_mode(input, ParserMode::default())
80    }
81
82    /// Create a new parser with explicit mode for comment preservation
83    #[must_use]
84    pub fn with_mode(input: &str, mode: ParserMode) -> Self {
85        // Choose lexer mode based on parser mode
86        let lexer_mode = match mode {
87            ParserMode::Standard => LexerMode::SkipComments,
88            ParserMode::PreserveComments => LexerMode::PreserveComments,
89        };
90
91        let mut lexer = Lexer::with_mode(input, lexer_mode);
92        let current_token = lexer.next_token();
93        Self {
94            lexer,
95            current_token,
96            in_method_args: false,
97            columns: Vec::new(),
98            paren_depth: 0,
99            paren_depth_stack: Vec::new(),
100            _config: ParserConfig::default(),
101            debug_trace: false,
102            trace_depth: 0,
103            function_registry: Arc::new(FunctionRegistry::new()),
104            generator_registry: Arc::new(GeneratorRegistry::new()),
105            mode,
106        }
107    }
108
109    #[must_use]
110    pub fn with_config(input: &str, config: ParserConfig) -> Self {
111        let mut lexer = Lexer::new(input);
112        let current_token = lexer.next_token();
113        Self {
114            lexer,
115            current_token,
116            in_method_args: false,
117            columns: Vec::new(),
118            paren_depth: 0,
119            paren_depth_stack: Vec::new(),
120            _config: config,
121            debug_trace: false,
122            trace_depth: 0,
123            function_registry: Arc::new(FunctionRegistry::new()),
124            generator_registry: Arc::new(GeneratorRegistry::new()),
125            mode: ParserMode::default(),
126        }
127    }
128
129    #[must_use]
130    pub fn with_columns(mut self, columns: Vec<String>) -> Self {
131        self.columns = columns;
132        self
133    }
134
135    #[must_use]
136    pub fn with_debug_trace(mut self, enabled: bool) -> Self {
137        self.debug_trace = enabled;
138        self
139    }
140
141    #[must_use]
142    pub fn with_function_registry(mut self, registry: Arc<FunctionRegistry>) -> Self {
143        self.function_registry = registry;
144        self
145    }
146
147    #[must_use]
148    pub fn with_generator_registry(mut self, registry: Arc<GeneratorRegistry>) -> Self {
149        self.generator_registry = registry;
150        self
151    }
152
153    fn trace_enter(&mut self, context: &str) {
154        if self.debug_trace {
155            let indent = "  ".repeat(self.trace_depth);
156            eprintln!("{}→ {} | Token: {:?}", indent, context, self.current_token);
157            self.trace_depth += 1;
158        }
159    }
160
161    fn trace_exit(&mut self, context: &str, result: &Result<impl std::fmt::Debug, String>) {
162        if self.debug_trace {
163            self.trace_depth = self.trace_depth.saturating_sub(1);
164            let indent = "  ".repeat(self.trace_depth);
165            match result {
166                Ok(val) => eprintln!("{}← {} ✓ | Result: {:?}", indent, context, val),
167                Err(e) => eprintln!("{}← {} ✗ | Error: {}", indent, context, e),
168            }
169        }
170    }
171
172    fn trace_token(&self, action: &str) {
173        if self.debug_trace {
174            let indent = "  ".repeat(self.trace_depth);
175            eprintln!("{}  {} | Token: {:?}", indent, action, self.current_token);
176        }
177    }
178
179    #[allow(dead_code)]
180    fn peek_token(&self) -> Option<Token> {
181        // Alternative peek that returns owned token
182        let mut temp_lexer = self.lexer.clone();
183        let next_token = temp_lexer.next_token();
184        if matches!(next_token, Token::Eof) {
185            None
186        } else {
187            Some(next_token)
188        }
189    }
190
191    /// Check if current token is one of the reserved keywords that should stop parsing
192    /// Check if an identifier string is a reserved keyword (for backward compatibility)
193    /// This is used when the lexer hasn't properly tokenized keywords and they come through
194    /// as Token::Identifier instead of their proper token types
195    fn is_identifier_reserved(id: &str) -> bool {
196        let id_upper = id.to_uppercase();
197        matches!(
198            id_upper.as_str(),
199            "ORDER" | "HAVING" | "LIMIT" | "OFFSET" | "UNION" | "INTERSECT" | "EXCEPT"
200        )
201    }
202
    /// The six comparison operators as strings, each padded with one space on
    /// either side (for autocomplete context).
    // NOTE(review): no use of this constant is visible in this part of the
    // file - confirm it is still referenced.
    const COMPARISON_OPERATORS: [&'static str; 6] = [" > ", " < ", " >= ", " <= ", " = ", " != "];
205
206    pub fn consume(&mut self, expected: Token) -> Result<(), String> {
207        self.trace_token(&format!("Consuming expected {:?}", expected));
208        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
209            // Track parentheses depth
210            self.update_paren_depth(&expected)?;
211
212            self.current_token = self.lexer.next_token();
213            Ok(())
214        } else {
215            // Provide better error messages for common cases
216            let error_msg = match (&expected, &self.current_token) {
217                (Token::RightParen, Token::Eof) if self.paren_depth > 0 => {
218                    format!(
219                        "Unclosed parenthesis - missing {} closing parenthes{}",
220                        self.paren_depth,
221                        if self.paren_depth == 1 { "is" } else { "es" }
222                    )
223                }
224                (Token::RightParen, _) if self.paren_depth > 0 => {
225                    format!(
226                        "Expected closing parenthesis but found {:?} (currently {} unclosed parenthes{})",
227                        self.current_token,
228                        self.paren_depth,
229                        if self.paren_depth == 1 { "is" } else { "es" }
230                    )
231                }
232                _ => format!("Expected {:?}, found {:?}", expected, self.current_token),
233            };
234            Err(error_msg)
235        }
236    }
237
238    pub fn advance(&mut self) {
239        // Track parentheses depth when advancing
240        match &self.current_token {
241            Token::LeftParen => self.paren_depth += 1,
242            Token::RightParen => {
243                self.paren_depth -= 1;
244                // Note: We don't check for < 0 here because advance() is used
245                // in contexts where we're not necessarily expecting a right paren
246            }
247            _ => {}
248        }
249        let old_token = self.current_token.clone();
250        self.current_token = self.lexer.next_token();
251        if self.debug_trace {
252            let indent = "  ".repeat(self.trace_depth);
253            eprintln!(
254                "{}  Advanced: {:?} → {:?}",
255                indent, old_token, self.current_token
256            );
257        }
258    }
259
260    /// Collect all leading comments before a SQL construct
261    /// This consumes comment tokens and returns them as a Vec<Comment>
262    fn collect_leading_comments(&mut self) -> Vec<Comment> {
263        let mut comments = Vec::new();
264        loop {
265            match &self.current_token {
266                Token::LineComment(text) => {
267                    comments.push(Comment::line(text.clone()));
268                    self.advance();
269                }
270                Token::BlockComment(text) => {
271                    comments.push(Comment::block(text.clone()));
272                    self.advance();
273                }
274                _ => break,
275            }
276        }
277        comments
278    }
279
280    /// Collect a trailing inline comment (on the same line)
281    /// This consumes a single comment token if present
282    fn collect_trailing_comment(&mut self) -> Option<Comment> {
283        match &self.current_token {
284            Token::LineComment(text) => {
285                let comment = Some(Comment::line(text.clone()));
286                self.advance();
287                comment
288            }
289            Token::BlockComment(text) => {
290                let comment = Some(Comment::block(text.clone()));
291                self.advance();
292                comment
293            }
294            _ => None,
295        }
296    }
297
298    fn push_paren_depth(&mut self) {
299        self.paren_depth_stack.push(self.paren_depth);
300        self.paren_depth = 0;
301    }
302
303    fn pop_paren_depth(&mut self) {
304        if let Some(depth) = self.paren_depth_stack.pop() {
305            // Ignore the internal depth - just restore the saved value
306            self.paren_depth = depth;
307        }
308    }
309
310    pub fn parse(&mut self) -> Result<SelectStatement, String> {
311        self.trace_enter("parse");
312
313        // Collect leading comments FIRST (before checking for WITH or SELECT)
314        // This allows comments before WITH clauses to be preserved
315        let leading_comments = if self.mode == ParserMode::PreserveComments {
316            self.collect_leading_comments()
317        } else {
318            vec![]
319        };
320
321        // Now check for WITH clause (after consuming comments)
322        let result = if matches!(self.current_token, Token::With) {
323            let mut stmt = self.parse_with_clause()?;
324            // Attach the leading comments we collected
325            stmt.leading_comments = leading_comments;
326            stmt
327        } else {
328            // For SELECT without WITH, pass comments to inner parser
329            let stmt = self.parse_select_statement_with_comments_public(leading_comments)?;
330            self.check_balanced_parentheses()?;
331            stmt
332        };
333
334        self.trace_exit("parse", &Ok(&result));
335        Ok(result)
336    }
337
    /// Thin wrapper that forwards pre-collected leading comments to
    /// `parse_select_statement_with_comments`.
    ///
    /// Despite the name, this method is private and performs no parenthesis
    /// check itself; the caller is responsible for calling
    /// `check_balanced_parentheses` afterwards.
    fn parse_select_statement_with_comments_public(
        &mut self,
        comments: Vec<Comment>,
    ) -> Result<SelectStatement, String> {
        self.parse_select_statement_with_comments(comments)
    }
345
346    fn parse_with_clause(&mut self) -> Result<SelectStatement, String> {
347        self.consume(Token::With)?;
348        let ctes = self.parse_cte_list()?;
349
350        // Parse the main SELECT statement - use inner version since we're already tracking parens
351        let mut main_query = self.parse_select_statement_inner_no_comments()?;
352        main_query.ctes = ctes;
353
354        // Check for balanced parentheses at the end of parsing
355        self.check_balanced_parentheses()?;
356
357        Ok(main_query)
358    }
359
360    fn parse_with_clause_inner(&mut self) -> Result<SelectStatement, String> {
361        self.consume(Token::With)?;
362        let ctes = self.parse_cte_list()?;
363
364        // Parse the main SELECT statement (without parenthesis checking for subqueries)
365        let mut main_query = self.parse_select_statement_inner()?;
366        main_query.ctes = ctes;
367
368        Ok(main_query)
369    }
370
    /// Parse a comma-separated list of CTE definitions (the part after `WITH`).
    ///
    /// Each entry has the shape `[WEB] name [(col, ...)] AS ( ... )`. A `WEB`
    /// entry's body is handed to `WebCteParser`; any other entry's body is
    /// parsed as a nested SELECT statement. The list ends at the first token
    /// after a CTE that is not a comma.
    ///
    /// # Errors
    ///
    /// Returns an error when a CTE name is missing, when `AS` or a parenthesis
    /// is missing, or when the CTE body fails to parse.
    // Helper function to parse CTE list - eliminates duplication
    fn parse_cte_list(&mut self) -> Result<Vec<CTE>, String> {
        let mut ctes = Vec::new();

        // Parse CTEs
        loop {
            // Check for WEB keyword for each CTE (can be different for each one)
            let is_web = if matches!(&self.current_token, Token::Web) {
                self.trace_token("Found WEB keyword for CTE");
                self.advance();
                true
            } else {
                false
            };

            // Parse CTE name (only a plain identifier token is accepted)
            let name = match &self.current_token {
                Token::Identifier(name) => name.clone(),
                _ => {
                    return Err(format!(
                        "Expected CTE name after {}",
                        if is_web { "WEB" } else { "WITH or comma" }
                    ))
                }
            };
            self.advance();

            // Optional column list: WITH t(col1, col2) AS ...
            // The opening paren is skipped with advance(), which still bumps
            // paren_depth; the closing paren is matched with consume().
            let column_list = if matches!(self.current_token, Token::LeftParen) {
                self.advance();
                let cols = self.parse_identifier_list()?;
                self.consume(Token::RightParen)?;
                Some(cols)
            } else {
                None
            };

            // Expect AS
            self.consume(Token::As)?;

            let cte_type = if is_web {
                // Expect opening parenthesis for WEB CTE
                self.consume(Token::LeftParen)?;
                // Parse WEB CTE specification using dedicated parser
                let web_spec = WebCteParser::parse(self)?;
                // Consume closing parenthesis for WEB CTE
                self.consume(Token::RightParen)?;
                CTEType::Web(web_spec)
            } else {
                // For standard CTEs, push depth BEFORE consuming opening paren
                // This ensures the paren is counted in the inner context
                self.push_paren_depth();
                // Now consume opening parenthesis
                self.consume(Token::LeftParen)?;
                let query = self.parse_select_statement_inner()?;
                // Expect closing parenthesis while still in CTE context
                self.consume(Token::RightParen)?;
                // Now pop to restore outer depth after consuming both parens
                // (on the error paths above, the saved depth is not popped)
                self.pop_paren_depth();
                CTEType::Standard(query)
            };

            ctes.push(CTE {
                name,
                column_list,
                cte_type,
            });

            // Check for more CTEs
            if !matches!(self.current_token, Token::Comma) {
                break;
            }
            self.advance();
        }

        Ok(ctes)
    }
448
449    /// Helper function to parse an optional table alias (with or without AS keyword)
450    fn parse_optional_alias(&mut self) -> Result<Option<String>, String> {
451        if matches!(self.current_token, Token::As) {
452            self.advance();
453            match &self.current_token {
454                Token::Identifier(name) => {
455                    let alias = name.clone();
456                    self.advance();
457                    Ok(Some(alias))
458                }
459                token => {
460                    // Check if it's a reserved keyword - provide helpful error
461                    if let Some(keyword) = token.as_keyword_str() {
462                        Err(format!(
463                            "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
464                            keyword,
465                            keyword.to_lowercase()
466                        ))
467                    } else {
468                        Err("Expected alias name after AS".to_string())
469                    }
470                }
471            }
472        } else if let Token::Identifier(name) = &self.current_token {
473            // AS is optional for table aliases
474            let alias = name.clone();
475            self.advance();
476            Ok(Some(alias))
477        } else {
478            Ok(None)
479        }
480    }
481
482    /// Helper function to check if an identifier is valid (quoted or regular)
483    fn is_valid_identifier(name: &str) -> bool {
484        if name.starts_with('"') && name.ends_with('"') {
485            // Quoted identifier - always valid
486            true
487        } else {
488            // Regular identifier - check if it's alphanumeric or underscore
489            name.chars().all(|c| c.is_alphanumeric() || c == '_')
490        }
491    }
492
493    /// Helper function to update parentheses depth tracking
494    fn update_paren_depth(&mut self, token: &Token) -> Result<(), String> {
495        match token {
496            Token::LeftParen => self.paren_depth += 1,
497            Token::RightParen => {
498                self.paren_depth -= 1;
499                // Check for extra closing parenthesis
500                if self.paren_depth < 0 {
501                    return Err(
502                        "Unexpected closing parenthesis - no matching opening parenthesis"
503                            .to_string(),
504                    );
505                }
506            }
507            _ => {}
508        }
509        Ok(())
510    }
511
512    /// Helper function to parse comma-separated argument list
513    fn parse_argument_list(&mut self) -> Result<Vec<SqlExpression>, String> {
514        let mut args = Vec::new();
515
516        if !matches!(self.current_token, Token::RightParen) {
517            loop {
518                args.push(self.parse_expression()?);
519
520                if matches!(self.current_token, Token::Comma) {
521                    self.advance();
522                } else {
523                    break;
524                }
525            }
526        }
527
528        Ok(args)
529    }
530
531    /// Helper function to check for balanced parentheses at the end of parsing
532    fn check_balanced_parentheses(&self) -> Result<(), String> {
533        if self.paren_depth > 0 {
534            Err(format!(
535                "Unclosed parenthesis - missing {} closing parenthes{}",
536                self.paren_depth,
537                if self.paren_depth == 1 { "is" } else { "es" }
538            ))
539        } else if self.paren_depth < 0 {
540            Err("Extra closing parenthesis found - no matching opening parenthesis".to_string())
541        } else {
542            Ok(())
543        }
544    }
545
546    /// Check if an expression contains aggregate functions (COUNT, SUM, AVG, etc.)
547    /// This is used to detect unsupported patterns in HAVING clause
548    fn contains_aggregate_function(expr: &SqlExpression) -> bool {
549        match expr {
550            SqlExpression::FunctionCall { name, args, .. } => {
551                // Check if this is an aggregate function
552                let upper_name = name.to_uppercase();
553                let is_aggregate = matches!(
554                    upper_name.as_str(),
555                    "COUNT" | "SUM" | "AVG" | "MIN" | "MAX" | "GROUP_CONCAT" | "STRING_AGG"
556                );
557
558                // If this is an aggregate, return true
559                // Otherwise, recursively check arguments
560                is_aggregate || args.iter().any(Self::contains_aggregate_function)
561            }
562            // Recursively check nested expressions
563            SqlExpression::BinaryOp { left, right, .. } => {
564                Self::contains_aggregate_function(left) || Self::contains_aggregate_function(right)
565            }
566            SqlExpression::Not { expr } => Self::contains_aggregate_function(expr),
567            SqlExpression::MethodCall { args, .. } => {
568                args.iter().any(Self::contains_aggregate_function)
569            }
570            SqlExpression::ChainedMethodCall { base, args, .. } => {
571                Self::contains_aggregate_function(base)
572                    || args.iter().any(Self::contains_aggregate_function)
573            }
574            SqlExpression::CaseExpression {
575                when_branches,
576                else_branch,
577            } => {
578                when_branches.iter().any(|branch| {
579                    Self::contains_aggregate_function(&branch.condition)
580                        || Self::contains_aggregate_function(&branch.result)
581                }) || else_branch
582                    .as_ref()
583                    .map_or(false, |e| Self::contains_aggregate_function(e))
584            }
585            SqlExpression::SimpleCaseExpression {
586                expr,
587                when_branches,
588                else_branch,
589            } => {
590                Self::contains_aggregate_function(expr)
591                    || when_branches.iter().any(|branch| {
592                        Self::contains_aggregate_function(&branch.value)
593                            || Self::contains_aggregate_function(&branch.result)
594                    })
595                    || else_branch
596                        .as_ref()
597                        .map_or(false, |e| Self::contains_aggregate_function(e))
598            }
599            SqlExpression::ScalarSubquery { query } => {
600                // Subqueries can have their own aggregates, but that's fine
601                // We're only checking the outer HAVING clause
602                query
603                    .having
604                    .as_ref()
605                    .map_or(false, |h| Self::contains_aggregate_function(h))
606            }
607            // Leaf nodes - no aggregates
608            SqlExpression::Column(_)
609            | SqlExpression::StringLiteral(_)
610            | SqlExpression::NumberLiteral(_)
611            | SqlExpression::BooleanLiteral(_)
612            | SqlExpression::Null
613            | SqlExpression::DateTimeConstructor { .. }
614            | SqlExpression::DateTimeToday { .. } => false,
615
616            // Window functions contain aggregates by definition
617            SqlExpression::WindowFunction { .. } => true,
618
619            // Between has three parts to check
620            SqlExpression::Between { expr, lower, upper } => {
621                Self::contains_aggregate_function(expr)
622                    || Self::contains_aggregate_function(lower)
623                    || Self::contains_aggregate_function(upper)
624            }
625
626            // IN list - check expr and all values
627            SqlExpression::InList { expr, values } | SqlExpression::NotInList { expr, values } => {
628                Self::contains_aggregate_function(expr)
629                    || values.iter().any(Self::contains_aggregate_function)
630            }
631
632            // IN subquery - check expr and subquery
633            SqlExpression::InSubquery { expr, subquery }
634            | SqlExpression::NotInSubquery { expr, subquery } => {
635                Self::contains_aggregate_function(expr)
636                    || subquery
637                        .having
638                        .as_ref()
639                        .map_or(false, |h| Self::contains_aggregate_function(h))
640            }
641
642            // UNNEST - check column expression
643            SqlExpression::Unnest { column, .. } => Self::contains_aggregate_function(column),
644        }
645    }
646
647    fn parse_select_statement(&mut self) -> Result<SelectStatement, String> {
648        self.trace_enter("parse_select_statement");
649        let result = self.parse_select_statement_inner()?;
650
651        // Check for balanced parentheses at the end of parsing
652        self.check_balanced_parentheses()?;
653
654        Ok(result)
655    }
656
657    fn parse_select_statement_inner(&mut self) -> Result<SelectStatement, String> {
658        // Collect leading comments ONLY in PreserveComments mode
659        let leading_comments = if self.mode == ParserMode::PreserveComments {
660            self.collect_leading_comments()
661        } else {
662            vec![]
663        };
664
665        self.parse_select_statement_with_comments(leading_comments)
666    }
667
668    /// Parse SELECT statement without collecting leading comments
669    /// Used when comments were already collected (e.g., before WITH clause)
670    fn parse_select_statement_inner_no_comments(&mut self) -> Result<SelectStatement, String> {
671        self.parse_select_statement_with_comments(vec![])
672    }
673
674    /// Core SELECT parsing logic - takes pre-collected comments
675    fn parse_select_statement_with_comments(
676        &mut self,
677        leading_comments: Vec<Comment>,
678    ) -> Result<SelectStatement, String> {
679        self.consume(Token::Select)?;
680
681        // Check for DISTINCT keyword
682        let distinct = if matches!(self.current_token, Token::Distinct) {
683            self.advance();
684            true
685        } else {
686            false
687        };
688
689        // Parse SELECT items (supports computed expressions)
690        let select_items = self.parse_select_items()?;
691
692        // Create legacy columns vector for backward compatibility
693        let columns = select_items
694            .iter()
695            .map(|item| match item {
696                SelectItem::Star { .. } => "*".to_string(),
697                SelectItem::Column {
698                    column: col_ref, ..
699                } => col_ref.name.clone(),
700                SelectItem::Expression { alias, .. } => alias.clone(),
701            })
702            .collect();
703
704        // Parse INTO clause (for temporary tables) - comes immediately after SELECT items
705        let into_table = if matches!(self.current_token, Token::Into) {
706            self.advance();
707            Some(self.parse_into_clause()?)
708        } else {
709            None
710        };
711
712        // Parse FROM clause - can be a table name, subquery, or table function
713        let (from_table, from_subquery, from_function, from_alias) = if matches!(
714            self.current_token,
715            Token::From
716        ) {
717            self.advance();
718
719            // Check for table function like RANGE()
720            if let Token::Identifier(name) = &self.current_token.clone() {
721                // Check if this is a table function by consulting the registry
722                // We need to lookahead to see if there's a parenthesis to distinguish
723                // between a function call and a table with the same name
724                let has_paren = self.peek_token() == Some(Token::LeftParen);
725                if self.debug_trace {
726                    eprintln!(
727                        "  Checking {} for table function, has_paren={}",
728                        name, has_paren
729                    );
730                }
731
732                // Check if it's a known table function or generator
733                // In FROM clause context, prioritize generators over scalar functions
734                let is_table_function = if has_paren {
735                    // First check generator registry (for FROM clause context)
736                    if self.debug_trace {
737                        eprintln!("  Checking generator registry for {}", name.to_uppercase());
738                    }
739                    if let Some(_gen) = self.generator_registry.get(&name.to_uppercase()) {
740                        if self.debug_trace {
741                            eprintln!("  Found {} in generator registry", name);
742                        }
743                        self.trace_token(&format!("Found generator: {}", name));
744                        true
745                    } else {
746                        // Then check if it's a table function in the function registry
747                        if let Some(func) = self.function_registry.get(&name.to_uppercase()) {
748                            let sig = func.signature();
749                            let is_table_fn = sig.category == FunctionCategory::TableFunction;
750                            if self.debug_trace {
751                                eprintln!(
752                                    "  Found {} in function registry, is_table_function={}",
753                                    name, is_table_fn
754                                );
755                            }
756                            if is_table_fn {
757                                self.trace_token(&format!(
758                                    "Found table function in function registry: {}",
759                                    name
760                                ));
761                            }
762                            is_table_fn
763                        } else {
764                            if self.debug_trace {
765                                eprintln!("  {} not found in either registry", name);
766                                self.trace_token(&format!(
767                                    "Not found as generator or table function: {}",
768                                    name
769                                ));
770                            }
771                            false
772                        }
773                    }
774                } else {
775                    if self.debug_trace {
776                        eprintln!("  No parenthesis after {}, treating as table", name);
777                    }
778                    false
779                };
780
781                if is_table_function {
782                    // Parse table function
783                    let function_name = name.clone();
784                    self.advance(); // Skip function name
785
786                    // Parse arguments
787                    self.consume(Token::LeftParen)?;
788                    let args = self.parse_argument_list()?;
789                    self.consume(Token::RightParen)?;
790
791                    // Optional alias
792                    let alias = if matches!(self.current_token, Token::As) {
793                        self.advance();
794                        match &self.current_token {
795                            Token::Identifier(name) => {
796                                let alias = name.clone();
797                                self.advance();
798                                Some(alias)
799                            }
800                            token => {
801                                if let Some(keyword) = token.as_keyword_str() {
802                                    return Err(format!(
803                                            "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
804                                            keyword,
805                                            keyword.to_lowercase()
806                                        ));
807                                } else {
808                                    return Err("Expected alias name after AS".to_string());
809                                }
810                            }
811                        }
812                    } else if let Token::Identifier(name) = &self.current_token {
813                        let alias = name.clone();
814                        self.advance();
815                        Some(alias)
816                    } else {
817                        None
818                    };
819
820                    (
821                        None,
822                        None,
823                        Some(TableFunction::Generator {
824                            name: function_name,
825                            args,
826                        }),
827                        alias,
828                    )
829                } else {
830                    // Not a RANGE, SPLIT, or generator function, so it's a regular table name
831                    let table_name = name.clone();
832                    self.advance();
833
834                    // Check for optional alias
835                    let alias = self.parse_optional_alias()?;
836
837                    (Some(table_name), None, None, alias)
838                }
839            } else if matches!(self.current_token, Token::LeftParen) {
840                // Check for subquery: FROM (SELECT ...) or FROM (WITH ... SELECT ...)
841                self.advance();
842
843                // Parse the subquery - it might start with WITH
844                let subquery = if matches!(self.current_token, Token::With) {
845                    self.parse_with_clause_inner()?
846                } else {
847                    self.parse_select_statement_inner()?
848                };
849
850                self.consume(Token::RightParen)?;
851
852                // Subqueries must have an alias
853                let alias = if matches!(self.current_token, Token::As) {
854                    self.advance();
855                    match &self.current_token {
856                        Token::Identifier(name) => {
857                            let alias = name.clone();
858                            self.advance();
859                            alias
860                        }
861                        token => {
862                            if let Some(keyword) = token.as_keyword_str() {
863                                return Err(format!(
864                                        "Reserved keyword '{}' cannot be used as subquery alias. Use a different name or quote it with double quotes: \"{}\"",
865                                        keyword,
866                                        keyword.to_lowercase()
867                                    ));
868                            } else {
869                                return Err("Expected alias name after AS".to_string());
870                            }
871                        }
872                    }
873                } else {
874                    // AS is optional, but alias is required
875                    match &self.current_token {
876                        Token::Identifier(name) => {
877                            let alias = name.clone();
878                            self.advance();
879                            alias
880                        }
881                        _ => {
882                            return Err(
883                                "Subquery in FROM must have an alias (e.g., AS t)".to_string()
884                            )
885                        }
886                    }
887                };
888
889                (None, Some(Box::new(subquery)), None, Some(alias))
890            } else {
891                // Regular table name
892                match &self.current_token {
893                    Token::Identifier(table) => {
894                        let table_name = table.clone();
895                        self.advance();
896
897                        // Check for optional alias
898                        let alias = self.parse_optional_alias()?;
899
900                        (Some(table_name), None, None, alias)
901                    }
902                    Token::QuotedIdentifier(table) => {
903                        // Handle quoted table names
904                        let table_name = table.clone();
905                        self.advance();
906
907                        // Check for optional alias
908                        let alias = self.parse_optional_alias()?;
909
910                        (Some(table_name), None, None, alias)
911                    }
912                    _ => return Err("Expected table name or subquery after FROM".to_string()),
913                }
914            }
915        } else {
916            (None, None, None, None)
917        };
918
919        // Parse JOIN clauses
920        let mut joins = Vec::new();
921        while self.is_join_token() {
922            joins.push(self.parse_join_clause()?);
923        }
924
925        let where_clause = if matches!(self.current_token, Token::Where) {
926            self.advance();
927            Some(self.parse_where_clause()?)
928        } else {
929            None
930        };
931
932        let group_by = if matches!(self.current_token, Token::GroupBy) {
933            self.advance();
934            // Parse expressions instead of just identifiers for GROUP BY
935            // This allows GROUP BY TIME_BUCKET(...), CASE ..., etc.
936            Some(self.parse_expression_list()?)
937        } else {
938            None
939        };
940
941        // Parse HAVING clause (must come after GROUP BY)
942        let having = if matches!(self.current_token, Token::Having) {
943            if group_by.is_none() {
944                return Err("HAVING clause requires GROUP BY".to_string());
945            }
946            self.advance();
947            let having_expr = self.parse_expression()?;
948
949            // Check if HAVING contains aggregate functions (not supported - use aliases instead)
950            if Self::contains_aggregate_function(&having_expr) {
951                return Err(
952                    "HAVING clause with aggregate functions is not supported. \
953                    Use an alias in SELECT for the aggregate and reference it in HAVING.\n\
954                    Example: SELECT trader, COUNT(*) as trade_count FROM trades GROUP BY trader HAVING trade_count > 1"
955                    .to_string()
956                );
957            }
958
959            Some(having_expr)
960        } else {
961            None
962        };
963
964        // Parse ORDER BY clause (comes after GROUP BY and HAVING)
965        let order_by = if matches!(self.current_token, Token::OrderBy) {
966            self.trace_token("Found OrderBy token");
967            self.advance();
968            Some(self.parse_order_by_list()?)
969        } else if let Token::Identifier(s) = &self.current_token {
970            // This shouldn't happen if the lexer properly tokenizes ORDER BY
971            // But keeping as fallback for compatibility
972            if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
973                self.trace_token("Warning: ORDER as identifier instead of OrderBy token");
974                self.advance(); // consume ORDER
975                if matches!(&self.current_token, Token::By) {
976                    self.advance(); // consume BY
977                    Some(self.parse_order_by_list()?)
978                } else {
979                    return Err("Expected BY after ORDER".to_string());
980                }
981            } else {
982                None
983            }
984        } else {
985            None
986        };
987
988        // Parse LIMIT clause
989        let limit = if matches!(self.current_token, Token::Limit) {
990            self.advance();
991            match &self.current_token {
992                Token::NumberLiteral(num) => {
993                    let limit_val = num
994                        .parse::<usize>()
995                        .map_err(|_| format!("Invalid LIMIT value: {num}"))?;
996                    self.advance();
997                    Some(limit_val)
998                }
999                _ => return Err("Expected number after LIMIT".to_string()),
1000            }
1001        } else {
1002            None
1003        };
1004
1005        // Parse OFFSET clause
1006        let offset = if matches!(self.current_token, Token::Offset) {
1007            self.advance();
1008            match &self.current_token {
1009                Token::NumberLiteral(num) => {
1010                    let offset_val = num
1011                        .parse::<usize>()
1012                        .map_err(|_| format!("Invalid OFFSET value: {num}"))?;
1013                    self.advance();
1014                    Some(offset_val)
1015                }
1016                _ => return Err("Expected number after OFFSET".to_string()),
1017            }
1018        } else {
1019            None
1020        };
1021
1022        // Parse INTO clause (alternative position - SQL Server also supports INTO after all clauses)
1023        // This handles: SELECT * FROM table WHERE x > 5 INTO #temp
1024        // If INTO was already parsed after SELECT, this will be None (can't have two INTOs)
1025        let into_table = if into_table.is_none() && matches!(self.current_token, Token::Into) {
1026            self.advance();
1027            Some(self.parse_into_clause()?)
1028        } else {
1029            into_table // Keep the one from after SELECT if it exists
1030        };
1031
1032        // Parse UNION/INTERSECT/EXCEPT operations
1033        let set_operations = self.parse_set_operations()?;
1034
1035        // Collect trailing comment ONLY in PreserveComments mode
1036        let trailing_comment = if self.mode == ParserMode::PreserveComments {
1037            self.collect_trailing_comment()
1038        } else {
1039            None
1040        };
1041
1042        Ok(SelectStatement {
1043            distinct,
1044            columns,
1045            select_items,
1046            from_table,
1047            from_subquery,
1048            from_function,
1049            from_alias,
1050            joins,
1051            where_clause,
1052            order_by,
1053            group_by,
1054            having,
1055            limit,
1056            offset,
1057            ctes: Vec::new(), // Will be populated by WITH clause parser
1058            into_table,
1059            set_operations,
1060            leading_comments,
1061            trailing_comment,
1062        })
1063    }
1064
1065    /// Parse UNION/INTERSECT/EXCEPT operations
1066    /// Returns a vector of (operation, select_statement) pairs
1067    fn parse_set_operations(
1068        &mut self,
1069    ) -> Result<Vec<(SetOperation, Box<SelectStatement>)>, String> {
1070        let mut operations = Vec::new();
1071
1072        while matches!(
1073            self.current_token,
1074            Token::Union | Token::Intersect | Token::Except
1075        ) {
1076            // Determine the operation type
1077            let operation = match &self.current_token {
1078                Token::Union => {
1079                    self.advance();
1080                    // Check for ALL keyword
1081                    if let Token::Identifier(id) = &self.current_token {
1082                        if id.to_uppercase() == "ALL" {
1083                            self.advance();
1084                            SetOperation::UnionAll
1085                        } else {
1086                            SetOperation::Union
1087                        }
1088                    } else {
1089                        SetOperation::Union
1090                    }
1091                }
1092                Token::Intersect => {
1093                    self.advance();
1094                    SetOperation::Intersect
1095                }
1096                Token::Except => {
1097                    self.advance();
1098                    SetOperation::Except
1099                }
1100                _ => unreachable!(),
1101            };
1102
1103            // Parse the next SELECT statement
1104            let next_select = self.parse_select_statement_inner()?;
1105
1106            operations.push((operation, Box::new(next_select)));
1107        }
1108
1109        Ok(operations)
1110    }
1111
1112    /// Parse SELECT items that support computed expressions with aliases
1113    fn parse_select_items(&mut self) -> Result<Vec<SelectItem>, String> {
1114        let mut items = Vec::new();
1115
1116        loop {
1117            // Check for qualified star (table.*) or unqualified star (*)
1118            // First check if we have identifier.* pattern
1119            if let Token::Identifier(name) = &self.current_token.clone() {
1120                // Peek ahead to check for .* pattern
1121                let saved_pos = self.lexer.clone();
1122                let saved_token = self.current_token.clone();
1123                let table_name = name.clone();
1124
1125                self.advance();
1126
1127                if matches!(self.current_token, Token::Dot) {
1128                    self.advance();
1129                    if matches!(self.current_token, Token::Star) {
1130                        // This is table.* pattern
1131                        items.push(SelectItem::Star {
1132                            table_prefix: Some(table_name),
1133                            leading_comments: vec![],
1134                            trailing_comment: None,
1135                        });
1136                        self.advance();
1137
1138                        // Continue to next item or end
1139                        if matches!(self.current_token, Token::Comma) {
1140                            self.advance();
1141                            continue;
1142                        } else {
1143                            break;
1144                        }
1145                    }
1146                }
1147
1148                // Not table.*, restore position and continue with normal parsing
1149                self.lexer = saved_pos;
1150                self.current_token = saved_token;
1151            }
1152
1153            // Check for unqualified *
1154            if matches!(self.current_token, Token::Star) {
1155                items.push(SelectItem::Star {
1156                    table_prefix: None,
1157                    leading_comments: vec![],
1158                    trailing_comment: None,
1159                });
1160                self.advance();
1161            } else {
1162                // Parse expression or column
1163                let expr = self.parse_comparison()?; // Use comparison to support IS NULL and other comparisons
1164
1165                // Check for AS alias
1166                let alias = if matches!(self.current_token, Token::As) {
1167                    self.advance();
1168                    match &self.current_token {
1169                        Token::Identifier(alias_name) => {
1170                            let alias = alias_name.clone();
1171                            self.advance();
1172                            alias
1173                        }
1174                        Token::QuotedIdentifier(alias_name) => {
1175                            let alias = alias_name.clone();
1176                            self.advance();
1177                            alias
1178                        }
1179                        token => {
1180                            if let Some(keyword) = token.as_keyword_str() {
1181                                return Err(format!(
1182                                    "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
1183                                    keyword,
1184                                    keyword.to_lowercase()
1185                                ));
1186                            } else {
1187                                return Err("Expected alias name after AS".to_string());
1188                            }
1189                        }
1190                    }
1191                } else {
1192                    // Generate default alias based on expression
1193                    match &expr {
1194                        SqlExpression::Column(col_ref) => col_ref.name.clone(),
1195                        _ => format!("expr_{}", items.len() + 1), // Default alias for computed expressions
1196                    }
1197                };
1198
1199                // Create SelectItem based on expression type
1200                let item = match expr {
1201                    SqlExpression::Column(col_ref) if alias == col_ref.name => {
1202                        // Simple column reference without alias
1203                        SelectItem::Column {
1204                            column: col_ref,
1205                            leading_comments: vec![],
1206                            trailing_comment: None,
1207                        }
1208                    }
1209                    _ => {
1210                        // Computed expression or column with different alias
1211                        SelectItem::Expression {
1212                            expr,
1213                            alias,
1214                            leading_comments: vec![],
1215                            trailing_comment: None,
1216                        }
1217                    }
1218                };
1219
1220                items.push(item);
1221            }
1222
1223            // Check for comma to continue
1224            if matches!(self.current_token, Token::Comma) {
1225                self.advance();
1226            } else {
1227                break;
1228            }
1229        }
1230
1231        Ok(items)
1232    }
1233
1234    fn parse_identifier_list(&mut self) -> Result<Vec<String>, String> {
1235        let mut identifiers = Vec::new();
1236
1237        loop {
1238            match &self.current_token {
1239                Token::Identifier(id) => {
1240                    // Check if this is a reserved keyword that should stop identifier parsing
1241                    if Self::is_identifier_reserved(id) {
1242                        // Stop parsing identifiers if we hit a reserved keyword
1243                        break;
1244                    }
1245                    identifiers.push(id.clone());
1246                    self.advance();
1247                }
1248                Token::QuotedIdentifier(id) => {
1249                    // Handle quoted identifiers like "Customer Id"
1250                    identifiers.push(id.clone());
1251                    self.advance();
1252                }
1253                _ => {
1254                    // Stop parsing if we hit any other token type
1255                    break;
1256                }
1257            }
1258
1259            if matches!(self.current_token, Token::Comma) {
1260                self.advance();
1261            } else {
1262                break;
1263            }
1264        }
1265
1266        if identifiers.is_empty() {
1267            return Err("Expected at least one identifier".to_string());
1268        }
1269
1270        Ok(identifiers)
1271    }
1272
1273    fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
1274        let mut partition_by = Vec::new();
1275        let mut order_by = Vec::new();
1276
1277        // Check for PARTITION BY
1278        if matches!(self.current_token, Token::Partition) {
1279            self.advance(); // consume PARTITION
1280            if !matches!(self.current_token, Token::By) {
1281                return Err("Expected BY after PARTITION".to_string());
1282            }
1283            self.advance(); // consume BY
1284
1285            // Parse partition columns
1286            partition_by = self.parse_identifier_list()?;
1287        }
1288
1289        // Check for ORDER BY
1290        if matches!(self.current_token, Token::OrderBy) {
1291            self.advance(); // consume ORDER BY (as single token)
1292            order_by = self.parse_order_by_list()?;
1293        } else if let Token::Identifier(s) = &self.current_token {
1294            if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
1295                // Handle ORDER BY as two tokens
1296                self.advance(); // consume ORDER
1297                if !matches!(self.current_token, Token::By) {
1298                    return Err("Expected BY after ORDER".to_string());
1299                }
1300                self.advance(); // consume BY
1301                order_by = self.parse_order_by_list()?;
1302            }
1303        }
1304
1305        // Parse optional window frame (ROWS/RANGE BETWEEN ... AND ...)
1306        let frame = self.parse_window_frame()?;
1307
1308        Ok(WindowSpec {
1309            partition_by,
1310            order_by,
1311            frame,
1312        })
1313    }
1314
1315    fn parse_order_by_list(&mut self) -> Result<Vec<OrderByColumn>, String> {
1316        let mut order_columns = Vec::new();
1317
1318        loop {
1319            let column = match &self.current_token {
1320                Token::Identifier(id) => {
1321                    let col = id.clone();
1322                    self.advance();
1323
1324                    // Check for qualified column name (table.column)
1325                    if matches!(self.current_token, Token::Dot) {
1326                        self.advance();
1327                        match &self.current_token {
1328                            Token::Identifier(col_name) => {
1329                                let mut qualified = col;
1330                                qualified.push('.');
1331                                qualified.push_str(col_name);
1332                                self.advance();
1333                                qualified
1334                            }
1335                            _ => return Err("Expected column name after '.'".to_string()),
1336                        }
1337                    } else {
1338                        col
1339                    }
1340                }
1341                Token::QuotedIdentifier(id) => {
1342                    let col = id.clone();
1343                    self.advance();
1344                    col
1345                }
1346                Token::NumberLiteral(num) if self.columns.iter().any(|col| col == num) => {
1347                    // Support numeric column names like "202204"
1348                    let col = num.clone();
1349                    self.advance();
1350                    col
1351                }
1352                // Handle window keywords that can be column names
1353                Token::Row => {
1354                    self.advance();
1355                    "row".to_string()
1356                }
1357                Token::Rows => {
1358                    self.advance();
1359                    "rows".to_string()
1360                }
1361                Token::Range => {
1362                    self.advance();
1363                    "range".to_string()
1364                }
1365                _ => return Err("Expected column name in ORDER BY".to_string()),
1366            };
1367
1368            // Check for ASC/DESC
1369            let direction = match &self.current_token {
1370                Token::Asc => {
1371                    self.advance();
1372                    SortDirection::Asc
1373                }
1374                Token::Desc => {
1375                    self.advance();
1376                    SortDirection::Desc
1377                }
1378                _ => SortDirection::Asc, // Default to ASC if not specified
1379            };
1380
1381            order_columns.push(OrderByColumn { column, direction });
1382
1383            if matches!(self.current_token, Token::Comma) {
1384                self.advance();
1385            } else {
1386                break;
1387            }
1388        }
1389
1390        Ok(order_columns)
1391    }
1392
1393    /// Parse INTO clause for temporary tables
1394    /// Syntax: INTO #table_name
1395    fn parse_into_clause(&mut self) -> Result<IntoTable, String> {
1396        // Expect an identifier starting with #
1397        let name = match &self.current_token {
1398            Token::Identifier(id) if id.starts_with('#') => {
1399                let table_name = id.clone();
1400                self.advance();
1401                table_name
1402            }
1403            Token::Identifier(id) => {
1404                return Err(format!(
1405                    "Temporary table name must start with #, got: {}",
1406                    id
1407                ));
1408            }
1409            _ => {
1410                return Err(
1411                    "Expected temporary table name (starting with #) after INTO".to_string()
1412                );
1413            }
1414        };
1415
1416        Ok(IntoTable { name })
1417    }
1418
1419    fn parse_window_frame(&mut self) -> Result<Option<WindowFrame>, String> {
1420        // Check for ROWS or RANGE keyword
1421        let unit = match &self.current_token {
1422            Token::Rows => {
1423                self.advance();
1424                FrameUnit::Rows
1425            }
1426            Token::Identifier(id) if id.to_uppercase() == "RANGE" => {
1427                // RANGE as window frame unit
1428                self.advance();
1429                FrameUnit::Range
1430            }
1431            _ => return Ok(None), // No window frame specified
1432        };
1433
1434        // Check for BETWEEN or just a single bound
1435        let (start, end) = if let Token::Between = &self.current_token {
1436            self.advance(); // consume BETWEEN
1437                            // Parse start bound
1438            let start = self.parse_frame_bound()?;
1439
1440            // Expect AND
1441            if !matches!(&self.current_token, Token::And) {
1442                return Err("Expected AND after window frame start bound".to_string());
1443            }
1444            self.advance();
1445
1446            // Parse end bound
1447            let end = self.parse_frame_bound()?;
1448            (start, Some(end))
1449        } else {
1450            // Single bound (e.g., "ROWS 5 PRECEDING")
1451            let bound = self.parse_frame_bound()?;
1452            (bound, None)
1453        };
1454
1455        Ok(Some(WindowFrame { unit, start, end }))
1456    }
1457
1458    fn parse_frame_bound(&mut self) -> Result<FrameBound, String> {
1459        match &self.current_token {
1460            Token::Unbounded => {
1461                self.advance();
1462                match &self.current_token {
1463                    Token::Preceding => {
1464                        self.advance();
1465                        Ok(FrameBound::UnboundedPreceding)
1466                    }
1467                    Token::Following => {
1468                        self.advance();
1469                        Ok(FrameBound::UnboundedFollowing)
1470                    }
1471                    _ => Err("Expected PRECEDING or FOLLOWING after UNBOUNDED".to_string()),
1472                }
1473            }
1474            Token::Current => {
1475                self.advance();
1476                if matches!(&self.current_token, Token::Row) {
1477                    self.advance();
1478                    return Ok(FrameBound::CurrentRow);
1479                }
1480                Err("Expected ROW after CURRENT".to_string())
1481            }
1482            Token::NumberLiteral(num) => {
1483                let n: i64 = num
1484                    .parse()
1485                    .map_err(|_| "Invalid number in window frame".to_string())?;
1486                self.advance();
1487                match &self.current_token {
1488                    Token::Preceding => {
1489                        self.advance();
1490                        Ok(FrameBound::Preceding(n))
1491                    }
1492                    Token::Following => {
1493                        self.advance();
1494                        Ok(FrameBound::Following(n))
1495                    }
1496                    _ => Err("Expected PRECEDING or FOLLOWING after number".to_string()),
1497                }
1498            }
1499            _ => Err("Invalid window frame bound".to_string()),
1500        }
1501    }
1502
1503    fn parse_where_clause(&mut self) -> Result<WhereClause, String> {
1504        // Parse the entire WHERE clause as a single expression tree
1505        // The logical operators (AND/OR) are now handled within parse_expression
1506        let expr = self.parse_expression()?;
1507
1508        // Check for unexpected closing parenthesis
1509        if matches!(self.current_token, Token::RightParen) && self.paren_depth <= 0 {
1510            return Err(
1511                "Unexpected closing parenthesis - no matching opening parenthesis".to_string(),
1512            );
1513        }
1514
1515        // Create a single condition with the entire expression
1516        let conditions = vec![Condition {
1517            expr,
1518            connector: None,
1519        }];
1520
1521        Ok(WhereClause { conditions })
1522    }
1523
1524    fn parse_expression(&mut self) -> Result<SqlExpression, String> {
1525        self.trace_enter("parse_expression");
1526        // Start with logical OR as the lowest precedence operator
1527        // The hierarchy is: OR -> AND -> comparison -> additive -> multiplicative -> primary
1528        let mut left = self.parse_logical_or()?;
1529
1530        // Handle IN operator (not preceded by NOT)
1531        // This uses the modular comparison module
1532        left = parse_in_operator(self, left)?;
1533
1534        let result = Ok(left);
1535        self.trace_exit("parse_expression", &result);
1536        result
1537    }
1538
    /// Parses a comparison-level expression.
    ///
    /// Thin wrapper: hands the parser to `parse_comparison_expr`, the
    /// `parse_comparison` function of the modular `expressions::comparison`
    /// module, and returns its result unchanged.
    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
        // Use the new modular comparison expression parser
        parse_comparison_expr(self)
    }
1543
    /// Parses an additive-level expression.
    ///
    /// Thin wrapper: hands the parser to `parse_additive_expr`, the
    /// `parse_additive` function of the modular `expressions::arithmetic`
    /// module, and returns its result unchanged.
    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
        // Use the new modular arithmetic expression parser
        parse_additive_expr(self)
    }
1548
    /// Parses a multiplicative-level expression.
    ///
    /// Thin wrapper: hands the parser to `parse_multiplicative_expr`, the
    /// `parse_multiplicative` function of the modular
    /// `expressions::arithmetic` module, and returns its result unchanged.
    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
        // Use the new modular arithmetic expression parser
        parse_multiplicative_expr(self)
    }
1553
    /// Parses a logical-OR-level expression, the entry level used by
    /// `parse_expression`.
    ///
    /// Thin wrapper: hands the parser to `parse_logical_or_expr`, the
    /// `parse_logical_or` function of the modular `expressions::logical`
    /// module, and returns its result unchanged.
    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
        // Use the new modular logical expression parser
        parse_logical_or_expr(self)
    }
1558
    /// Parses a logical-AND-level expression.
    ///
    /// Thin wrapper: hands the parser to `parse_logical_and_expr`, the
    /// `parse_logical_and` function of the modular `expressions::logical`
    /// module, and returns its result unchanged.
    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
        // Use the new modular logical expression parser
        parse_logical_and_expr(self)
    }
1563
    /// Parses a `CASE` expression.
    ///
    /// Thin wrapper: hands the parser to `parse_case_expr`, the
    /// `parse_case_expression` function of the modular `expressions::case`
    /// module, and returns its result unchanged.
    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
        // Use the new modular CASE expression parser
        parse_case_expr(self)
    }
1568
1569    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
1570        // Use the new modular primary expression parser
1571        // Clone the necessary data to avoid borrowing issues
1572        let columns = self.columns.clone();
1573        let in_method_args = self.in_method_args;
1574        let ctx = PrimaryExpressionContext {
1575            columns: &columns,
1576            in_method_args,
1577        };
1578        parse_primary_expr(self, &ctx)
1579    }
1580
1581    // Keep the old implementation temporarily for reference (will be removed)
1582    fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
1583        // Set flag to indicate we're parsing method arguments
1584        self.in_method_args = true;
1585
1586        let args = self.parse_argument_list()?;
1587
1588        // Clear the flag
1589        self.in_method_args = false;
1590
1591        Ok(args)
1592    }
1593
1594    fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
1595        let mut args = Vec::new();
1596        let mut has_distinct = false;
1597
1598        if !matches!(self.current_token, Token::RightParen) {
1599            // Check if first argument starts with DISTINCT
1600            if matches!(self.current_token, Token::Distinct) {
1601                self.advance(); // consume DISTINCT
1602                has_distinct = true;
1603            }
1604
1605            // Parse the expression (either after DISTINCT or directly)
1606            args.push(self.parse_additive()?);
1607
1608            // Parse any remaining arguments (DISTINCT only applies to first arg for aggregates)
1609            while matches!(self.current_token, Token::Comma) {
1610                self.advance();
1611                args.push(self.parse_additive()?);
1612            }
1613        }
1614
1615        Ok((args, has_distinct))
1616    }
1617
1618    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
1619        let mut expressions = Vec::new();
1620
1621        loop {
1622            expressions.push(self.parse_expression()?);
1623
1624            if matches!(self.current_token, Token::Comma) {
1625                self.advance();
1626            } else {
1627                break;
1628            }
1629        }
1630
1631        Ok(expressions)
1632    }
1633
    /// Returns the current position of the underlying lexer, exactly as
    /// reported by the lexer's own `get_position`.
    // NOTE(review): presumably a byte offset into the input - confirm against the lexer.
    #[must_use]
    pub fn get_position(&self) -> usize {
        self.lexer.get_position()
    }
1638
1639    // Check if current token is a JOIN-related token
1640    fn is_join_token(&self) -> bool {
1641        matches!(
1642            self.current_token,
1643            Token::Join | Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
1644        )
1645    }
1646
1647    // Parse a JOIN clause
1648    fn parse_join_clause(&mut self) -> Result<JoinClause, String> {
1649        // Determine join type
1650        let join_type = match &self.current_token {
1651            Token::Join => {
1652                self.advance();
1653                JoinType::Inner // Default JOIN is INNER JOIN
1654            }
1655            Token::Inner => {
1656                self.advance();
1657                if !matches!(self.current_token, Token::Join) {
1658                    return Err("Expected JOIN after INNER".to_string());
1659                }
1660                self.advance();
1661                JoinType::Inner
1662            }
1663            Token::Left => {
1664                self.advance();
1665                // Handle optional OUTER keyword
1666                if matches!(self.current_token, Token::Outer) {
1667                    self.advance();
1668                }
1669                if !matches!(self.current_token, Token::Join) {
1670                    return Err("Expected JOIN after LEFT".to_string());
1671                }
1672                self.advance();
1673                JoinType::Left
1674            }
1675            Token::Right => {
1676                self.advance();
1677                // Handle optional OUTER keyword
1678                if matches!(self.current_token, Token::Outer) {
1679                    self.advance();
1680                }
1681                if !matches!(self.current_token, Token::Join) {
1682                    return Err("Expected JOIN after RIGHT".to_string());
1683                }
1684                self.advance();
1685                JoinType::Right
1686            }
1687            Token::Full => {
1688                self.advance();
1689                // Handle optional OUTER keyword
1690                if matches!(self.current_token, Token::Outer) {
1691                    self.advance();
1692                }
1693                if !matches!(self.current_token, Token::Join) {
1694                    return Err("Expected JOIN after FULL".to_string());
1695                }
1696                self.advance();
1697                JoinType::Full
1698            }
1699            Token::Cross => {
1700                self.advance();
1701                if !matches!(self.current_token, Token::Join) {
1702                    return Err("Expected JOIN after CROSS".to_string());
1703                }
1704                self.advance();
1705                JoinType::Cross
1706            }
1707            _ => return Err("Expected JOIN keyword".to_string()),
1708        };
1709
1710        // Parse the table being joined
1711        let (table, alias) = self.parse_join_table_source()?;
1712
1713        // Parse ON condition (required for all joins except CROSS JOIN)
1714        let condition = if join_type == JoinType::Cross {
1715            // CROSS JOIN doesn't have ON condition - create empty condition
1716            JoinCondition { conditions: vec![] }
1717        } else {
1718            if !matches!(self.current_token, Token::On) {
1719                return Err("Expected ON keyword after JOIN table".to_string());
1720            }
1721            self.advance();
1722            self.parse_join_condition()?
1723        };
1724
1725        Ok(JoinClause {
1726            join_type,
1727            table,
1728            alias,
1729            condition,
1730        })
1731    }
1732
1733    fn parse_join_table_source(&mut self) -> Result<(TableSource, Option<String>), String> {
1734        let table = match &self.current_token {
1735            Token::Identifier(name) => {
1736                let table_name = name.clone();
1737                self.advance();
1738                TableSource::Table(table_name)
1739            }
1740            Token::LeftParen => {
1741                // Subquery as table source
1742                self.advance();
1743                let subquery = self.parse_select_statement_inner()?;
1744                if !matches!(self.current_token, Token::RightParen) {
1745                    return Err("Expected ')' after subquery".to_string());
1746                }
1747                self.advance();
1748
1749                // Subqueries must have an alias
1750                let alias = match &self.current_token {
1751                    Token::Identifier(alias_name) => {
1752                        let alias = alias_name.clone();
1753                        self.advance();
1754                        alias
1755                    }
1756                    Token::As => {
1757                        self.advance();
1758                        match &self.current_token {
1759                            Token::Identifier(alias_name) => {
1760                                let alias = alias_name.clone();
1761                                self.advance();
1762                                alias
1763                            }
1764                            _ => return Err("Expected alias after AS keyword".to_string()),
1765                        }
1766                    }
1767                    _ => return Err("Subqueries must have an alias".to_string()),
1768                };
1769
1770                return Ok((
1771                    TableSource::DerivedTable {
1772                        query: Box::new(subquery),
1773                        alias: alias.clone(),
1774                    },
1775                    Some(alias),
1776                ));
1777            }
1778            _ => return Err("Expected table name or subquery in JOIN clause".to_string()),
1779        };
1780
1781        // Check for optional alias
1782        let alias = match &self.current_token {
1783            Token::Identifier(alias_name) => {
1784                let alias = alias_name.clone();
1785                self.advance();
1786                Some(alias)
1787            }
1788            Token::As => {
1789                self.advance();
1790                match &self.current_token {
1791                    Token::Identifier(alias_name) => {
1792                        let alias = alias_name.clone();
1793                        self.advance();
1794                        Some(alias)
1795                    }
1796                    _ => return Err("Expected alias after AS keyword".to_string()),
1797                }
1798            }
1799            _ => None,
1800        };
1801
1802        Ok((table, alias))
1803    }
1804
1805    fn parse_join_condition(&mut self) -> Result<JoinCondition, String> {
1806        let mut conditions = Vec::new();
1807
1808        // Parse first condition
1809        conditions.push(self.parse_single_join_condition()?);
1810
1811        // Parse additional conditions connected by AND
1812        while matches!(self.current_token, Token::And) {
1813            self.advance(); // consume AND
1814            conditions.push(self.parse_single_join_condition()?);
1815        }
1816
1817        Ok(JoinCondition { conditions })
1818    }
1819
1820    fn parse_single_join_condition(&mut self) -> Result<SingleJoinCondition, String> {
1821        // Parse left side as additive expression (stops before comparison operators)
1822        // This allows the comparison operator to be explicitly parsed by this function
1823        let left_expr = self.parse_additive()?;
1824
1825        // Parse operator
1826        let operator = match &self.current_token {
1827            Token::Equal => JoinOperator::Equal,
1828            Token::NotEqual => JoinOperator::NotEqual,
1829            Token::LessThan => JoinOperator::LessThan,
1830            Token::LessThanOrEqual => JoinOperator::LessThanOrEqual,
1831            Token::GreaterThan => JoinOperator::GreaterThan,
1832            Token::GreaterThanOrEqual => JoinOperator::GreaterThanOrEqual,
1833            _ => return Err("Expected comparison operator in JOIN condition".to_string()),
1834        };
1835        self.advance();
1836
1837        // Parse right side as additive expression (stops before comparison operators)
1838        let right_expr = self.parse_additive()?;
1839
1840        Ok(SingleJoinCondition {
1841            left_expr,
1842            operator,
1843            right_expr,
1844        })
1845    }
1846
1847    fn parse_column_reference(&mut self) -> Result<String, String> {
1848        match &self.current_token {
1849            Token::Identifier(name) => {
1850                let mut column_ref = name.clone();
1851                self.advance();
1852
1853                // Check for table.column notation
1854                if matches!(self.current_token, Token::Dot) {
1855                    self.advance();
1856                    match &self.current_token {
1857                        Token::Identifier(col_name) => {
1858                            column_ref.push('.');
1859                            column_ref.push_str(col_name);
1860                            self.advance();
1861                        }
1862                        _ => return Err("Expected column name after '.'".to_string()),
1863                    }
1864                }
1865
1866                Ok(column_ref)
1867            }
1868            _ => Err("Expected column reference".to_string()),
1869        }
1870    }
1871}
1872
// Context detection for cursor position
/// Syntactic context of the cursor inside a (possibly incomplete) query.
///
/// The detection functions in this file return it together with an
/// `Option<String>` holding the partially typed word, if any.
#[derive(Debug, Clone)]
pub enum CursorContext {
    /// Cursor is in the SELECT column list.
    SelectClause,
    /// Cursor is in the FROM clause.
    FromClause,
    /// Cursor is in the WHERE clause, not directly after AND/OR.
    WhereClause,
    /// Cursor is in the ORDER BY clause.
    OrderByClause,
    /// Cursor follows `column.`; carries the column name (surrounding double
    /// quotes removed).
    AfterColumn(String),
    /// Cursor follows a logical operator; carries that operator.
    AfterLogicalOp(LogicalOp),
    /// Cursor follows a comparison operator.
    AfterComparisonOp(String, String), // column_name, operator
    // NOTE(review): not constructed in the part of the file reviewed here - confirm usage.
    InMethodCall(String, String),      // object, method
    // NOTE(review): not constructed in the part of the file reviewed here - confirm usage.
    InExpression,
    /// No more specific context could be determined.
    Unknown,
}
1887
/// Returns the prefix of `s` that ends at byte offset `pos`, never splitting
/// a UTF-8 character.
///
/// When `pos` falls inside a multi-byte character the cut moves back to the
/// start of that character. A `pos` at or beyond the end yields all of `s`.
fn safe_slice_to(s: &str, pos: usize) -> &str {
    let limit = pos.min(s.len());

    // Offset 0 is always a boundary, so the search cannot come up empty; the
    // fallback only satisfies the type.
    let end = (0..=limit)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);

    &s[..end]
}
1902
/// Returns the suffix of `s` that starts at byte offset `pos`, never
/// splitting a UTF-8 character.
///
/// When `pos` falls inside a multi-byte character the start moves forward to
/// the next character. A `pos` at or beyond the end yields `""`.
fn safe_slice_from(s: &str, pos: usize) -> &str {
    // An empty range (pos >= len) or a range with no boundary left both mean
    // that nothing remains after `pos`.
    match (pos..s.len()).find(|&idx| s.is_char_boundary(idx)) {
        Some(start) => &s[start..],
        None => "",
    }
}
1917
1918#[must_use]
1919pub fn detect_cursor_context(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
1920    let truncated = safe_slice_to(query, cursor_pos);
1921    let mut parser = Parser::new(truncated);
1922
1923    // Try to parse as much as possible
1924    if let Ok(stmt) = parser.parse() {
1925        let (ctx, partial) = analyze_statement(&stmt, truncated, cursor_pos);
1926        #[cfg(test)]
1927        println!("analyze_statement returned: {ctx:?}, {partial:?} for query: '{truncated}'");
1928        (ctx, partial)
1929    } else {
1930        // Partial parse - analyze what we have
1931        let (ctx, partial) = analyze_partial(truncated, cursor_pos);
1932        #[cfg(test)]
1933        println!("analyze_partial returned: {ctx:?}, {partial:?} for query: '{truncated}'");
1934        (ctx, partial)
1935    }
1936}
1937
1938#[must_use]
1939pub fn tokenize_query(query: &str) -> Vec<String> {
1940    let mut lexer = Lexer::new(query);
1941    let tokens = lexer.tokenize_all();
1942    tokens.iter().map(|t| format!("{t:?}")).collect()
1943}
1944
/// Searches backwards for the double quote that opens a quoted identifier.
///
/// * `bytes` - the text to search.
/// * `pos` - index of the closing quote; the search covers the indices
///   strictly below it. A `pos` beyond the end of `bytes` is clamped to the
///   length instead of indexing out of bounds.
///
/// Returns the index of the nearest preceding `"` that is not directly
/// preceded by a backslash. A quote at index 0 always qualifies because
/// nothing can precede it. Returns `None` when there is no such quote,
/// including when `pos` is 0 or `bytes` is empty.
#[must_use]
fn find_quote_start(bytes: &[u8], pos: usize) -> Option<usize> {
    let search_end = pos.min(bytes.len());

    (0..search_end)
        .rev()
        .find(|&idx| bytes[idx] == b'"' && (idx == 0 || bytes[idx - 1] != b'\\'))
}
1967
1968/// Helper function to handle method call context after validation
1969fn handle_method_call_context(col_name: &str, after_dot: &str) -> (CursorContext, Option<String>) {
1970    // Check if there's a partial method name after the dot
1971    let partial_method = if after_dot.is_empty() {
1972        None
1973    } else if after_dot.chars().all(|c| c.is_alphanumeric() || c == '_') {
1974        Some(after_dot.to_string())
1975    } else {
1976        None
1977    };
1978
1979    // For AfterColumn context, strip quotes if present for consistency
1980    let col_name_for_context =
1981        if col_name.starts_with('"') && col_name.ends_with('"') && col_name.len() > 2 {
1982            col_name[1..col_name.len() - 1].to_string()
1983        } else {
1984            col_name.to_string()
1985        };
1986
1987    (
1988        CursorContext::AfterColumn(col_name_for_context),
1989        partial_method,
1990    )
1991}
1992
1993/// Helper function to check if we're after a comparison operator
1994fn check_after_comparison_operator(query: &str) -> Option<(CursorContext, Option<String>)> {
1995    for op in &Parser::COMPARISON_OPERATORS {
1996        if let Some(op_pos) = query.rfind(op) {
1997            let before_op = safe_slice_to(query, op_pos);
1998            let after_op_start = op_pos + op.len();
1999            let after_op = if after_op_start < query.len() {
2000                &query[after_op_start..]
2001            } else {
2002                ""
2003            };
2004
2005            // Check if we have a column name before the operator
2006            if let Some(col_name) = before_op.split_whitespace().last() {
2007                if col_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
2008                    // Check if we're at or near the end of the query
2009                    let after_op_trimmed = after_op.trim();
2010                    if after_op_trimmed.is_empty()
2011                        || (after_op_trimmed
2012                            .chars()
2013                            .all(|c| c.is_alphanumeric() || c == '_')
2014                            && !after_op_trimmed.contains('('))
2015                    {
2016                        let partial = if after_op_trimmed.is_empty() {
2017                            None
2018                        } else {
2019                            Some(after_op_trimmed.to_string())
2020                        };
2021                        return Some((
2022                            CursorContext::AfterComparisonOp(
2023                                col_name.to_string(),
2024                                op.trim().to_string(),
2025                            ),
2026                            partial,
2027                        ));
2028                    }
2029                }
2030            }
2031        }
2032    }
2033    None
2034}
2035
2036fn analyze_statement(
2037    stmt: &SelectStatement,
2038    query: &str,
2039    _cursor_pos: usize,
2040) -> (CursorContext, Option<String>) {
2041    // First check for method call context (e.g., "columnName." or "columnName.Con")
2042    let trimmed = query.trim();
2043
2044    // Check if we're after a comparison operator (e.g., "createdDate > ")
2045    if let Some(result) = check_after_comparison_operator(query) {
2046        return result;
2047    }
2048
2049    // First check if we're after AND/OR - this takes precedence
2050    // Helper function to check if string ends with a logical operator
2051    let ends_with_logical_op = |s: &str| -> bool {
2052        let s_upper = s.to_uppercase();
2053        s_upper.ends_with(" AND") || s_upper.ends_with(" OR")
2054    };
2055
2056    if ends_with_logical_op(trimmed) {
2057        // Don't check for method context if we're clearly after a logical operator
2058    } else {
2059        // Look for the last dot in the query
2060        if let Some(dot_pos) = trimmed.rfind('.') {
2061            // Check if we're after a column name and dot
2062            let before_dot = safe_slice_to(trimmed, dot_pos);
2063            let after_dot_start = dot_pos + 1;
2064            let after_dot = if after_dot_start < trimmed.len() {
2065                &trimmed[after_dot_start..]
2066            } else {
2067                ""
2068            };
2069
2070            // Check if the part after dot looks like an incomplete method call
2071            // (not a complete method call like "Contains(...)")
2072            if !after_dot.contains('(') {
2073                // Try to extract the column name - could be quoted or regular
2074                let col_name = if before_dot.ends_with('"') {
2075                    // Handle quoted identifier - search backwards for matching opening quote
2076                    let bytes = before_dot.as_bytes();
2077                    let pos = before_dot.len() - 1; // Position of closing quote
2078
2079                    find_quote_start(bytes, pos).map(|start| safe_slice_from(before_dot, start))
2080                } else {
2081                    // Regular identifier - get the last word, handling parentheses
2082                    // Strip all leading parentheses
2083                    before_dot
2084                        .split_whitespace()
2085                        .last()
2086                        .map(|word| word.trim_start_matches('('))
2087                };
2088
2089                if let Some(col_name) = col_name {
2090                    // For quoted identifiers, keep the quotes, for regular identifiers check validity
2091                    let is_valid = Parser::is_valid_identifier(col_name);
2092
2093                    if is_valid {
2094                        return handle_method_call_context(col_name, after_dot);
2095                    }
2096                }
2097            }
2098        }
2099    }
2100
2101    // Check if we're in WHERE clause
2102    if let Some(where_clause) = &stmt.where_clause {
2103        // Check if query ends with AND/OR (with or without trailing space/partial)
2104        let trimmed_upper = trimmed.to_uppercase();
2105        if trimmed_upper.ends_with(" AND") || trimmed_upper.ends_with(" OR") {
2106            let op = if trimmed_upper.ends_with(" AND") {
2107                LogicalOp::And
2108            } else {
2109                LogicalOp::Or
2110            };
2111            return (CursorContext::AfterLogicalOp(op), None);
2112        }
2113
2114        // Check if we have AND/OR followed by a partial word
2115        let query_upper = query.to_uppercase();
2116        if let Some(and_pos) = query_upper.rfind(" AND ") {
2117            let after_and = safe_slice_from(query, and_pos + 5);
2118            let partial = extract_partial_at_end(after_and);
2119            if partial.is_some() {
2120                return (CursorContext::AfterLogicalOp(LogicalOp::And), partial);
2121            }
2122        }
2123
2124        if let Some(or_pos) = query_upper.rfind(" OR ") {
2125            let after_or = safe_slice_from(query, or_pos + 4);
2126            let partial = extract_partial_at_end(after_or);
2127            if partial.is_some() {
2128                return (CursorContext::AfterLogicalOp(LogicalOp::Or), partial);
2129            }
2130        }
2131
2132        if let Some(last_condition) = where_clause.conditions.last() {
2133            if let Some(connector) = &last_condition.connector {
2134                // We're after AND/OR
2135                return (
2136                    CursorContext::AfterLogicalOp(connector.clone()),
2137                    extract_partial_at_end(query),
2138                );
2139            }
2140        }
2141        // We're in WHERE clause but not after AND/OR
2142        return (CursorContext::WhereClause, extract_partial_at_end(query));
2143    }
2144
2145    // Check if we're after ORDER BY
2146    let query_upper = query.to_uppercase();
2147    if query_upper.ends_with(" ORDER BY") {
2148        return (CursorContext::OrderByClause, None);
2149    }
2150
2151    // Check other contexts based on what's in the statement
2152    if stmt.order_by.is_some() {
2153        return (CursorContext::OrderByClause, extract_partial_at_end(query));
2154    }
2155
2156    if stmt.from_table.is_some() && stmt.where_clause.is_none() && stmt.order_by.is_none() {
2157        return (CursorContext::FromClause, extract_partial_at_end(query));
2158    }
2159
2160    if !stmt.columns.is_empty() && stmt.from_table.is_none() {
2161        return (CursorContext::SelectClause, extract_partial_at_end(query));
2162    }
2163
2164    (CursorContext::Unknown, None)
2165}
2166
2167/// Helper function to find the last occurrence of a token type in the token stream
2168fn find_last_token(tokens: &[(usize, usize, Token)], target: &Token) -> Option<usize> {
2169    tokens
2170        .iter()
2171        .rposition(|(_, _, t)| t == target)
2172        .map(|idx| tokens[idx].0)
2173}
2174
2175/// Helper function to find the last occurrence of any matching token
2176fn find_last_matching_token<F>(
2177    tokens: &[(usize, usize, Token)],
2178    predicate: F,
2179) -> Option<(usize, &Token)>
2180where
2181    F: Fn(&Token) -> bool,
2182{
2183    tokens
2184        .iter()
2185        .rposition(|(_, _, t)| predicate(t))
2186        .map(|idx| (tokens[idx].0, &tokens[idx].2))
2187}
2188
2189/// Helper function to check if we're in a specific clause based on tokens
2190fn is_in_clause(
2191    tokens: &[(usize, usize, Token)],
2192    clause_token: Token,
2193    exclude_tokens: &[Token],
2194) -> bool {
2195    // Find the last occurrence of the clause token
2196    if let Some(clause_pos) = find_last_token(tokens, &clause_token) {
2197        // Check if any exclude tokens appear after it
2198        for (pos, _, token) in tokens.iter() {
2199            if *pos > clause_pos && exclude_tokens.contains(token) {
2200                return false;
2201            }
2202        }
2203        return true;
2204    }
2205    false
2206}
2207
2208fn analyze_partial(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
2209    // Tokenize the query up to cursor position
2210    let mut lexer = Lexer::new(query);
2211    let tokens = lexer.tokenize_all_with_positions();
2212
2213    let trimmed = query.trim();
2214
2215    #[cfg(test)]
2216    {
2217        if trimmed.contains("\"Last Name\"") {
2218            eprintln!("DEBUG analyze_partial: query='{query}', trimmed='{trimmed}'");
2219        }
2220    }
2221
2222    // Check if we're after a comparison operator (e.g., "createdDate > ")
2223    if let Some(result) = check_after_comparison_operator(query) {
2224        return result;
2225    }
2226
2227    // Look for the last dot in the query (method call context) - check this FIRST
2228    // before AND/OR detection to properly handle cases like "AND (Country."
2229    if let Some(dot_pos) = trimmed.rfind('.') {
2230        #[cfg(test)]
2231        {
2232            if trimmed.contains("\"Last Name\"") {
2233                eprintln!("DEBUG: Found dot at position {dot_pos}");
2234            }
2235        }
2236        // Check if we're after a column name and dot
2237        let before_dot = &trimmed[..dot_pos];
2238        let after_dot = &trimmed[dot_pos + 1..];
2239
2240        // Check if the part after dot looks like an incomplete method call
2241        // (not a complete method call like "Contains(...)")
2242        if !after_dot.contains('(') {
2243            // Try to extract the column name before the dot
2244            // It could be a quoted identifier like "Last Name" or a regular identifier
2245            let col_name = if before_dot.ends_with('"') {
2246                // Handle quoted identifier - search backwards for matching opening quote
2247                let bytes = before_dot.as_bytes();
2248                let pos = before_dot.len() - 1; // Position of closing quote
2249
2250                #[cfg(test)]
2251                {
2252                    if trimmed.contains("\"Last Name\"") {
2253                        eprintln!("DEBUG: before_dot='{before_dot}', looking for opening quote");
2254                    }
2255                }
2256
2257                let found_start = find_quote_start(bytes, pos);
2258
2259                if let Some(start) = found_start {
2260                    // Extract the full quoted identifier including quotes
2261                    let result = safe_slice_from(before_dot, start);
2262                    #[cfg(test)]
2263                    {
2264                        if trimmed.contains("\"Last Name\"") {
2265                            eprintln!("DEBUG: Extracted quoted identifier: '{result}'");
2266                        }
2267                    }
2268                    Some(result)
2269                } else {
2270                    #[cfg(test)]
2271                    {
2272                        if trimmed.contains("\"Last Name\"") {
2273                            eprintln!("DEBUG: No opening quote found!");
2274                        }
2275                    }
2276                    None
2277                }
2278            } else {
2279                // Regular identifier - get the last word, handling parentheses
2280                // Strip all leading parentheses
2281                before_dot
2282                    .split_whitespace()
2283                    .last()
2284                    .map(|word| word.trim_start_matches('('))
2285            };
2286
2287            if let Some(col_name) = col_name {
2288                #[cfg(test)]
2289                {
2290                    if trimmed.contains("\"Last Name\"") {
2291                        eprintln!("DEBUG: col_name = '{col_name}'");
2292                    }
2293                }
2294
2295                // For quoted identifiers, keep the quotes, for regular identifiers check validity
2296                let is_valid = Parser::is_valid_identifier(col_name);
2297
2298                #[cfg(test)]
2299                {
2300                    if trimmed.contains("\"Last Name\"") {
2301                        eprintln!("DEBUG: is_valid = {is_valid}");
2302                    }
2303                }
2304
2305                if is_valid {
2306                    return handle_method_call_context(col_name, after_dot);
2307                }
2308            }
2309        }
2310    }
2311
2312    // Check if we're after AND/OR using tokens - but only after checking for method calls
2313    if let Some((pos, token)) =
2314        find_last_matching_token(&tokens, |t| matches!(t, Token::And | Token::Or))
2315    {
2316        // Check if cursor is after the logical operator
2317        let token_end_pos = if matches!(token, Token::And) {
2318            pos + 3 // "AND" is 3 characters
2319        } else {
2320            pos + 2 // "OR" is 2 characters
2321        };
2322
2323        if cursor_pos > token_end_pos {
2324            // Extract any partial word after the operator
2325            let after_op = safe_slice_from(query, token_end_pos + 1); // +1 for the space
2326            let partial = extract_partial_at_end(after_op);
2327            let op = if matches!(token, Token::And) {
2328                LogicalOp::And
2329            } else {
2330                LogicalOp::Or
2331            };
2332            return (CursorContext::AfterLogicalOp(op), partial);
2333        }
2334    }
2335
2336    // Check if the last token is AND or OR (handles case where it's at the very end)
2337    if let Some((_, _, last_token)) = tokens.last() {
2338        if matches!(last_token, Token::And | Token::Or) {
2339            let op = if matches!(last_token, Token::And) {
2340                LogicalOp::And
2341            } else {
2342                LogicalOp::Or
2343            };
2344            return (CursorContext::AfterLogicalOp(op), None);
2345        }
2346    }
2347
2348    // Check if we're in ORDER BY clause using tokens
2349    if let Some(order_pos) = find_last_token(&tokens, &Token::OrderBy) {
2350        // Check if there's a BY token after ORDER
2351        let has_by = tokens
2352            .iter()
2353            .any(|(pos, _, t)| *pos > order_pos && matches!(t, Token::By));
2354        if has_by
2355            || tokens
2356                .last()
2357                .map_or(false, |(_, _, t)| matches!(t, Token::OrderBy))
2358        {
2359            return (CursorContext::OrderByClause, extract_partial_at_end(query));
2360        }
2361    }
2362
2363    // Check if we're in WHERE clause using tokens
2364    if is_in_clause(&tokens, Token::Where, &[Token::OrderBy, Token::GroupBy]) {
2365        return (CursorContext::WhereClause, extract_partial_at_end(query));
2366    }
2367
2368    // Check if we're in FROM clause using tokens
2369    if is_in_clause(
2370        &tokens,
2371        Token::From,
2372        &[Token::Where, Token::OrderBy, Token::GroupBy],
2373    ) {
2374        return (CursorContext::FromClause, extract_partial_at_end(query));
2375    }
2376
2377    // Check if we're in SELECT clause using tokens
2378    if find_last_token(&tokens, &Token::Select).is_some()
2379        && find_last_token(&tokens, &Token::From).is_none()
2380    {
2381        return (CursorContext::SelectClause, extract_partial_at_end(query));
2382    }
2383
2384    (CursorContext::Unknown, None)
2385}
2386
2387fn extract_partial_at_end(query: &str) -> Option<String> {
2388    let trimmed = query.trim();
2389
2390    // First check if the last word itself starts with a quote (unclosed quoted identifier being typed)
2391    if let Some(last_word) = trimmed.split_whitespace().last() {
2392        if last_word.starts_with('"') && !last_word.ends_with('"') {
2393            // This is an unclosed quoted identifier like "Cust
2394            return Some(last_word.to_string());
2395        }
2396    }
2397
2398    // Regular identifier extraction
2399    let last_word = trimmed.split_whitespace().last()?;
2400
2401    // Check if it's a partial identifier (not a keyword or operator)
2402    // First check if it's alphanumeric (potential identifier)
2403    if last_word.chars().all(|c| c.is_alphanumeric() || c == '_') {
2404        // Use lexer to determine if it's a keyword or identifier
2405        if !is_sql_keyword(last_word) {
2406            Some(last_word.to_string())
2407        } else {
2408            None
2409        }
2410    } else {
2411        None
2412    }
2413}
2414
2415// Implement the ParsePrimary trait for Parser to use the modular expression parsing
2416impl ParsePrimary for Parser {
2417    fn current_token(&self) -> &Token {
2418        &self.current_token
2419    }
2420
2421    fn advance(&mut self) {
2422        self.advance();
2423    }
2424
2425    fn consume(&mut self, expected: Token) -> Result<(), String> {
2426        self.consume(expected)
2427    }
2428
2429    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
2430        self.parse_case_expression()
2431    }
2432
2433    fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
2434        self.parse_function_args()
2435    }
2436
2437    fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
2438        self.parse_window_spec()
2439    }
2440
2441    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
2442        self.parse_logical_or()
2443    }
2444
2445    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2446        self.parse_comparison()
2447    }
2448
2449    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2450        self.parse_expression_list()
2451    }
2452
2453    fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2454        // Parse subquery without parenthesis balance validation
2455        if matches!(self.current_token, Token::With) {
2456            self.parse_with_clause_inner()
2457        } else {
2458            self.parse_select_statement_inner()
2459        }
2460    }
2461}
2462
2463// Implement the ExpressionParser trait for Parser to use the modular expression parsing
2464impl ExpressionParser for Parser {
2465    fn current_token(&self) -> &Token {
2466        &self.current_token
2467    }
2468
2469    fn advance(&mut self) {
2470        // Call the main advance method directly to avoid recursion
2471        match &self.current_token {
2472            Token::LeftParen => self.paren_depth += 1,
2473            Token::RightParen => {
2474                self.paren_depth -= 1;
2475            }
2476            _ => {}
2477        }
2478        self.current_token = self.lexer.next_token();
2479    }
2480
2481    fn peek(&self) -> Option<&Token> {
2482        // We can't return a reference to a token from a temporary lexer,
2483        // so we need a different approach. For now, let's use a workaround
2484        // that checks the next token type without consuming it.
2485        // This is a limitation of the current design.
2486        // A proper fix would be to store the peeked token in the Parser struct.
2487        None // TODO: Implement proper lookahead
2488    }
2489
2490    fn is_at_end(&self) -> bool {
2491        matches!(self.current_token, Token::Eof)
2492    }
2493
2494    fn consume(&mut self, expected: Token) -> Result<(), String> {
2495        // Call the main consume method to avoid recursion
2496        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
2497            self.update_paren_depth(&expected)?;
2498            self.current_token = self.lexer.next_token();
2499            Ok(())
2500        } else {
2501            Err(format!(
2502                "Expected {:?}, found {:?}",
2503                expected, self.current_token
2504            ))
2505        }
2506    }
2507
2508    fn parse_identifier(&mut self) -> Result<String, String> {
2509        if let Token::Identifier(id) = &self.current_token {
2510            let id = id.clone();
2511            self.advance();
2512            Ok(id)
2513        } else {
2514            Err(format!(
2515                "Expected identifier, found {:?}",
2516                self.current_token
2517            ))
2518        }
2519    }
2520}
2521
2522// Implement the ParseArithmetic trait for Parser to use the modular arithmetic parsing
impl ParseArithmetic for Parser {
    /// Borrows the token the parser is currently positioned on.
    fn current_token(&self) -> &Token {
        &self.current_token
    }

    /// Forwards to `advance` on `self`.
    // NOTE(review): this relies on method resolution picking a `Parser`
    // inherent `advance` over this trait method (otherwise the call would
    // recurse) — confirm the inherent method exists.
    fn advance(&mut self) {
        self.advance();
    }

    /// Forwards to `consume` on `self` with the expected token.
    fn consume(&mut self, expected: Token) -> Result<(), String> {
        self.consume(expected)
    }

    /// Forwards to `parse_primary` on `self`.
    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
        self.parse_primary()
    }

    /// Forwards to `parse_multiplicative` on `self`.
    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
        self.parse_multiplicative()
    }

    /// Forwards to `parse_method_args` on `self`.
    fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
        self.parse_method_args()
    }
}
2548
2549// Implement the ParseComparison trait for Parser to use the modular comparison parsing
2550impl ParseComparison for Parser {
2551    fn current_token(&self) -> &Token {
2552        &self.current_token
2553    }
2554
2555    fn advance(&mut self) {
2556        self.advance();
2557    }
2558
2559    fn consume(&mut self, expected: Token) -> Result<(), String> {
2560        self.consume(expected)
2561    }
2562
2563    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2564        self.parse_primary()
2565    }
2566
2567    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
2568        self.parse_additive()
2569    }
2570
2571    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2572        self.parse_expression_list()
2573    }
2574
2575    fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2576        // Parse subquery without parenthesis balance validation
2577        if matches!(self.current_token, Token::With) {
2578            self.parse_with_clause_inner()
2579        } else {
2580            self.parse_select_statement_inner()
2581        }
2582    }
2583}
2584
2585// Implement the ParseLogical trait for Parser to use the modular logical parsing
impl ParseLogical for Parser {
    /// Borrows the token the parser is currently positioned on.
    fn current_token(&self) -> &Token {
        &self.current_token
    }

    /// Forwards to `advance` on `self`.
    // NOTE(review): this relies on method resolution picking a `Parser`
    // inherent `advance` over this trait method (otherwise the call would
    // recurse) — confirm the inherent method exists.
    fn advance(&mut self) {
        self.advance();
    }

    /// Forwards to `consume` on `self` with the expected token.
    fn consume(&mut self, expected: Token) -> Result<(), String> {
        self.consume(expected)
    }

    /// Forwards to `parse_logical_and` on `self`.
    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
        self.parse_logical_and()
    }

    /// Parses the operand level that sits directly beneath logical AND.
    fn parse_base_logical_expression(&mut self) -> Result<SqlExpression, String> {
        // This is the base for logical AND - it should parse comparison expressions
        // to avoid infinite recursion with parse_expression
        self.parse_comparison()
    }

    /// Forwards to `parse_comparison` on `self`.
    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
        self.parse_comparison()
    }

    /// Forwards to `parse_expression_list` on `self`.
    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
        self.parse_expression_list()
    }
}
2617
2618// Implement the ParseCase trait for Parser to use the modular CASE parsing
impl ParseCase for Parser {
    /// Borrows the token the parser is currently positioned on.
    fn current_token(&self) -> &Token {
        &self.current_token
    }

    /// Forwards to `advance` on `self`.
    // NOTE(review): this relies on method resolution picking a `Parser`
    // inherent `advance` over this trait method (otherwise the call would
    // recurse) — confirm the inherent method exists.
    fn advance(&mut self) {
        self.advance();
    }

    /// Forwards to `consume` on `self` with the expected token.
    fn consume(&mut self, expected: Token) -> Result<(), String> {
        self.consume(expected)
    }

    /// Forwards to `parse_expression` on `self`.
    fn parse_expression(&mut self) -> Result<SqlExpression, String> {
        self.parse_expression()
    }
}
2636
2637fn is_sql_keyword(word: &str) -> bool {
2638    // Use the lexer to check if this word produces a keyword token
2639    let mut lexer = Lexer::new(word);
2640    let token = lexer.next_token();
2641
2642    // Check if it's a keyword token (not an identifier)
2643    !matches!(token, Token::Identifier(_) | Token::Eof)
2644}
2645
// Unit tests for comment handling across parser modes. Each test parses a
// small query and inspects `leading_comments` / `trailing_comment` on the
// resulting statement. `Parser` and `ParserMode` come in through `super::*`.
#[cfg(test)]
mod tests {
    use super::*;

    /// `Parser::new()` behaves like Standard mode: a leading comment in the
    /// input is not recorded on the parsed statement.
    #[test]
    fn test_parser_mode_default_is_standard() {
        let sql = "-- Leading comment\nSELECT * FROM users";
        let mut parser = Parser::new(sql);
        let stmt = parser.parse().unwrap();

        // In Standard mode, comments should be empty
        assert!(stmt.leading_comments.is_empty());
        assert!(stmt.trailing_comment.is_none());
    }

    /// PreserveComments mode records each leading line comment, in order.
    #[test]
    fn test_parser_mode_preserve_leading_comments() {
        let sql = "-- Important query\n-- Author: Alice\nSELECT id, name FROM users";
        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
        let stmt = parser.parse().unwrap();

        // Should have 2 leading comments
        assert_eq!(stmt.leading_comments.len(), 2);
        assert!(stmt.leading_comments[0].is_line_comment);
        assert!(stmt.leading_comments[0].text.contains("Important query"));
        assert!(stmt.leading_comments[1].text.contains("Author: Alice"));
    }

    /// PreserveComments mode records a line comment that follows the query.
    #[test]
    fn test_parser_mode_preserve_trailing_comment() {
        let sql = "SELECT * FROM users -- Fetch all users";
        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
        let stmt = parser.parse().unwrap();

        // Should have trailing comment
        assert!(stmt.trailing_comment.is_some());
        let comment = stmt.trailing_comment.unwrap();
        assert!(comment.is_line_comment);
        assert!(comment.text.contains("Fetch all users"));
    }

    /// PreserveComments mode records a leading `/* ... */` comment and marks
    /// it as not being a line comment.
    #[test]
    fn test_parser_mode_preserve_block_comments() {
        let sql = "/* Query explanation */\nSELECT * FROM users";
        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
        let stmt = parser.parse().unwrap();

        // Should have leading block comment
        assert_eq!(stmt.leading_comments.len(), 1);
        assert!(!stmt.leading_comments[0].is_line_comment); // It's a block comment
        assert!(stmt.leading_comments[0].text.contains("Query explanation"));
    }

    /// PreserveComments mode records a leading and a trailing comment on the
    /// same statement.
    #[test]
    fn test_parser_mode_preserve_both_comments() {
        let sql = "-- Leading\nSELECT * FROM users -- Trailing";
        let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
        let stmt = parser.parse().unwrap();

        // Should have both
        assert_eq!(stmt.leading_comments.len(), 1);
        assert!(stmt.leading_comments[0].text.contains("Leading"));
        assert!(stmt.trailing_comment.is_some());
        assert!(stmt.trailing_comment.unwrap().text.contains("Trailing"));
    }

    /// Explicit Standard mode drops line and block comments wherever they
    /// appear while still parsing the query itself. (This checks behavior
    /// only; it does not measure performance.)
    #[test]
    fn test_parser_mode_standard_ignores_comments() {
        let sql = "-- Comment 1\n/* Comment 2 */\nSELECT * FROM users -- Comment 3";
        let mut parser = Parser::with_mode(sql, ParserMode::Standard);
        let stmt = parser.parse().unwrap();

        // Comments should be completely ignored
        assert!(stmt.leading_comments.is_empty());
        assert!(stmt.trailing_comment.is_none());

        // But query should still parse correctly
        assert_eq!(stmt.select_items.len(), 1);
        assert_eq!(stmt.from_table, Some("users".to_string()));
    }

    /// `Parser::new()` and `Parser::with_mode(.., ParserMode::Standard)` agree
    /// on the fields compared below for the same input.
    #[test]
    fn test_parser_backward_compatibility() {
        let sql = "SELECT id, name FROM users WHERE active = true";

        // Old way (still works, defaults to Standard mode)
        let mut parser1 = Parser::new(sql);
        let stmt1 = parser1.parse().unwrap();

        // Explicit Standard mode (same behavior)
        let mut parser2 = Parser::with_mode(sql, ParserMode::Standard);
        let stmt2 = parser2.parse().unwrap();

        // Spot-check that both parses agree (select item count, FROM table,
        // presence of WHERE) and that neither recorded leading comments
        assert_eq!(stmt1.select_items.len(), stmt2.select_items.len());
        assert_eq!(stmt1.from_table, stmt2.from_table);
        assert_eq!(stmt1.where_clause.is_some(), stmt2.where_clause.is_some());
        assert!(stmt1.leading_comments.is_empty());
        assert!(stmt2.leading_comments.is_empty());
    }
}
2753}