sql_cli/sql/
recursive_parser.rs

1// Keep chrono imports for the parser implementation
2
3// Re-exports for backward compatibility - these serve as both imports and re-exports
4pub use super::parser::ast::{
5    CTEType, Condition, DataFormat, FrameBound, FrameUnit, HttpMethod, JoinClause, JoinCondition,
6    JoinOperator, JoinType, LogicalOp, OrderByColumn, SelectItem, SelectStatement,
7    SingleJoinCondition, SortDirection, SqlExpression, TableFunction, TableSource, WebCTESpec,
8    WhenBranch, WhereClause, WindowFrame, WindowSpec, CTE,
9};
10pub use super::parser::legacy::{ParseContext, ParseState, Schema, SqlParser, SqlToken, TableInfo};
11pub use super::parser::lexer::{Lexer, Token};
12pub use super::parser::ParserConfig;
13
14// Re-export formatting functions for backward compatibility
15pub use super::parser::formatter::{format_ast_tree, format_sql_pretty, format_sql_pretty_compact};
16
17// New AST-based formatter
18pub use super::parser::ast_formatter::{format_sql_ast, format_sql_ast_with_config, FormatConfig};
19
20// Import the new expression modules
21use super::parser::expressions::arithmetic::{
22    parse_additive as parse_additive_expr, parse_multiplicative as parse_multiplicative_expr,
23    ParseArithmetic,
24};
25use super::parser::expressions::case::{parse_case_expression as parse_case_expr, ParseCase};
26use super::parser::expressions::comparison::{
27    parse_comparison as parse_comparison_expr, parse_in_operator, ParseComparison,
28};
29use super::parser::expressions::logical::{
30    parse_logical_and as parse_logical_and_expr, parse_logical_or as parse_logical_or_expr,
31    ParseLogical,
32};
33use super::parser::expressions::primary::{
34    parse_primary as parse_primary_expr, ParsePrimary, PrimaryExpressionContext,
35};
36use super::parser::expressions::ExpressionParser;
37
38// Import function registry to check for function existence
39use crate::sql::functions::{FunctionCategory, FunctionRegistry};
40use crate::sql::generators::GeneratorRegistry;
41use std::sync::Arc;
42
43// Import Web CTE parser
44use super::parser::web_cte_parser::WebCteParser;
/// SQL parser state: the lexer, the token currently being examined, and the
/// bookkeeping used for parenthesis tracking, debug tracing, and function lookup.
pub struct Parser {
    /// Token source; advanced one token at a time via `next_token()`.
    lexer: Lexer,
    /// Token currently under inspection. Made public for web_cte_parser access.
    pub current_token: Token,
    /// Track if we're parsing method arguments.
    in_method_args: bool,
    /// Known column names for context-aware parsing.
    columns: Vec<String>,
    /// Parentheses nesting depth in the current context; may go negative when
    /// `advance()` steps over an unmatched closing parenthesis.
    paren_depth: i32,
    /// Stack to save/restore paren depth for nested contexts (see
    /// `push_paren_depth` / `pop_paren_depth`).
    paren_depth_stack: Vec<i32>,
    /// Parser configuration including case sensitivity.
    _config: ParserConfig,
    /// Enable detailed token-by-token trace on stderr.
    debug_trace: bool,
    /// Recursion depth used to indent trace output.
    trace_depth: usize,
    /// Function registry, consulted when deciding whether a FROM item is a table function.
    function_registry: Arc<FunctionRegistry>,
    /// Generator registry for table functions in FROM.
    generator_registry: Arc<GeneratorRegistry>,
}
58
59impl Parser {
60    #[must_use]
61    pub fn new(input: &str) -> Self {
62        let mut lexer = Lexer::new(input);
63        let current_token = lexer.next_token();
64        Self {
65            lexer,
66            current_token,
67            in_method_args: false,
68            columns: Vec::new(),
69            paren_depth: 0,
70            paren_depth_stack: Vec::new(),
71            _config: ParserConfig::default(),
72            debug_trace: false,
73            trace_depth: 0,
74            function_registry: Arc::new(FunctionRegistry::new()),
75            generator_registry: Arc::new(GeneratorRegistry::new()),
76        }
77    }
78
79    #[must_use]
80    pub fn with_config(input: &str, config: ParserConfig) -> Self {
81        let mut lexer = Lexer::new(input);
82        let current_token = lexer.next_token();
83        Self {
84            lexer,
85            current_token,
86            in_method_args: false,
87            columns: Vec::new(),
88            paren_depth: 0,
89            paren_depth_stack: Vec::new(),
90            _config: config,
91            debug_trace: false,
92            trace_depth: 0,
93            function_registry: Arc::new(FunctionRegistry::new()),
94            generator_registry: Arc::new(GeneratorRegistry::new()),
95        }
96    }
97
98    #[must_use]
99    pub fn with_columns(mut self, columns: Vec<String>) -> Self {
100        self.columns = columns;
101        self
102    }
103
104    #[must_use]
105    pub fn with_debug_trace(mut self, enabled: bool) -> Self {
106        self.debug_trace = enabled;
107        self
108    }
109
110    #[must_use]
111    pub fn with_function_registry(mut self, registry: Arc<FunctionRegistry>) -> Self {
112        self.function_registry = registry;
113        self
114    }
115
116    #[must_use]
117    pub fn with_generator_registry(mut self, registry: Arc<GeneratorRegistry>) -> Self {
118        self.generator_registry = registry;
119        self
120    }
121
122    fn trace_enter(&mut self, context: &str) {
123        if self.debug_trace {
124            let indent = "  ".repeat(self.trace_depth);
125            eprintln!("{}→ {} | Token: {:?}", indent, context, self.current_token);
126            self.trace_depth += 1;
127        }
128    }
129
130    fn trace_exit(&mut self, context: &str, result: &Result<impl std::fmt::Debug, String>) {
131        if self.debug_trace {
132            self.trace_depth = self.trace_depth.saturating_sub(1);
133            let indent = "  ".repeat(self.trace_depth);
134            match result {
135                Ok(val) => eprintln!("{}← {} ✓ | Result: {:?}", indent, context, val),
136                Err(e) => eprintln!("{}← {} ✗ | Error: {}", indent, context, e),
137            }
138        }
139    }
140
141    fn trace_token(&self, action: &str) {
142        if self.debug_trace {
143            let indent = "  ".repeat(self.trace_depth);
144            eprintln!("{}  {} | Token: {:?}", indent, action, self.current_token);
145        }
146    }
147
148    #[allow(dead_code)]
149    fn peek_token(&self) -> Option<Token> {
150        // Alternative peek that returns owned token
151        let mut temp_lexer = self.lexer.clone();
152        let next_token = temp_lexer.next_token();
153        if matches!(next_token, Token::Eof) {
154            None
155        } else {
156            Some(next_token)
157        }
158    }
159
160    /// Check if current token is one of the reserved keywords that should stop parsing
161    /// Check if an identifier string is a reserved keyword (for backward compatibility)
162    /// This is used when the lexer hasn't properly tokenized keywords and they come through
163    /// as Token::Identifier instead of their proper token types
164    fn is_identifier_reserved(id: &str) -> bool {
165        let id_upper = id.to_uppercase();
166        matches!(
167            id_upper.as_str(),
168            "ORDER" | "HAVING" | "LIMIT" | "OFFSET" | "UNION" | "INTERSECT" | "EXCEPT"
169        )
170    }
171
    /// String forms of the six comparison operators, each padded with one space on
    /// both sides (intended for autocomplete context).
    const COMPARISON_OPERATORS: [&'static str; 6] = [" > ", " < ", " >= ", " <= ", " = ", " != "];
174
175    pub fn consume(&mut self, expected: Token) -> Result<(), String> {
176        self.trace_token(&format!("Consuming expected {:?}", expected));
177        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
178            // Track parentheses depth
179            self.update_paren_depth(&expected)?;
180
181            self.current_token = self.lexer.next_token();
182            Ok(())
183        } else {
184            // Provide better error messages for common cases
185            let error_msg = match (&expected, &self.current_token) {
186                (Token::RightParen, Token::Eof) if self.paren_depth > 0 => {
187                    format!(
188                        "Unclosed parenthesis - missing {} closing parenthes{}",
189                        self.paren_depth,
190                        if self.paren_depth == 1 { "is" } else { "es" }
191                    )
192                }
193                (Token::RightParen, _) if self.paren_depth > 0 => {
194                    format!(
195                        "Expected closing parenthesis but found {:?} (currently {} unclosed parenthes{})",
196                        self.current_token,
197                        self.paren_depth,
198                        if self.paren_depth == 1 { "is" } else { "es" }
199                    )
200                }
201                _ => format!("Expected {:?}, found {:?}", expected, self.current_token),
202            };
203            Err(error_msg)
204        }
205    }
206
207    pub fn advance(&mut self) {
208        // Track parentheses depth when advancing
209        match &self.current_token {
210            Token::LeftParen => self.paren_depth += 1,
211            Token::RightParen => {
212                self.paren_depth -= 1;
213                // Note: We don't check for < 0 here because advance() is used
214                // in contexts where we're not necessarily expecting a right paren
215            }
216            _ => {}
217        }
218        let old_token = self.current_token.clone();
219        self.current_token = self.lexer.next_token();
220        if self.debug_trace {
221            let indent = "  ".repeat(self.trace_depth);
222            eprintln!(
223                "{}  Advanced: {:?} → {:?}",
224                indent, old_token, self.current_token
225            );
226        }
227    }
228
229    fn push_paren_depth(&mut self) {
230        self.paren_depth_stack.push(self.paren_depth);
231        self.paren_depth = 0;
232    }
233
234    fn pop_paren_depth(&mut self) {
235        if let Some(depth) = self.paren_depth_stack.pop() {
236            // Ignore the internal depth - just restore the saved value
237            self.paren_depth = depth;
238        } else {
239        }
240    }
241
242    pub fn parse(&mut self) -> Result<SelectStatement, String> {
243        self.trace_enter("parse");
244
245        // Check for WITH clause at the beginning
246        let result = if matches!(self.current_token, Token::With) {
247            self.parse_with_clause()
248        } else {
249            self.parse_select_statement()
250        };
251
252        self.trace_exit("parse", &result);
253        result
254    }
255
256    fn parse_with_clause(&mut self) -> Result<SelectStatement, String> {
257        self.consume(Token::With)?;
258        let ctes = self.parse_cte_list()?;
259
260        // Parse the main SELECT statement - use inner version since we're already tracking parens
261        let mut main_query = self.parse_select_statement_inner()?;
262        main_query.ctes = ctes;
263
264        // Check for balanced parentheses at the end of parsing
265        self.check_balanced_parentheses()?;
266
267        Ok(main_query)
268    }
269
270    fn parse_with_clause_inner(&mut self) -> Result<SelectStatement, String> {
271        self.consume(Token::With)?;
272        let ctes = self.parse_cte_list()?;
273
274        // Parse the main SELECT statement (without parenthesis checking for subqueries)
275        let mut main_query = self.parse_select_statement_inner()?;
276        main_query.ctes = ctes;
277
278        Ok(main_query)
279    }
280
281    // Helper function to parse CTE list - eliminates duplication
282    fn parse_cte_list(&mut self) -> Result<Vec<CTE>, String> {
283        let mut ctes = Vec::new();
284
285        // Parse CTEs
286        loop {
287            // Check for WEB keyword for each CTE (can be different for each one)
288            let is_web = if matches!(&self.current_token, Token::Web) {
289                self.trace_token("Found WEB keyword for CTE");
290                self.advance();
291                true
292            } else {
293                false
294            };
295
296            // Parse CTE name
297            let name = match &self.current_token {
298                Token::Identifier(name) => name.clone(),
299                _ => {
300                    return Err(format!(
301                        "Expected CTE name after {}",
302                        if is_web { "WEB" } else { "WITH or comma" }
303                    ))
304                }
305            };
306            self.advance();
307
308            // Optional column list: WITH t(col1, col2) AS ...
309            let column_list = if matches!(self.current_token, Token::LeftParen) {
310                self.advance();
311                let cols = self.parse_identifier_list()?;
312                self.consume(Token::RightParen)?;
313                Some(cols)
314            } else {
315                None
316            };
317
318            // Expect AS
319            self.consume(Token::As)?;
320
321            let cte_type = if is_web {
322                // Expect opening parenthesis for WEB CTE
323                self.consume(Token::LeftParen)?;
324                // Parse WEB CTE specification using dedicated parser
325                let web_spec = WebCteParser::parse(self)?;
326                // Consume closing parenthesis for WEB CTE
327                self.consume(Token::RightParen)?;
328                CTEType::Web(web_spec)
329            } else {
330                // For standard CTEs, push depth BEFORE consuming opening paren
331                // This ensures the paren is counted in the inner context
332                self.push_paren_depth();
333                // Now consume opening parenthesis
334                self.consume(Token::LeftParen)?;
335                let query = self.parse_select_statement_inner()?;
336                // Expect closing parenthesis while still in CTE context
337                self.consume(Token::RightParen)?;
338                // Now pop to restore outer depth after consuming both parens
339                self.pop_paren_depth();
340                CTEType::Standard(query)
341            };
342
343            ctes.push(CTE {
344                name,
345                column_list,
346                cte_type,
347            });
348
349            // Check for more CTEs
350            if !matches!(self.current_token, Token::Comma) {
351                break;
352            }
353            self.advance();
354        }
355
356        Ok(ctes)
357    }
358
359    /// Helper function to parse an optional table alias (with or without AS keyword)
360    fn parse_optional_alias(&mut self) -> Result<Option<String>, String> {
361        if matches!(self.current_token, Token::As) {
362            self.advance();
363            match &self.current_token {
364                Token::Identifier(name) => {
365                    let alias = name.clone();
366                    self.advance();
367                    Ok(Some(alias))
368                }
369                _ => Err("Expected alias name after AS".to_string()),
370            }
371        } else if let Token::Identifier(name) = &self.current_token {
372            // AS is optional for table aliases
373            let alias = name.clone();
374            self.advance();
375            Ok(Some(alias))
376        } else {
377            Ok(None)
378        }
379    }
380
381    /// Helper function to check if an identifier is valid (quoted or regular)
382    fn is_valid_identifier(name: &str) -> bool {
383        if name.starts_with('"') && name.ends_with('"') {
384            // Quoted identifier - always valid
385            true
386        } else {
387            // Regular identifier - check if it's alphanumeric or underscore
388            name.chars().all(|c| c.is_alphanumeric() || c == '_')
389        }
390    }
391
392    /// Helper function to update parentheses depth tracking
393    fn update_paren_depth(&mut self, token: &Token) -> Result<(), String> {
394        match token {
395            Token::LeftParen => self.paren_depth += 1,
396            Token::RightParen => {
397                self.paren_depth -= 1;
398                // Check for extra closing parenthesis
399                if self.paren_depth < 0 {
400                    return Err(
401                        "Unexpected closing parenthesis - no matching opening parenthesis"
402                            .to_string(),
403                    );
404                }
405            }
406            _ => {}
407        }
408        Ok(())
409    }
410
411    /// Helper function to parse comma-separated argument list
412    fn parse_argument_list(&mut self) -> Result<Vec<SqlExpression>, String> {
413        let mut args = Vec::new();
414
415        if !matches!(self.current_token, Token::RightParen) {
416            loop {
417                args.push(self.parse_expression()?);
418
419                if matches!(self.current_token, Token::Comma) {
420                    self.advance();
421                } else {
422                    break;
423                }
424            }
425        }
426
427        Ok(args)
428    }
429
430    /// Helper function to check for balanced parentheses at the end of parsing
431    fn check_balanced_parentheses(&self) -> Result<(), String> {
432        if self.paren_depth > 0 {
433            Err(format!(
434                "Unclosed parenthesis - missing {} closing parenthes{}",
435                self.paren_depth,
436                if self.paren_depth == 1 { "is" } else { "es" }
437            ))
438        } else if self.paren_depth < 0 {
439            Err("Extra closing parenthesis found - no matching opening parenthesis".to_string())
440        } else {
441            Ok(())
442        }
443    }
444
445    fn parse_select_statement(&mut self) -> Result<SelectStatement, String> {
446        self.trace_enter("parse_select_statement");
447        let result = self.parse_select_statement_inner()?;
448
449        // Check for balanced parentheses at the end of parsing
450        self.check_balanced_parentheses()?;
451
452        Ok(result)
453    }
454
455    fn parse_select_statement_inner(&mut self) -> Result<SelectStatement, String> {
456        self.consume(Token::Select)?;
457
458        // Check for DISTINCT keyword
459        let distinct = if matches!(self.current_token, Token::Distinct) {
460            self.advance();
461            true
462        } else {
463            false
464        };
465
466        // Parse SELECT items (supports computed expressions)
467        let select_items = self.parse_select_items()?;
468
469        // Create legacy columns vector for backward compatibility
470        let columns = select_items
471            .iter()
472            .map(|item| match item {
473                SelectItem::Star => "*".to_string(),
474                SelectItem::Column(col_ref) => col_ref.name.clone(),
475                SelectItem::Expression { alias, .. } => alias.clone(),
476            })
477            .collect();
478
479        // Parse FROM clause - can be a table name, subquery, or table function
480        let (from_table, from_subquery, from_function, from_alias) =
481            if matches!(self.current_token, Token::From) {
482                self.advance();
483
484                // Check for table function like RANGE()
485                if let Token::Identifier(name) = &self.current_token.clone() {
486                    // Check if this is a table function by consulting the registry
487                    // We need to lookahead to see if there's a parenthesis to distinguish
488                    // between a function call and a table with the same name
489                    let has_paren = self.peek_token() == Some(Token::LeftParen);
490                    if self.debug_trace {
491                        eprintln!(
492                            "  Checking {} for table function, has_paren={}",
493                            name, has_paren
494                        );
495                    }
496
497                    // Check if it's a known table function or generator
498                    // In FROM clause context, prioritize generators over scalar functions
499                    let is_table_function = if has_paren {
500                        // First check generator registry (for FROM clause context)
501                        if self.debug_trace {
502                            eprintln!("  Checking generator registry for {}", name.to_uppercase());
503                        }
504                        if let Some(_gen) = self.generator_registry.get(&name.to_uppercase()) {
505                            if self.debug_trace {
506                                eprintln!("  Found {} in generator registry", name);
507                            }
508                            self.trace_token(&format!("Found generator: {}", name));
509                            true
510                        } else {
511                            // Then check if it's a table function in the function registry
512                            if let Some(func) = self.function_registry.get(&name.to_uppercase()) {
513                                let sig = func.signature();
514                                let is_table_fn = sig.category == FunctionCategory::TableFunction;
515                                if self.debug_trace {
516                                    eprintln!(
517                                        "  Found {} in function registry, is_table_function={}",
518                                        name, is_table_fn
519                                    );
520                                }
521                                if is_table_fn {
522                                    self.trace_token(&format!(
523                                        "Found table function in function registry: {}",
524                                        name
525                                    ));
526                                }
527                                is_table_fn
528                            } else {
529                                if self.debug_trace {
530                                    eprintln!("  {} not found in either registry", name);
531                                    self.trace_token(&format!(
532                                        "Not found as generator or table function: {}",
533                                        name
534                                    ));
535                                }
536                                false
537                            }
538                        }
539                    } else {
540                        if self.debug_trace {
541                            eprintln!("  No parenthesis after {}, treating as table", name);
542                        }
543                        false
544                    };
545
546                    if is_table_function {
547                        // Parse table function
548                        let function_name = name.clone();
549                        self.advance(); // Skip function name
550
551                        // Parse arguments
552                        self.consume(Token::LeftParen)?;
553                        let args = self.parse_argument_list()?;
554                        self.consume(Token::RightParen)?;
555
556                        // Optional alias
557                        let alias = if matches!(self.current_token, Token::As) {
558                            self.advance();
559                            match &self.current_token {
560                                Token::Identifier(name) => {
561                                    let alias = name.clone();
562                                    self.advance();
563                                    Some(alias)
564                                }
565                                _ => return Err("Expected alias name after AS".to_string()),
566                            }
567                        } else if let Token::Identifier(name) = &self.current_token {
568                            let alias = name.clone();
569                            self.advance();
570                            Some(alias)
571                        } else {
572                            None
573                        };
574
575                        (
576                            None,
577                            None,
578                            Some(TableFunction::Generator {
579                                name: function_name,
580                                args,
581                            }),
582                            alias,
583                        )
584                    } else {
585                        // Not a RANGE, SPLIT, or generator function, so it's a regular table name
586                        let table_name = name.clone();
587                        self.advance();
588
589                        // Check for optional alias
590                        let alias = self.parse_optional_alias()?;
591
592                        (Some(table_name), None, None, alias)
593                    }
594                } else if matches!(self.current_token, Token::LeftParen) {
595                    // Check for subquery: FROM (SELECT ...) or FROM (WITH ... SELECT ...)
596                    self.advance();
597
598                    // Parse the subquery - it might start with WITH
599                    let subquery = if matches!(self.current_token, Token::With) {
600                        self.parse_with_clause_inner()?
601                    } else {
602                        self.parse_select_statement_inner()?
603                    };
604
605                    self.consume(Token::RightParen)?;
606
607                    // Subqueries must have an alias
608                    let alias = if matches!(self.current_token, Token::As) {
609                        self.advance();
610                        match &self.current_token {
611                            Token::Identifier(name) => {
612                                let alias = name.clone();
613                                self.advance();
614                                alias
615                            }
616                            _ => return Err("Expected alias name after AS".to_string()),
617                        }
618                    } else {
619                        // AS is optional, but alias is required
620                        match &self.current_token {
621                            Token::Identifier(name) => {
622                                let alias = name.clone();
623                                self.advance();
624                                alias
625                            }
626                            _ => {
627                                return Err(
628                                    "Subquery in FROM must have an alias (e.g., AS t)".to_string()
629                                )
630                            }
631                        }
632                    };
633
634                    (None, Some(Box::new(subquery)), None, Some(alias))
635                } else {
636                    // Regular table name
637                    match &self.current_token {
638                        Token::Identifier(table) => {
639                            let table_name = table.clone();
640                            self.advance();
641
642                            // Check for optional alias
643                            let alias = self.parse_optional_alias()?;
644
645                            (Some(table_name), None, None, alias)
646                        }
647                        Token::QuotedIdentifier(table) => {
648                            // Handle quoted table names
649                            let table_name = table.clone();
650                            self.advance();
651
652                            // Check for optional alias
653                            let alias = self.parse_optional_alias()?;
654
655                            (Some(table_name), None, None, alias)
656                        }
657                        _ => return Err("Expected table name or subquery after FROM".to_string()),
658                    }
659                }
660            } else {
661                (None, None, None, None)
662            };
663
664        // Parse JOIN clauses
665        let mut joins = Vec::new();
666        while self.is_join_token() {
667            joins.push(self.parse_join_clause()?);
668        }
669
670        let where_clause = if matches!(self.current_token, Token::Where) {
671            self.advance();
672            Some(self.parse_where_clause()?)
673        } else {
674            None
675        };
676
677        let group_by = if matches!(self.current_token, Token::GroupBy) {
678            self.advance();
679            // Parse expressions instead of just identifiers for GROUP BY
680            // This allows GROUP BY TIME_BUCKET(...), CASE ..., etc.
681            Some(self.parse_expression_list()?)
682        } else {
683            None
684        };
685
686        // Parse HAVING clause (must come after GROUP BY)
687        let having = if matches!(self.current_token, Token::Having) {
688            if group_by.is_none() {
689                return Err("HAVING clause requires GROUP BY".to_string());
690            }
691            self.advance();
692            Some(self.parse_expression()?)
693        } else {
694            None
695        };
696
697        // Parse ORDER BY clause (comes after GROUP BY and HAVING)
698        let order_by = if matches!(self.current_token, Token::OrderBy) {
699            self.trace_token("Found OrderBy token");
700            self.advance();
701            Some(self.parse_order_by_list()?)
702        } else if let Token::Identifier(s) = &self.current_token {
703            // This shouldn't happen if the lexer properly tokenizes ORDER BY
704            // But keeping as fallback for compatibility
705            if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
706                self.trace_token("Warning: ORDER as identifier instead of OrderBy token");
707                self.advance(); // consume ORDER
708                if matches!(&self.current_token, Token::By) {
709                    self.advance(); // consume BY
710                    Some(self.parse_order_by_list()?)
711                } else {
712                    return Err("Expected BY after ORDER".to_string());
713                }
714            } else {
715                None
716            }
717        } else {
718            None
719        };
720
721        // Parse LIMIT clause
722        let limit = if matches!(self.current_token, Token::Limit) {
723            self.advance();
724            match &self.current_token {
725                Token::NumberLiteral(num) => {
726                    let limit_val = num
727                        .parse::<usize>()
728                        .map_err(|_| format!("Invalid LIMIT value: {num}"))?;
729                    self.advance();
730                    Some(limit_val)
731                }
732                _ => return Err("Expected number after LIMIT".to_string()),
733            }
734        } else {
735            None
736        };
737
738        // Parse OFFSET clause
739        let offset = if matches!(self.current_token, Token::Offset) {
740            self.advance();
741            match &self.current_token {
742                Token::NumberLiteral(num) => {
743                    let offset_val = num
744                        .parse::<usize>()
745                        .map_err(|_| format!("Invalid OFFSET value: {num}"))?;
746                    self.advance();
747                    Some(offset_val)
748                }
749                _ => return Err("Expected number after OFFSET".to_string()),
750            }
751        } else {
752            None
753        };
754
755        Ok(SelectStatement {
756            distinct,
757            columns,
758            select_items,
759            from_table,
760            from_subquery,
761            from_function,
762            from_alias,
763            joins,
764            where_clause,
765            order_by,
766            group_by,
767            having,
768            limit,
769            offset,
770            ctes: Vec::new(), // Will be populated by WITH clause parser
771        })
772    }
773
774    /// Parse SELECT items that support computed expressions with aliases
775    fn parse_select_items(&mut self) -> Result<Vec<SelectItem>, String> {
776        let mut items = Vec::new();
777
778        loop {
779            // Check for * only at the beginning of a select item
780            // After a comma, * could be either SELECT * or part of multiplication
781            if matches!(self.current_token, Token::Star) {
782                // Determine if this is SELECT * or multiplication
783                // SELECT * is only valid:
784                // 1. As the first item in SELECT
785                // 2. Right after a comma (but not if followed by something that makes it multiplication)
786
787                // For now, treat Star as SELECT * only if we're at the start or just after a comma
788                // and the star is not immediately followed by something that would make it multiplication
789                items.push(SelectItem::Star);
790                self.advance();
791            } else {
792                // Parse expression or column
793                let expr = self.parse_comparison()?; // Use comparison to support IS NULL and other comparisons
794
795                // Check for AS alias
796                let alias = if matches!(self.current_token, Token::As) {
797                    self.advance();
798                    match &self.current_token {
799                        Token::Identifier(alias_name) => {
800                            let alias = alias_name.clone();
801                            self.advance();
802                            alias
803                        }
804                        Token::QuotedIdentifier(alias_name) => {
805                            let alias = alias_name.clone();
806                            self.advance();
807                            alias
808                        }
809                        _ => return Err("Expected alias name after AS".to_string()),
810                    }
811                } else {
812                    // Generate default alias based on expression
813                    match &expr {
814                        SqlExpression::Column(col_ref) => col_ref.name.clone(),
815                        _ => format!("expr_{}", items.len() + 1), // Default alias for computed expressions
816                    }
817                };
818
819                // Create SelectItem based on expression type
820                let item = match expr {
821                    SqlExpression::Column(col_ref) if alias == col_ref.name => {
822                        // Simple column reference without alias
823                        SelectItem::Column(col_ref)
824                    }
825                    _ => {
826                        // Computed expression or column with different alias
827                        SelectItem::Expression { expr, alias }
828                    }
829                };
830
831                items.push(item);
832            }
833
834            // Check for comma to continue
835            if matches!(self.current_token, Token::Comma) {
836                self.advance();
837            } else {
838                break;
839            }
840        }
841
842        Ok(items)
843    }
844
845    fn parse_identifier_list(&mut self) -> Result<Vec<String>, String> {
846        let mut identifiers = Vec::new();
847
848        loop {
849            match &self.current_token {
850                Token::Identifier(id) => {
851                    // Check if this is a reserved keyword that should stop identifier parsing
852                    if Self::is_identifier_reserved(id) {
853                        // Stop parsing identifiers if we hit a reserved keyword
854                        break;
855                    }
856                    identifiers.push(id.clone());
857                    self.advance();
858                }
859                Token::QuotedIdentifier(id) => {
860                    // Handle quoted identifiers like "Customer Id"
861                    identifiers.push(id.clone());
862                    self.advance();
863                }
864                _ => {
865                    // Stop parsing if we hit any other token type
866                    break;
867                }
868            }
869
870            if matches!(self.current_token, Token::Comma) {
871                self.advance();
872            } else {
873                break;
874            }
875        }
876
877        if identifiers.is_empty() {
878            return Err("Expected at least one identifier".to_string());
879        }
880
881        Ok(identifiers)
882    }
883
884    fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
885        let mut partition_by = Vec::new();
886        let mut order_by = Vec::new();
887
888        // Check for PARTITION BY
889        if matches!(self.current_token, Token::Partition) {
890            self.advance(); // consume PARTITION
891            if !matches!(self.current_token, Token::By) {
892                return Err("Expected BY after PARTITION".to_string());
893            }
894            self.advance(); // consume BY
895
896            // Parse partition columns
897            partition_by = self.parse_identifier_list()?;
898        }
899
900        // Check for ORDER BY
901        if matches!(self.current_token, Token::OrderBy) {
902            self.advance(); // consume ORDER BY (as single token)
903            order_by = self.parse_order_by_list()?;
904        } else if let Token::Identifier(s) = &self.current_token {
905            if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
906                // Handle ORDER BY as two tokens
907                self.advance(); // consume ORDER
908                if !matches!(self.current_token, Token::By) {
909                    return Err("Expected BY after ORDER".to_string());
910                }
911                self.advance(); // consume BY
912                order_by = self.parse_order_by_list()?;
913            }
914        }
915
916        // Parse optional window frame (ROWS/RANGE BETWEEN ... AND ...)
917        let frame = self.parse_window_frame()?;
918
919        Ok(WindowSpec {
920            partition_by,
921            order_by,
922            frame,
923        })
924    }
925
926    fn parse_order_by_list(&mut self) -> Result<Vec<OrderByColumn>, String> {
927        let mut order_columns = Vec::new();
928
929        loop {
930            let column = match &self.current_token {
931                Token::Identifier(id) => {
932                    let col = id.clone();
933                    self.advance();
934                    col
935                }
936                Token::QuotedIdentifier(id) => {
937                    let col = id.clone();
938                    self.advance();
939                    col
940                }
941                Token::NumberLiteral(num) if self.columns.iter().any(|col| col == num) => {
942                    // Support numeric column names like "202204"
943                    let col = num.clone();
944                    self.advance();
945                    col
946                }
947                // Handle window keywords that can be column names
948                Token::Row => {
949                    self.advance();
950                    "row".to_string()
951                }
952                Token::Rows => {
953                    self.advance();
954                    "rows".to_string()
955                }
956                Token::Range => {
957                    self.advance();
958                    "range".to_string()
959                }
960                _ => return Err("Expected column name in ORDER BY".to_string()),
961            };
962
963            // Check for ASC/DESC
964            let direction = match &self.current_token {
965                Token::Asc => {
966                    self.advance();
967                    SortDirection::Asc
968                }
969                Token::Desc => {
970                    self.advance();
971                    SortDirection::Desc
972                }
973                _ => SortDirection::Asc, // Default to ASC if not specified
974            };
975
976            order_columns.push(OrderByColumn { column, direction });
977
978            if matches!(self.current_token, Token::Comma) {
979                self.advance();
980            } else {
981                break;
982            }
983        }
984
985        Ok(order_columns)
986    }
987
988    fn parse_window_frame(&mut self) -> Result<Option<WindowFrame>, String> {
989        // Check for ROWS or RANGE keyword
990        let unit = match &self.current_token {
991            Token::Rows => {
992                self.advance();
993                FrameUnit::Rows
994            }
995            Token::Identifier(id) if id.to_uppercase() == "RANGE" => {
996                // RANGE as window frame unit
997                self.advance();
998                FrameUnit::Range
999            }
1000            _ => return Ok(None), // No window frame specified
1001        };
1002
1003        // Check for BETWEEN or just a single bound
1004        let (start, end) = if let Token::Between = &self.current_token {
1005            self.advance(); // consume BETWEEN
1006                            // Parse start bound
1007            let start = self.parse_frame_bound()?;
1008
1009            // Expect AND
1010            if !matches!(&self.current_token, Token::And) {
1011                return Err("Expected AND after window frame start bound".to_string());
1012            }
1013            self.advance();
1014
1015            // Parse end bound
1016            let end = self.parse_frame_bound()?;
1017            (start, Some(end))
1018        } else {
1019            // Single bound (e.g., "ROWS 5 PRECEDING")
1020            let bound = self.parse_frame_bound()?;
1021            (bound, None)
1022        };
1023
1024        Ok(Some(WindowFrame { unit, start, end }))
1025    }
1026
1027    fn parse_frame_bound(&mut self) -> Result<FrameBound, String> {
1028        match &self.current_token {
1029            Token::Unbounded => {
1030                self.advance();
1031                match &self.current_token {
1032                    Token::Preceding => {
1033                        self.advance();
1034                        Ok(FrameBound::UnboundedPreceding)
1035                    }
1036                    Token::Following => {
1037                        self.advance();
1038                        Ok(FrameBound::UnboundedFollowing)
1039                    }
1040                    _ => Err("Expected PRECEDING or FOLLOWING after UNBOUNDED".to_string()),
1041                }
1042            }
1043            Token::Current => {
1044                self.advance();
1045                if matches!(&self.current_token, Token::Row) {
1046                    self.advance();
1047                    return Ok(FrameBound::CurrentRow);
1048                }
1049                Err("Expected ROW after CURRENT".to_string())
1050            }
1051            Token::NumberLiteral(num) => {
1052                let n: i64 = num
1053                    .parse()
1054                    .map_err(|_| "Invalid number in window frame".to_string())?;
1055                self.advance();
1056                match &self.current_token {
1057                    Token::Preceding => {
1058                        self.advance();
1059                        Ok(FrameBound::Preceding(n))
1060                    }
1061                    Token::Following => {
1062                        self.advance();
1063                        Ok(FrameBound::Following(n))
1064                    }
1065                    _ => Err("Expected PRECEDING or FOLLOWING after number".to_string()),
1066                }
1067            }
1068            _ => Err("Invalid window frame bound".to_string()),
1069        }
1070    }
1071
1072    fn parse_where_clause(&mut self) -> Result<WhereClause, String> {
1073        // Parse the entire WHERE clause as a single expression tree
1074        // The logical operators (AND/OR) are now handled within parse_expression
1075        let expr = self.parse_expression()?;
1076
1077        // Check for unexpected closing parenthesis
1078        if matches!(self.current_token, Token::RightParen) && self.paren_depth <= 0 {
1079            return Err(
1080                "Unexpected closing parenthesis - no matching opening parenthesis".to_string(),
1081            );
1082        }
1083
1084        // Create a single condition with the entire expression
1085        let conditions = vec![Condition {
1086            expr,
1087            connector: None,
1088        }];
1089
1090        Ok(WhereClause { conditions })
1091    }
1092
1093    fn parse_expression(&mut self) -> Result<SqlExpression, String> {
1094        self.trace_enter("parse_expression");
1095        // Start with logical OR as the lowest precedence operator
1096        // The hierarchy is: OR -> AND -> comparison -> additive -> multiplicative -> primary
1097        let mut left = self.parse_logical_or()?;
1098
1099        // Handle IN operator (not preceded by NOT)
1100        // This uses the modular comparison module
1101        left = parse_in_operator(self, left)?;
1102
1103        let result = Ok(left);
1104        self.trace_exit("parse_expression", &result);
1105        result
1106    }
1107
    /// Parse a comparison-level expression.
    ///
    /// Thin wrapper that hands this parser to `parse_comparison_expr`, the
    /// alias under which `expressions::comparison::parse_comparison` is
    /// imported at the top of the file.
    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
        parse_comparison_expr(self)
    }
1112
    /// Parse an additive-level expression.
    ///
    /// Thin wrapper that hands this parser to `parse_additive_expr`, the alias
    /// under which `expressions::arithmetic::parse_additive` is imported at the
    /// top of the file.
    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
        parse_additive_expr(self)
    }
1117
    /// Parse a multiplicative-level expression.
    ///
    /// Thin wrapper that hands this parser to `parse_multiplicative_expr`, the
    /// alias under which `expressions::arithmetic::parse_multiplicative` is
    /// imported at the top of the file.
    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
        parse_multiplicative_expr(self)
    }
1122
    /// Parse an OR-level expression.
    ///
    /// Thin wrapper that hands this parser to `parse_logical_or_expr`, the
    /// alias under which `expressions::logical::parse_logical_or` is imported
    /// at the top of the file.
    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
        parse_logical_or_expr(self)
    }
1127
    /// Parse an AND-level expression.
    ///
    /// Thin wrapper that hands this parser to `parse_logical_and_expr`, the
    /// alias under which `expressions::logical::parse_logical_and` is imported
    /// at the top of the file.
    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
        parse_logical_and_expr(self)
    }
1132
    /// Parse a CASE expression.
    ///
    /// Thin wrapper that hands this parser to `parse_case_expr`, the alias
    /// under which `expressions::case::parse_case_expression` is imported at
    /// the top of the file.
    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
        parse_case_expr(self)
    }
1137
1138    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
1139        // Use the new modular primary expression parser
1140        // Clone the necessary data to avoid borrowing issues
1141        let columns = self.columns.clone();
1142        let in_method_args = self.in_method_args;
1143        let ctx = PrimaryExpressionContext {
1144            columns: &columns,
1145            in_method_args,
1146        };
1147        parse_primary_expr(self, &ctx)
1148    }
1149
1150    // Keep the old implementation temporarily for reference (will be removed)
1151    fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
1152        // Set flag to indicate we're parsing method arguments
1153        self.in_method_args = true;
1154
1155        let args = self.parse_argument_list()?;
1156
1157        // Clear the flag
1158        self.in_method_args = false;
1159
1160        Ok(args)
1161    }
1162
1163    fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
1164        let mut args = Vec::new();
1165        let mut has_distinct = false;
1166
1167        if !matches!(self.current_token, Token::RightParen) {
1168            // Check if first argument starts with DISTINCT
1169            if matches!(self.current_token, Token::Distinct) {
1170                self.advance(); // consume DISTINCT
1171                has_distinct = true;
1172            }
1173
1174            // Parse the expression (either after DISTINCT or directly)
1175            args.push(self.parse_additive()?);
1176
1177            // Parse any remaining arguments (DISTINCT only applies to first arg for aggregates)
1178            while matches!(self.current_token, Token::Comma) {
1179                self.advance();
1180                args.push(self.parse_additive()?);
1181            }
1182        }
1183
1184        Ok((args, has_distinct))
1185    }
1186
1187    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
1188        let mut expressions = Vec::new();
1189
1190        loop {
1191            expressions.push(self.parse_expression()?);
1192
1193            if matches!(self.current_token, Token::Comma) {
1194                self.advance();
1195            } else {
1196                break;
1197            }
1198        }
1199
1200        Ok(expressions)
1201    }
1202
    /// Report the lexer's current position.
    ///
    /// Forwards to `self.lexer.get_position()`; the unit of the returned value
    /// is whatever the lexer uses (presumably an offset into the query text —
    /// TODO confirm against `Lexer`).
    #[must_use]
    pub fn get_position(&self) -> usize {
        self.lexer.get_position()
    }
1207
1208    // Check if current token is a JOIN-related token
1209    fn is_join_token(&self) -> bool {
1210        matches!(
1211            self.current_token,
1212            Token::Join | Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
1213        )
1214    }
1215
1216    // Parse a JOIN clause
1217    fn parse_join_clause(&mut self) -> Result<JoinClause, String> {
1218        // Determine join type
1219        let join_type = match &self.current_token {
1220            Token::Join => {
1221                self.advance();
1222                JoinType::Inner // Default JOIN is INNER JOIN
1223            }
1224            Token::Inner => {
1225                self.advance();
1226                if !matches!(self.current_token, Token::Join) {
1227                    return Err("Expected JOIN after INNER".to_string());
1228                }
1229                self.advance();
1230                JoinType::Inner
1231            }
1232            Token::Left => {
1233                self.advance();
1234                // Handle optional OUTER keyword
1235                if matches!(self.current_token, Token::Outer) {
1236                    self.advance();
1237                }
1238                if !matches!(self.current_token, Token::Join) {
1239                    return Err("Expected JOIN after LEFT".to_string());
1240                }
1241                self.advance();
1242                JoinType::Left
1243            }
1244            Token::Right => {
1245                self.advance();
1246                // Handle optional OUTER keyword
1247                if matches!(self.current_token, Token::Outer) {
1248                    self.advance();
1249                }
1250                if !matches!(self.current_token, Token::Join) {
1251                    return Err("Expected JOIN after RIGHT".to_string());
1252                }
1253                self.advance();
1254                JoinType::Right
1255            }
1256            Token::Full => {
1257                self.advance();
1258                // Handle optional OUTER keyword
1259                if matches!(self.current_token, Token::Outer) {
1260                    self.advance();
1261                }
1262                if !matches!(self.current_token, Token::Join) {
1263                    return Err("Expected JOIN after FULL".to_string());
1264                }
1265                self.advance();
1266                JoinType::Full
1267            }
1268            Token::Cross => {
1269                self.advance();
1270                if !matches!(self.current_token, Token::Join) {
1271                    return Err("Expected JOIN after CROSS".to_string());
1272                }
1273                self.advance();
1274                JoinType::Cross
1275            }
1276            _ => return Err("Expected JOIN keyword".to_string()),
1277        };
1278
1279        // Parse the table being joined
1280        let (table, alias) = self.parse_join_table_source()?;
1281
1282        // Parse ON condition (required for all joins except CROSS JOIN)
1283        let condition = if join_type == JoinType::Cross {
1284            // CROSS JOIN doesn't have ON condition - create empty condition
1285            JoinCondition { conditions: vec![] }
1286        } else {
1287            if !matches!(self.current_token, Token::On) {
1288                return Err("Expected ON keyword after JOIN table".to_string());
1289            }
1290            self.advance();
1291            self.parse_join_condition()?
1292        };
1293
1294        Ok(JoinClause {
1295            join_type,
1296            table,
1297            alias,
1298            condition,
1299        })
1300    }
1301
1302    fn parse_join_table_source(&mut self) -> Result<(TableSource, Option<String>), String> {
1303        let table = match &self.current_token {
1304            Token::Identifier(name) => {
1305                let table_name = name.clone();
1306                self.advance();
1307                TableSource::Table(table_name)
1308            }
1309            Token::LeftParen => {
1310                // Subquery as table source
1311                self.advance();
1312                let subquery = self.parse_select_statement_inner()?;
1313                if !matches!(self.current_token, Token::RightParen) {
1314                    return Err("Expected ')' after subquery".to_string());
1315                }
1316                self.advance();
1317
1318                // Subqueries must have an alias
1319                let alias = match &self.current_token {
1320                    Token::Identifier(alias_name) => {
1321                        let alias = alias_name.clone();
1322                        self.advance();
1323                        alias
1324                    }
1325                    Token::As => {
1326                        self.advance();
1327                        match &self.current_token {
1328                            Token::Identifier(alias_name) => {
1329                                let alias = alias_name.clone();
1330                                self.advance();
1331                                alias
1332                            }
1333                            _ => return Err("Expected alias after AS keyword".to_string()),
1334                        }
1335                    }
1336                    _ => return Err("Subqueries must have an alias".to_string()),
1337                };
1338
1339                return Ok((
1340                    TableSource::DerivedTable {
1341                        query: Box::new(subquery),
1342                        alias: alias.clone(),
1343                    },
1344                    Some(alias),
1345                ));
1346            }
1347            _ => return Err("Expected table name or subquery in JOIN clause".to_string()),
1348        };
1349
1350        // Check for optional alias
1351        let alias = match &self.current_token {
1352            Token::Identifier(alias_name) => {
1353                let alias = alias_name.clone();
1354                self.advance();
1355                Some(alias)
1356            }
1357            Token::As => {
1358                self.advance();
1359                match &self.current_token {
1360                    Token::Identifier(alias_name) => {
1361                        let alias = alias_name.clone();
1362                        self.advance();
1363                        Some(alias)
1364                    }
1365                    _ => return Err("Expected alias after AS keyword".to_string()),
1366                }
1367            }
1368            _ => None,
1369        };
1370
1371        Ok((table, alias))
1372    }
1373
1374    fn parse_join_condition(&mut self) -> Result<JoinCondition, String> {
1375        let mut conditions = Vec::new();
1376
1377        // Parse first condition
1378        conditions.push(self.parse_single_join_condition()?);
1379
1380        // Parse additional conditions connected by AND
1381        while matches!(self.current_token, Token::And) {
1382            self.advance(); // consume AND
1383            conditions.push(self.parse_single_join_condition()?);
1384        }
1385
1386        Ok(JoinCondition { conditions })
1387    }
1388
1389    fn parse_single_join_condition(&mut self) -> Result<SingleJoinCondition, String> {
1390        // Parse left column (can include table prefix)
1391        let left_column = self.parse_column_reference()?;
1392
1393        // Parse operator
1394        let operator = match &self.current_token {
1395            Token::Equal => JoinOperator::Equal,
1396            Token::NotEqual => JoinOperator::NotEqual,
1397            Token::LessThan => JoinOperator::LessThan,
1398            Token::LessThanOrEqual => JoinOperator::LessThanOrEqual,
1399            Token::GreaterThan => JoinOperator::GreaterThan,
1400            Token::GreaterThanOrEqual => JoinOperator::GreaterThanOrEqual,
1401            _ => return Err("Expected comparison operator in JOIN condition".to_string()),
1402        };
1403        self.advance();
1404
1405        // Parse right column (can include table prefix)
1406        let right_column = self.parse_column_reference()?;
1407
1408        Ok(SingleJoinCondition {
1409            left_column,
1410            operator,
1411            right_column,
1412        })
1413    }
1414
1415    fn parse_column_reference(&mut self) -> Result<String, String> {
1416        match &self.current_token {
1417            Token::Identifier(name) => {
1418                let mut column_ref = name.clone();
1419                self.advance();
1420
1421                // Check for table.column notation
1422                if matches!(self.current_token, Token::Dot) {
1423                    self.advance();
1424                    match &self.current_token {
1425                        Token::Identifier(col_name) => {
1426                            column_ref.push('.');
1427                            column_ref.push_str(col_name);
1428                            self.advance();
1429                        }
1430                        _ => return Err("Expected column name after '.'".to_string()),
1431                    }
1432                }
1433
1434                Ok(column_ref)
1435            }
1436            _ => Err("Expected column reference".to_string()),
1437        }
1438    }
1439}
1440
/// Context detection for cursor position.
///
/// Returned by `detect_cursor_context` as the first element of its result,
/// alongside an optional partially typed word.
// NOTE(review): the clause variants (`SelectClause`, `FromClause`,
// `WhereClause`, `OrderByClause`) and `InExpression` / `Unknown` are not
// constructed in the visible part of the file; their names suggest the part of
// the query the cursor is in — confirm against the analysis functions.
#[derive(Debug, Clone)]
pub enum CursorContext {
    SelectClause,
    FromClause,
    WhereClause,
    OrderByClause,
    /// Carries a column name. `handle_method_call_context` builds this variant
    /// with surrounding double quotes removed from the name.
    AfterColumn(String),
    /// Carries the logical operator involved.
    AfterLogicalOp(LogicalOp),
    /// Fields: column_name, operator.
    AfterComparisonOp(String, String),
    /// Fields: object, method.
    InMethodCall(String, String),
    InExpression,
    Unknown,
}
1455
/// Return the prefix of `s` that ends at byte offset `pos`, never cutting a
/// UTF-8 character in half.
///
/// If `pos` is at or beyond the end of `s`, the whole string is returned.
/// If `pos` falls inside a multi-byte character, the cut moves back to the
/// start of that character.
fn safe_slice_to(s: &str, pos: usize) -> &str {
    if pos >= s.len() {
        return s;
    }

    // Offset 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=pos)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);

    &s[..cut]
}
1470
/// Return the suffix of `s` that starts at byte offset `pos`, never cutting a
/// UTF-8 character in half.
///
/// If `pos` is at or beyond the end of `s`, the empty string is returned.
/// If `pos` falls inside a multi-byte character, the start moves forward to
/// the beginning of the next character.
fn safe_slice_from(s: &str, pos: usize) -> &str {
    if pos >= s.len() {
        return "";
    }

    // `s.len()` is always a boundary, so the search cannot come up empty.
    let start = (pos..=s.len())
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(s.len());

    &s[start..]
}
1485
1486#[must_use]
1487pub fn detect_cursor_context(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
1488    let truncated = safe_slice_to(query, cursor_pos);
1489    let mut parser = Parser::new(truncated);
1490
1491    // Try to parse as much as possible
1492    if let Ok(stmt) = parser.parse() {
1493        let (ctx, partial) = analyze_statement(&stmt, truncated, cursor_pos);
1494        #[cfg(test)]
1495        println!("analyze_statement returned: {ctx:?}, {partial:?} for query: '{truncated}'");
1496        (ctx, partial)
1497    } else {
1498        // Partial parse - analyze what we have
1499        let (ctx, partial) = analyze_partial(truncated, cursor_pos);
1500        #[cfg(test)]
1501        println!("analyze_partial returned: {ctx:?}, {partial:?} for query: '{truncated}'");
1502        (ctx, partial)
1503    }
1504}
1505
1506#[must_use]
1507pub fn tokenize_query(query: &str) -> Vec<String> {
1508    let mut lexer = Lexer::new(query);
1509    let tokens = lexer.tokenize_all();
1510    tokens.iter().map(|t| format!("{t:?}")).collect()
1511}
1512
/// Find the opening double quote of a quoted string by scanning backwards.
///
/// `pos` is the index of the closing quote; it is skipped, and the bytes
/// before it are searched from right to left. A quote counts as the opener
/// unless the byte immediately before it is a backslash. A quote at index 0
/// always counts, since nothing can precede it.
///
/// Returns the index of the opening quote, or `None` when there is none
/// (including when `pos` is 0 or `bytes` is empty). A `pos` beyond the end of
/// `bytes` is clamped to the slice length instead of panicking.
#[must_use]
fn find_quote_start(bytes: &[u8], pos: usize) -> Option<usize> {
    // Only indices strictly before the closing quote, and inside the slice.
    let end = pos.min(bytes.len());

    (0..end)
        .rev()
        .find(|&idx| bytes[idx] == b'"' && (idx == 0 || bytes[idx - 1] != b'\\'))
}
1535
1536/// Helper function to handle method call context after validation
1537fn handle_method_call_context(col_name: &str, after_dot: &str) -> (CursorContext, Option<String>) {
1538    // Check if there's a partial method name after the dot
1539    let partial_method = if after_dot.is_empty() {
1540        None
1541    } else if after_dot.chars().all(|c| c.is_alphanumeric() || c == '_') {
1542        Some(after_dot.to_string())
1543    } else {
1544        None
1545    };
1546
1547    // For AfterColumn context, strip quotes if present for consistency
1548    let col_name_for_context =
1549        if col_name.starts_with('"') && col_name.ends_with('"') && col_name.len() > 2 {
1550            col_name[1..col_name.len() - 1].to_string()
1551        } else {
1552            col_name.to_string()
1553        };
1554
1555    (
1556        CursorContext::AfterColumn(col_name_for_context),
1557        partial_method,
1558    )
1559}
1560
1561/// Helper function to check if we're after a comparison operator
1562fn check_after_comparison_operator(query: &str) -> Option<(CursorContext, Option<String>)> {
1563    for op in &Parser::COMPARISON_OPERATORS {
1564        if let Some(op_pos) = query.rfind(op) {
1565            let before_op = safe_slice_to(query, op_pos);
1566            let after_op_start = op_pos + op.len();
1567            let after_op = if after_op_start < query.len() {
1568                &query[after_op_start..]
1569            } else {
1570                ""
1571            };
1572
1573            // Check if we have a column name before the operator
1574            if let Some(col_name) = before_op.split_whitespace().last() {
1575                if col_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
1576                    // Check if we're at or near the end of the query
1577                    let after_op_trimmed = after_op.trim();
1578                    if after_op_trimmed.is_empty()
1579                        || (after_op_trimmed
1580                            .chars()
1581                            .all(|c| c.is_alphanumeric() || c == '_')
1582                            && !after_op_trimmed.contains('('))
1583                    {
1584                        let partial = if after_op_trimmed.is_empty() {
1585                            None
1586                        } else {
1587                            Some(after_op_trimmed.to_string())
1588                        };
1589                        return Some((
1590                            CursorContext::AfterComparisonOp(
1591                                col_name.to_string(),
1592                                op.trim().to_string(),
1593                            ),
1594                            partial,
1595                        ));
1596                    }
1597                }
1598            }
1599        }
1600    }
1601    None
1602}
1603
1604fn analyze_statement(
1605    stmt: &SelectStatement,
1606    query: &str,
1607    _cursor_pos: usize,
1608) -> (CursorContext, Option<String>) {
1609    // First check for method call context (e.g., "columnName." or "columnName.Con")
1610    let trimmed = query.trim();
1611
1612    // Check if we're after a comparison operator (e.g., "createdDate > ")
1613    if let Some(result) = check_after_comparison_operator(query) {
1614        return result;
1615    }
1616
1617    // First check if we're after AND/OR - this takes precedence
1618    // Helper function to check if string ends with a logical operator
1619    let ends_with_logical_op = |s: &str| -> bool {
1620        let s_upper = s.to_uppercase();
1621        s_upper.ends_with(" AND") || s_upper.ends_with(" OR")
1622    };
1623
1624    if ends_with_logical_op(trimmed) {
1625        // Don't check for method context if we're clearly after a logical operator
1626    } else {
1627        // Look for the last dot in the query
1628        if let Some(dot_pos) = trimmed.rfind('.') {
1629            // Check if we're after a column name and dot
1630            let before_dot = safe_slice_to(trimmed, dot_pos);
1631            let after_dot_start = dot_pos + 1;
1632            let after_dot = if after_dot_start < trimmed.len() {
1633                &trimmed[after_dot_start..]
1634            } else {
1635                ""
1636            };
1637
1638            // Check if the part after dot looks like an incomplete method call
1639            // (not a complete method call like "Contains(...)")
1640            if !after_dot.contains('(') {
1641                // Try to extract the column name - could be quoted or regular
1642                let col_name = if before_dot.ends_with('"') {
1643                    // Handle quoted identifier - search backwards for matching opening quote
1644                    let bytes = before_dot.as_bytes();
1645                    let pos = before_dot.len() - 1; // Position of closing quote
1646
1647                    find_quote_start(bytes, pos).map(|start| safe_slice_from(before_dot, start))
1648                } else {
1649                    // Regular identifier - get the last word, handling parentheses
1650                    // Strip all leading parentheses
1651                    before_dot
1652                        .split_whitespace()
1653                        .last()
1654                        .map(|word| word.trim_start_matches('('))
1655                };
1656
1657                if let Some(col_name) = col_name {
1658                    // For quoted identifiers, keep the quotes, for regular identifiers check validity
1659                    let is_valid = Parser::is_valid_identifier(col_name);
1660
1661                    if is_valid {
1662                        return handle_method_call_context(col_name, after_dot);
1663                    }
1664                }
1665            }
1666        }
1667    }
1668
1669    // Check if we're in WHERE clause
1670    if let Some(where_clause) = &stmt.where_clause {
1671        // Check if query ends with AND/OR (with or without trailing space/partial)
1672        let trimmed_upper = trimmed.to_uppercase();
1673        if trimmed_upper.ends_with(" AND") || trimmed_upper.ends_with(" OR") {
1674            let op = if trimmed_upper.ends_with(" AND") {
1675                LogicalOp::And
1676            } else {
1677                LogicalOp::Or
1678            };
1679            return (CursorContext::AfterLogicalOp(op), None);
1680        }
1681
1682        // Check if we have AND/OR followed by a partial word
1683        let query_upper = query.to_uppercase();
1684        if let Some(and_pos) = query_upper.rfind(" AND ") {
1685            let after_and = safe_slice_from(query, and_pos + 5);
1686            let partial = extract_partial_at_end(after_and);
1687            if partial.is_some() {
1688                return (CursorContext::AfterLogicalOp(LogicalOp::And), partial);
1689            }
1690        }
1691
1692        if let Some(or_pos) = query_upper.rfind(" OR ") {
1693            let after_or = safe_slice_from(query, or_pos + 4);
1694            let partial = extract_partial_at_end(after_or);
1695            if partial.is_some() {
1696                return (CursorContext::AfterLogicalOp(LogicalOp::Or), partial);
1697            }
1698        }
1699
1700        if let Some(last_condition) = where_clause.conditions.last() {
1701            if let Some(connector) = &last_condition.connector {
1702                // We're after AND/OR
1703                return (
1704                    CursorContext::AfterLogicalOp(connector.clone()),
1705                    extract_partial_at_end(query),
1706                );
1707            }
1708        }
1709        // We're in WHERE clause but not after AND/OR
1710        return (CursorContext::WhereClause, extract_partial_at_end(query));
1711    }
1712
1713    // Check if we're after ORDER BY
1714    let query_upper = query.to_uppercase();
1715    if query_upper.ends_with(" ORDER BY") {
1716        return (CursorContext::OrderByClause, None);
1717    }
1718
1719    // Check other contexts based on what's in the statement
1720    if stmt.order_by.is_some() {
1721        return (CursorContext::OrderByClause, extract_partial_at_end(query));
1722    }
1723
1724    if stmt.from_table.is_some() && stmt.where_clause.is_none() && stmt.order_by.is_none() {
1725        return (CursorContext::FromClause, extract_partial_at_end(query));
1726    }
1727
1728    if !stmt.columns.is_empty() && stmt.from_table.is_none() {
1729        return (CursorContext::SelectClause, extract_partial_at_end(query));
1730    }
1731
1732    (CursorContext::Unknown, None)
1733}
1734
1735/// Helper function to find the last occurrence of a token type in the token stream
1736fn find_last_token(tokens: &[(usize, usize, Token)], target: &Token) -> Option<usize> {
1737    tokens
1738        .iter()
1739        .rposition(|(_, _, t)| t == target)
1740        .map(|idx| tokens[idx].0)
1741}
1742
1743/// Helper function to find the last occurrence of any matching token
1744fn find_last_matching_token<F>(
1745    tokens: &[(usize, usize, Token)],
1746    predicate: F,
1747) -> Option<(usize, &Token)>
1748where
1749    F: Fn(&Token) -> bool,
1750{
1751    tokens
1752        .iter()
1753        .rposition(|(_, _, t)| predicate(t))
1754        .map(|idx| (tokens[idx].0, &tokens[idx].2))
1755}
1756
1757/// Helper function to check if we're in a specific clause based on tokens
1758fn is_in_clause(
1759    tokens: &[(usize, usize, Token)],
1760    clause_token: Token,
1761    exclude_tokens: &[Token],
1762) -> bool {
1763    // Find the last occurrence of the clause token
1764    if let Some(clause_pos) = find_last_token(tokens, &clause_token) {
1765        // Check if any exclude tokens appear after it
1766        for (pos, _, token) in tokens.iter() {
1767            if *pos > clause_pos && exclude_tokens.contains(token) {
1768                return false;
1769            }
1770        }
1771        return true;
1772    }
1773    false
1774}
1775
1776fn analyze_partial(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
1777    // Tokenize the query up to cursor position
1778    let mut lexer = Lexer::new(query);
1779    let tokens = lexer.tokenize_all_with_positions();
1780
1781    let trimmed = query.trim();
1782
1783    #[cfg(test)]
1784    {
1785        if trimmed.contains("\"Last Name\"") {
1786            eprintln!("DEBUG analyze_partial: query='{query}', trimmed='{trimmed}'");
1787        }
1788    }
1789
1790    // Check if we're after a comparison operator (e.g., "createdDate > ")
1791    if let Some(result) = check_after_comparison_operator(query) {
1792        return result;
1793    }
1794
1795    // Look for the last dot in the query (method call context) - check this FIRST
1796    // before AND/OR detection to properly handle cases like "AND (Country."
1797    if let Some(dot_pos) = trimmed.rfind('.') {
1798        #[cfg(test)]
1799        {
1800            if trimmed.contains("\"Last Name\"") {
1801                eprintln!("DEBUG: Found dot at position {dot_pos}");
1802            }
1803        }
1804        // Check if we're after a column name and dot
1805        let before_dot = &trimmed[..dot_pos];
1806        let after_dot = &trimmed[dot_pos + 1..];
1807
1808        // Check if the part after dot looks like an incomplete method call
1809        // (not a complete method call like "Contains(...)")
1810        if !after_dot.contains('(') {
1811            // Try to extract the column name before the dot
1812            // It could be a quoted identifier like "Last Name" or a regular identifier
1813            let col_name = if before_dot.ends_with('"') {
1814                // Handle quoted identifier - search backwards for matching opening quote
1815                let bytes = before_dot.as_bytes();
1816                let pos = before_dot.len() - 1; // Position of closing quote
1817
1818                #[cfg(test)]
1819                {
1820                    if trimmed.contains("\"Last Name\"") {
1821                        eprintln!("DEBUG: before_dot='{before_dot}', looking for opening quote");
1822                    }
1823                }
1824
1825                let found_start = find_quote_start(bytes, pos);
1826
1827                if let Some(start) = found_start {
1828                    // Extract the full quoted identifier including quotes
1829                    let result = safe_slice_from(before_dot, start);
1830                    #[cfg(test)]
1831                    {
1832                        if trimmed.contains("\"Last Name\"") {
1833                            eprintln!("DEBUG: Extracted quoted identifier: '{result}'");
1834                        }
1835                    }
1836                    Some(result)
1837                } else {
1838                    #[cfg(test)]
1839                    {
1840                        if trimmed.contains("\"Last Name\"") {
1841                            eprintln!("DEBUG: No opening quote found!");
1842                        }
1843                    }
1844                    None
1845                }
1846            } else {
1847                // Regular identifier - get the last word, handling parentheses
1848                // Strip all leading parentheses
1849                before_dot
1850                    .split_whitespace()
1851                    .last()
1852                    .map(|word| word.trim_start_matches('('))
1853            };
1854
1855            if let Some(col_name) = col_name {
1856                #[cfg(test)]
1857                {
1858                    if trimmed.contains("\"Last Name\"") {
1859                        eprintln!("DEBUG: col_name = '{col_name}'");
1860                    }
1861                }
1862
1863                // For quoted identifiers, keep the quotes, for regular identifiers check validity
1864                let is_valid = Parser::is_valid_identifier(col_name);
1865
1866                #[cfg(test)]
1867                {
1868                    if trimmed.contains("\"Last Name\"") {
1869                        eprintln!("DEBUG: is_valid = {is_valid}");
1870                    }
1871                }
1872
1873                if is_valid {
1874                    return handle_method_call_context(col_name, after_dot);
1875                }
1876            }
1877        }
1878    }
1879
1880    // Check if we're after AND/OR using tokens - but only after checking for method calls
1881    if let Some((pos, token)) =
1882        find_last_matching_token(&tokens, |t| matches!(t, Token::And | Token::Or))
1883    {
1884        // Check if cursor is after the logical operator
1885        let token_end_pos = if matches!(token, Token::And) {
1886            pos + 3 // "AND" is 3 characters
1887        } else {
1888            pos + 2 // "OR" is 2 characters
1889        };
1890
1891        if cursor_pos > token_end_pos {
1892            // Extract any partial word after the operator
1893            let after_op = safe_slice_from(query, token_end_pos + 1); // +1 for the space
1894            let partial = extract_partial_at_end(after_op);
1895            let op = if matches!(token, Token::And) {
1896                LogicalOp::And
1897            } else {
1898                LogicalOp::Or
1899            };
1900            return (CursorContext::AfterLogicalOp(op), partial);
1901        }
1902    }
1903
1904    // Check if the last token is AND or OR (handles case where it's at the very end)
1905    if let Some((_, _, last_token)) = tokens.last() {
1906        if matches!(last_token, Token::And | Token::Or) {
1907            let op = if matches!(last_token, Token::And) {
1908                LogicalOp::And
1909            } else {
1910                LogicalOp::Or
1911            };
1912            return (CursorContext::AfterLogicalOp(op), None);
1913        }
1914    }
1915
1916    // Check if we're in ORDER BY clause using tokens
1917    if let Some(order_pos) = find_last_token(&tokens, &Token::OrderBy) {
1918        // Check if there's a BY token after ORDER
1919        let has_by = tokens
1920            .iter()
1921            .any(|(pos, _, t)| *pos > order_pos && matches!(t, Token::By));
1922        if has_by
1923            || tokens
1924                .last()
1925                .map_or(false, |(_, _, t)| matches!(t, Token::OrderBy))
1926        {
1927            return (CursorContext::OrderByClause, extract_partial_at_end(query));
1928        }
1929    }
1930
1931    // Check if we're in WHERE clause using tokens
1932    if is_in_clause(&tokens, Token::Where, &[Token::OrderBy, Token::GroupBy]) {
1933        return (CursorContext::WhereClause, extract_partial_at_end(query));
1934    }
1935
1936    // Check if we're in FROM clause using tokens
1937    if is_in_clause(
1938        &tokens,
1939        Token::From,
1940        &[Token::Where, Token::OrderBy, Token::GroupBy],
1941    ) {
1942        return (CursorContext::FromClause, extract_partial_at_end(query));
1943    }
1944
1945    // Check if we're in SELECT clause using tokens
1946    if find_last_token(&tokens, &Token::Select).is_some()
1947        && find_last_token(&tokens, &Token::From).is_none()
1948    {
1949        return (CursorContext::SelectClause, extract_partial_at_end(query));
1950    }
1951
1952    (CursorContext::Unknown, None)
1953}
1954
1955fn extract_partial_at_end(query: &str) -> Option<String> {
1956    let trimmed = query.trim();
1957
1958    // First check if the last word itself starts with a quote (unclosed quoted identifier being typed)
1959    if let Some(last_word) = trimmed.split_whitespace().last() {
1960        if last_word.starts_with('"') && !last_word.ends_with('"') {
1961            // This is an unclosed quoted identifier like "Cust
1962            return Some(last_word.to_string());
1963        }
1964    }
1965
1966    // Regular identifier extraction
1967    let last_word = trimmed.split_whitespace().last()?;
1968
1969    // Check if it's a partial identifier (not a keyword or operator)
1970    // First check if it's alphanumeric (potential identifier)
1971    if last_word.chars().all(|c| c.is_alphanumeric() || c == '_') {
1972        // Use lexer to determine if it's a keyword or identifier
1973        if !is_sql_keyword(last_word) {
1974            Some(last_word.to_string())
1975        } else {
1976            None
1977        }
1978    } else {
1979        None
1980    }
1981}
1982
1983// Implement the ParsePrimary trait for Parser to use the modular expression parsing
1984impl ParsePrimary for Parser {
1985    fn current_token(&self) -> &Token {
1986        &self.current_token
1987    }
1988
1989    fn advance(&mut self) {
1990        self.advance();
1991    }
1992
1993    fn consume(&mut self, expected: Token) -> Result<(), String> {
1994        self.consume(expected)
1995    }
1996
1997    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
1998        self.parse_case_expression()
1999    }
2000
2001    fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
2002        self.parse_function_args()
2003    }
2004
2005    fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
2006        self.parse_window_spec()
2007    }
2008
2009    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
2010        self.parse_logical_or()
2011    }
2012
2013    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2014        self.parse_comparison()
2015    }
2016
2017    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2018        self.parse_expression_list()
2019    }
2020
2021    fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2022        // Parse subquery without parenthesis balance validation
2023        if matches!(self.current_token, Token::With) {
2024            self.parse_with_clause_inner()
2025        } else {
2026            self.parse_select_statement_inner()
2027        }
2028    }
2029}
2030
2031// Implement the ExpressionParser trait for Parser to use the modular expression parsing
2032impl ExpressionParser for Parser {
2033    fn current_token(&self) -> &Token {
2034        &self.current_token
2035    }
2036
2037    fn advance(&mut self) {
2038        // Call the main advance method directly to avoid recursion
2039        match &self.current_token {
2040            Token::LeftParen => self.paren_depth += 1,
2041            Token::RightParen => {
2042                self.paren_depth -= 1;
2043            }
2044            _ => {}
2045        }
2046        self.current_token = self.lexer.next_token();
2047    }
2048
2049    fn peek(&self) -> Option<&Token> {
2050        // We can't return a reference to a token from a temporary lexer,
2051        // so we need a different approach. For now, let's use a workaround
2052        // that checks the next token type without consuming it.
2053        // This is a limitation of the current design.
2054        // A proper fix would be to store the peeked token in the Parser struct.
2055        None // TODO: Implement proper lookahead
2056    }
2057
2058    fn is_at_end(&self) -> bool {
2059        matches!(self.current_token, Token::Eof)
2060    }
2061
2062    fn consume(&mut self, expected: Token) -> Result<(), String> {
2063        // Call the main consume method to avoid recursion
2064        if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
2065            self.update_paren_depth(&expected)?;
2066            self.current_token = self.lexer.next_token();
2067            Ok(())
2068        } else {
2069            Err(format!(
2070                "Expected {:?}, found {:?}",
2071                expected, self.current_token
2072            ))
2073        }
2074    }
2075
2076    fn parse_identifier(&mut self) -> Result<String, String> {
2077        if let Token::Identifier(id) = &self.current_token {
2078            let id = id.clone();
2079            self.advance();
2080            Ok(id)
2081        } else {
2082            Err(format!(
2083                "Expected identifier, found {:?}",
2084                self.current_token
2085            ))
2086        }
2087    }
2088}
2089
2090// Implement the ParseArithmetic trait for Parser to use the modular arithmetic parsing
2091impl ParseArithmetic for Parser {
2092    fn current_token(&self) -> &Token {
2093        &self.current_token
2094    }
2095
2096    fn advance(&mut self) {
2097        self.advance();
2098    }
2099
2100    fn consume(&mut self, expected: Token) -> Result<(), String> {
2101        self.consume(expected)
2102    }
2103
2104    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2105        self.parse_primary()
2106    }
2107
2108    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
2109        self.parse_multiplicative()
2110    }
2111
2112    fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
2113        self.parse_method_args()
2114    }
2115}
2116
2117// Implement the ParseComparison trait for Parser to use the modular comparison parsing
2118impl ParseComparison for Parser {
2119    fn current_token(&self) -> &Token {
2120        &self.current_token
2121    }
2122
2123    fn advance(&mut self) {
2124        self.advance();
2125    }
2126
2127    fn consume(&mut self, expected: Token) -> Result<(), String> {
2128        self.consume(expected)
2129    }
2130
2131    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2132        self.parse_primary()
2133    }
2134
2135    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
2136        self.parse_additive()
2137    }
2138
2139    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2140        self.parse_expression_list()
2141    }
2142
2143    fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2144        // Parse subquery without parenthesis balance validation
2145        if matches!(self.current_token, Token::With) {
2146            self.parse_with_clause_inner()
2147        } else {
2148            self.parse_select_statement_inner()
2149        }
2150    }
2151}
2152
2153// Implement the ParseLogical trait for Parser to use the modular logical parsing
2154impl ParseLogical for Parser {
2155    fn current_token(&self) -> &Token {
2156        &self.current_token
2157    }
2158
2159    fn advance(&mut self) {
2160        self.advance();
2161    }
2162
2163    fn consume(&mut self, expected: Token) -> Result<(), String> {
2164        self.consume(expected)
2165    }
2166
2167    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
2168        self.parse_logical_and()
2169    }
2170
2171    fn parse_base_logical_expression(&mut self) -> Result<SqlExpression, String> {
2172        // This is the base for logical AND - it should parse comparison expressions
2173        // to avoid infinite recursion with parse_expression
2174        self.parse_comparison()
2175    }
2176
2177    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2178        self.parse_comparison()
2179    }
2180
2181    fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2182        self.parse_expression_list()
2183    }
2184}
2185
2186// Implement the ParseCase trait for Parser to use the modular CASE parsing
2187impl ParseCase for Parser {
2188    fn current_token(&self) -> &Token {
2189        &self.current_token
2190    }
2191
2192    fn advance(&mut self) {
2193        self.advance();
2194    }
2195
2196    fn consume(&mut self, expected: Token) -> Result<(), String> {
2197        self.consume(expected)
2198    }
2199
2200    fn parse_expression(&mut self) -> Result<SqlExpression, String> {
2201        self.parse_expression()
2202    }
2203}
2204
2205fn is_sql_keyword(word: &str) -> bool {
2206    // Use the lexer to check if this word produces a keyword token
2207    let mut lexer = Lexer::new(word);
2208    let token = lexer.next_token();
2209
2210    // Check if it's a keyword token (not an identifier)
2211    !matches!(token, Token::Identifier(_) | Token::Eof)
2212}