1pub use super::parser::ast::{
5 CTEType, Comment, Condition, DataFormat, FrameBound, FrameUnit, HttpMethod, IntoTable,
6 JoinClause, JoinCondition, JoinOperator, JoinType, LogicalOp, OrderByColumn, OrderByItem,
7 SelectItem, SelectStatement, SetOperation, SingleJoinCondition, SortDirection, SqlExpression,
8 TableFunction, TableSource, WebCTESpec, WhenBranch, WhereClause, WindowFrame, WindowSpec, CTE,
9};
10pub use super::parser::legacy::{ParseContext, ParseState, Schema, SqlParser, SqlToken, TableInfo};
11pub use super::parser::lexer::{Lexer, LexerMode, Token};
12pub use super::parser::ParserConfig;
13
14pub use super::parser::formatter::{format_ast_tree, format_sql_pretty, format_sql_pretty_compact};
16
17pub use super::parser::ast_formatter::{format_sql_ast, format_sql_ast_with_config, FormatConfig};
19
20use super::parser::expressions::arithmetic::{
22 parse_additive as parse_additive_expr, parse_multiplicative as parse_multiplicative_expr,
23 ParseArithmetic,
24};
25use super::parser::expressions::case::{parse_case_expression as parse_case_expr, ParseCase};
26use super::parser::expressions::comparison::{
27 parse_comparison as parse_comparison_expr, parse_in_operator, ParseComparison,
28};
29use super::parser::expressions::logical::{
30 parse_logical_and as parse_logical_and_expr, parse_logical_or as parse_logical_or_expr,
31 ParseLogical,
32};
33use super::parser::expressions::primary::{
34 parse_primary as parse_primary_expr, ParsePrimary, PrimaryExpressionContext,
35};
36use super::parser::expressions::ExpressionParser;
37
38use crate::sql::functions::{FunctionCategory, FunctionRegistry};
40use crate::sql::generators::GeneratorRegistry;
41use std::sync::Arc;
42
43use super::parser::web_cte_parser::WebCteParser;
45
/// Selects how the parser treats SQL comments in its input.
///
/// `Parser::with_mode` translates each variant into the corresponding
/// `LexerMode` when it builds the lexer.
///
/// `Eq` is derived alongside `PartialEq`: the enum has no fields, so equality
/// is total and the type can be used wherever a full equivalence is required.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParserMode {
    /// The lexer is created with `LexerMode::SkipComments`.
    Standard,
    /// The lexer is created with `LexerMode::PreserveComments`, and the parser
    /// collects leading/trailing comments onto the statements it builds.
    PreserveComments,
}

impl Default for ParserMode {
    /// The default mode is [`ParserMode::Standard`].
    fn default() -> Self {
        Self::Standard
    }
}
60
61pub struct Parser {
62 lexer: Lexer,
63 pub current_token: Token, in_method_args: bool, columns: Vec<String>, paren_depth: i32, paren_depth_stack: Vec<i32>, _config: ParserConfig, debug_trace: bool, trace_depth: usize, function_registry: Arc<FunctionRegistry>, generator_registry: Arc<GeneratorRegistry>, mode: ParserMode, }
75
76impl Parser {
77 #[must_use]
78 pub fn new(input: &str) -> Self {
79 Self::with_mode(input, ParserMode::default())
80 }
81
82 #[must_use]
84 pub fn with_mode(input: &str, mode: ParserMode) -> Self {
85 let lexer_mode = match mode {
87 ParserMode::Standard => LexerMode::SkipComments,
88 ParserMode::PreserveComments => LexerMode::PreserveComments,
89 };
90
91 let mut lexer = Lexer::with_mode(input, lexer_mode);
92 let current_token = lexer.next_token();
93 Self {
94 lexer,
95 current_token,
96 in_method_args: false,
97 columns: Vec::new(),
98 paren_depth: 0,
99 paren_depth_stack: Vec::new(),
100 _config: ParserConfig::default(),
101 debug_trace: false,
102 trace_depth: 0,
103 function_registry: Arc::new(FunctionRegistry::new()),
104 generator_registry: Arc::new(GeneratorRegistry::new()),
105 mode,
106 }
107 }
108
109 #[must_use]
110 pub fn with_config(input: &str, config: ParserConfig) -> Self {
111 let mut lexer = Lexer::new(input);
112 let current_token = lexer.next_token();
113 Self {
114 lexer,
115 current_token,
116 in_method_args: false,
117 columns: Vec::new(),
118 paren_depth: 0,
119 paren_depth_stack: Vec::new(),
120 _config: config,
121 debug_trace: false,
122 trace_depth: 0,
123 function_registry: Arc::new(FunctionRegistry::new()),
124 generator_registry: Arc::new(GeneratorRegistry::new()),
125 mode: ParserMode::default(),
126 }
127 }
128
129 #[must_use]
130 pub fn with_columns(mut self, columns: Vec<String>) -> Self {
131 self.columns = columns;
132 self
133 }
134
135 #[must_use]
136 pub fn with_debug_trace(mut self, enabled: bool) -> Self {
137 self.debug_trace = enabled;
138 self
139 }
140
141 #[must_use]
142 pub fn with_function_registry(mut self, registry: Arc<FunctionRegistry>) -> Self {
143 self.function_registry = registry;
144 self
145 }
146
147 #[must_use]
148 pub fn with_generator_registry(mut self, registry: Arc<GeneratorRegistry>) -> Self {
149 self.generator_registry = registry;
150 self
151 }
152
153 fn trace_enter(&mut self, context: &str) {
154 if self.debug_trace {
155 let indent = " ".repeat(self.trace_depth);
156 eprintln!("{}→ {} | Token: {:?}", indent, context, self.current_token);
157 self.trace_depth += 1;
158 }
159 }
160
161 fn trace_exit(&mut self, context: &str, result: &Result<impl std::fmt::Debug, String>) {
162 if self.debug_trace {
163 self.trace_depth = self.trace_depth.saturating_sub(1);
164 let indent = " ".repeat(self.trace_depth);
165 match result {
166 Ok(val) => eprintln!("{}← {} ✓ | Result: {:?}", indent, context, val),
167 Err(e) => eprintln!("{}← {} ✗ | Error: {}", indent, context, e),
168 }
169 }
170 }
171
172 fn trace_token(&self, action: &str) {
173 if self.debug_trace {
174 let indent = " ".repeat(self.trace_depth);
175 eprintln!("{} {} | Token: {:?}", indent, action, self.current_token);
176 }
177 }
178
179 #[allow(dead_code)]
180 fn peek_token(&self) -> Option<Token> {
181 let mut temp_lexer = self.lexer.clone();
183 let next_token = temp_lexer.next_token();
184 if matches!(next_token, Token::Eof) {
185 None
186 } else {
187 Some(next_token)
188 }
189 }
190
/// Reports whether `id` is one of the clause keywords that must not be
/// swallowed as an ordinary identifier: `ORDER`, `HAVING`, `LIMIT`, `OFFSET`,
/// `UNION`, `INTERSECT` or `EXCEPT`.
///
/// The comparison is ASCII case-insensitive and allocation-free. Using ASCII
/// folding (rather than Unicode upper-casing) also means that look-alike
/// spellings such as `lımıt` (dotless i) are *not* mistaken for keywords.
fn is_identifier_reserved(id: &str) -> bool {
    const RESERVED: [&str; 7] = [
        "ORDER",
        "HAVING",
        "LIMIT",
        "OFFSET",
        "UNION",
        "INTERSECT",
        "EXCEPT",
    ];
    RESERVED.iter().any(|keyword| id.eq_ignore_ascii_case(keyword))
}
202
/// Space-padded textual forms of the six comparison operators.
// The explicit `'static` is kept on purpose: this is an associated constant,
// where eliding the lifetime is linted against by the compiler.
const COMPARISON_OPERATORS: [&'static str; 6] = [
    " > ",
    " < ",
    " >= ",
    " <= ",
    " = ",
    " != ",
];
205
206 pub fn consume(&mut self, expected: Token) -> Result<(), String> {
207 self.trace_token(&format!("Consuming expected {:?}", expected));
208 if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
209 self.update_paren_depth(&expected)?;
211
212 self.current_token = self.lexer.next_token();
213 Ok(())
214 } else {
215 let error_msg = match (&expected, &self.current_token) {
217 (Token::RightParen, Token::Eof) if self.paren_depth > 0 => {
218 format!(
219 "Unclosed parenthesis - missing {} closing parenthes{}",
220 self.paren_depth,
221 if self.paren_depth == 1 { "is" } else { "es" }
222 )
223 }
224 (Token::RightParen, _) if self.paren_depth > 0 => {
225 format!(
226 "Expected closing parenthesis but found {:?} (currently {} unclosed parenthes{})",
227 self.current_token,
228 self.paren_depth,
229 if self.paren_depth == 1 { "is" } else { "es" }
230 )
231 }
232 _ => format!("Expected {:?}, found {:?}", expected, self.current_token),
233 };
234 Err(error_msg)
235 }
236 }
237
238 pub fn advance(&mut self) {
239 match &self.current_token {
241 Token::LeftParen => self.paren_depth += 1,
242 Token::RightParen => {
243 self.paren_depth -= 1;
244 }
247 _ => {}
248 }
249 let old_token = self.current_token.clone();
250 self.current_token = self.lexer.next_token();
251 if self.debug_trace {
252 let indent = " ".repeat(self.trace_depth);
253 eprintln!(
254 "{} Advanced: {:?} → {:?}",
255 indent, old_token, self.current_token
256 );
257 }
258 }
259
260 fn collect_leading_comments(&mut self) -> Vec<Comment> {
263 let mut comments = Vec::new();
264 loop {
265 match &self.current_token {
266 Token::LineComment(text) => {
267 comments.push(Comment::line(text.clone()));
268 self.advance();
269 }
270 Token::BlockComment(text) => {
271 comments.push(Comment::block(text.clone()));
272 self.advance();
273 }
274 _ => break,
275 }
276 }
277 comments
278 }
279
280 fn collect_trailing_comment(&mut self) -> Option<Comment> {
283 match &self.current_token {
284 Token::LineComment(text) => {
285 let comment = Some(Comment::line(text.clone()));
286 self.advance();
287 comment
288 }
289 Token::BlockComment(text) => {
290 let comment = Some(Comment::block(text.clone()));
291 self.advance();
292 comment
293 }
294 _ => None,
295 }
296 }
297
298 fn push_paren_depth(&mut self) {
299 self.paren_depth_stack.push(self.paren_depth);
300 self.paren_depth = 0;
301 }
302
303 fn pop_paren_depth(&mut self) {
304 if let Some(depth) = self.paren_depth_stack.pop() {
305 self.paren_depth = depth;
307 }
308 }
309
310 pub fn parse(&mut self) -> Result<SelectStatement, String> {
311 self.trace_enter("parse");
312
313 let leading_comments = if self.mode == ParserMode::PreserveComments {
316 self.collect_leading_comments()
317 } else {
318 vec![]
319 };
320
321 let result = if matches!(self.current_token, Token::With) {
323 let mut stmt = self.parse_with_clause()?;
324 stmt.leading_comments = leading_comments;
326 stmt
327 } else {
328 let stmt = self.parse_select_statement_with_comments_public(leading_comments)?;
330 self.check_balanced_parentheses()?;
331 stmt
332 };
333
334 self.trace_exit("parse", &Ok(&result));
335 Ok(result)
336 }
337
338 fn parse_select_statement_with_comments_public(
340 &mut self,
341 comments: Vec<Comment>,
342 ) -> Result<SelectStatement, String> {
343 self.parse_select_statement_with_comments(comments)
344 }
345
346 fn parse_with_clause(&mut self) -> Result<SelectStatement, String> {
347 self.consume(Token::With)?;
348 let ctes = self.parse_cte_list()?;
349
350 let mut main_query = self.parse_select_statement_inner_no_comments()?;
352 main_query.ctes = ctes;
353
354 self.check_balanced_parentheses()?;
356
357 Ok(main_query)
358 }
359
360 fn parse_with_clause_inner(&mut self) -> Result<SelectStatement, String> {
361 self.consume(Token::With)?;
362 let ctes = self.parse_cte_list()?;
363
364 let mut main_query = self.parse_select_statement_inner()?;
366 main_query.ctes = ctes;
367
368 Ok(main_query)
369 }
370
371 fn parse_cte_list(&mut self) -> Result<Vec<CTE>, String> {
373 let mut ctes = Vec::new();
374
375 loop {
377 let is_web = if matches!(&self.current_token, Token::Web) {
379 self.trace_token("Found WEB keyword for CTE");
380 self.advance();
381 true
382 } else {
383 false
384 };
385
386 let name = match &self.current_token {
388 Token::Identifier(name) => name.clone(),
389 _ => {
390 return Err(format!(
391 "Expected CTE name after {}",
392 if is_web { "WEB" } else { "WITH or comma" }
393 ))
394 }
395 };
396 self.advance();
397
398 let column_list = if matches!(self.current_token, Token::LeftParen) {
400 self.advance();
401 let cols = self.parse_identifier_list()?;
402 self.consume(Token::RightParen)?;
403 Some(cols)
404 } else {
405 None
406 };
407
408 self.consume(Token::As)?;
410
411 let cte_type = if is_web {
412 self.consume(Token::LeftParen)?;
414 let web_spec = WebCteParser::parse(self)?;
416 self.consume(Token::RightParen)?;
418 CTEType::Web(web_spec)
419 } else {
420 self.push_paren_depth();
423 self.consume(Token::LeftParen)?;
425 let query = self.parse_select_statement_inner()?;
426 self.consume(Token::RightParen)?;
428 self.pop_paren_depth();
430 CTEType::Standard(query)
431 };
432
433 ctes.push(CTE {
434 name,
435 column_list,
436 cte_type,
437 });
438
439 if !matches!(self.current_token, Token::Comma) {
441 break;
442 }
443 self.advance();
444 }
445
446 Ok(ctes)
447 }
448
449 fn parse_optional_alias(&mut self) -> Result<Option<String>, String> {
451 if matches!(self.current_token, Token::As) {
452 self.advance();
453 match &self.current_token {
454 Token::Identifier(name) => {
455 let alias = name.clone();
456 self.advance();
457 Ok(Some(alias))
458 }
459 token => {
460 if let Some(keyword) = token.as_keyword_str() {
462 Err(format!(
463 "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
464 keyword,
465 keyword.to_lowercase()
466 ))
467 } else {
468 Err("Expected alias name after AS".to_string())
469 }
470 }
471 }
472 } else if let Token::Identifier(name) = &self.current_token {
473 let alias = name.clone();
475 self.advance();
476 Ok(Some(alias))
477 } else {
478 Ok(None)
479 }
480 }
481
/// Reports whether `name` is acceptable as an identifier.
///
/// A name is valid when it is either
/// * wrapped in a pair of double quotes (any content in between), or
/// * non-empty and made up solely of alphanumeric characters and `_`.
///
/// The empty string and a lone `"` (which is not a *pair* of quotes) are
/// rejected.
fn is_valid_identifier(name: &str) -> bool {
    // `len() >= 2` guarantees the opening and closing quote are distinct.
    let is_quoted = name.len() >= 2 && name.starts_with('"') && name.ends_with('"');
    if is_quoted {
        return true;
    }
    !name.is_empty() && name.chars().all(|c| c.is_alphanumeric() || c == '_')
}
492
493 fn update_paren_depth(&mut self, token: &Token) -> Result<(), String> {
495 match token {
496 Token::LeftParen => self.paren_depth += 1,
497 Token::RightParen => {
498 self.paren_depth -= 1;
499 if self.paren_depth < 0 {
501 return Err(
502 "Unexpected closing parenthesis - no matching opening parenthesis"
503 .to_string(),
504 );
505 }
506 }
507 _ => {}
508 }
509 Ok(())
510 }
511
512 fn parse_argument_list(&mut self) -> Result<Vec<SqlExpression>, String> {
514 let mut args = Vec::new();
515
516 if !matches!(self.current_token, Token::RightParen) {
517 loop {
518 args.push(self.parse_expression()?);
519
520 if matches!(self.current_token, Token::Comma) {
521 self.advance();
522 } else {
523 break;
524 }
525 }
526 }
527
528 Ok(args)
529 }
530
531 fn check_balanced_parentheses(&self) -> Result<(), String> {
533 if self.paren_depth > 0 {
534 Err(format!(
535 "Unclosed parenthesis - missing {} closing parenthes{}",
536 self.paren_depth,
537 if self.paren_depth == 1 { "is" } else { "es" }
538 ))
539 } else if self.paren_depth < 0 {
540 Err("Extra closing parenthesis found - no matching opening parenthesis".to_string())
541 } else {
542 Ok(())
543 }
544 }
545
546 fn contains_aggregate_function(expr: &SqlExpression) -> bool {
549 match expr {
550 SqlExpression::FunctionCall { name, args, .. } => {
551 let upper_name = name.to_uppercase();
553 let is_aggregate = matches!(
554 upper_name.as_str(),
555 "COUNT" | "SUM" | "AVG" | "MIN" | "MAX" | "GROUP_CONCAT" | "STRING_AGG"
556 );
557
558 is_aggregate || args.iter().any(Self::contains_aggregate_function)
561 }
562 SqlExpression::BinaryOp { left, right, .. } => {
564 Self::contains_aggregate_function(left) || Self::contains_aggregate_function(right)
565 }
566 SqlExpression::Not { expr } => Self::contains_aggregate_function(expr),
567 SqlExpression::MethodCall { args, .. } => {
568 args.iter().any(Self::contains_aggregate_function)
569 }
570 SqlExpression::ChainedMethodCall { base, args, .. } => {
571 Self::contains_aggregate_function(base)
572 || args.iter().any(Self::contains_aggregate_function)
573 }
574 SqlExpression::CaseExpression {
575 when_branches,
576 else_branch,
577 } => {
578 when_branches.iter().any(|branch| {
579 Self::contains_aggregate_function(&branch.condition)
580 || Self::contains_aggregate_function(&branch.result)
581 }) || else_branch
582 .as_ref()
583 .map_or(false, |e| Self::contains_aggregate_function(e))
584 }
585 SqlExpression::SimpleCaseExpression {
586 expr,
587 when_branches,
588 else_branch,
589 } => {
590 Self::contains_aggregate_function(expr)
591 || when_branches.iter().any(|branch| {
592 Self::contains_aggregate_function(&branch.value)
593 || Self::contains_aggregate_function(&branch.result)
594 })
595 || else_branch
596 .as_ref()
597 .map_or(false, |e| Self::contains_aggregate_function(e))
598 }
599 SqlExpression::ScalarSubquery { query } => {
600 query
603 .having
604 .as_ref()
605 .map_or(false, |h| Self::contains_aggregate_function(h))
606 }
607 SqlExpression::Column(_)
609 | SqlExpression::StringLiteral(_)
610 | SqlExpression::NumberLiteral(_)
611 | SqlExpression::BooleanLiteral(_)
612 | SqlExpression::Null
613 | SqlExpression::DateTimeConstructor { .. }
614 | SqlExpression::DateTimeToday { .. } => false,
615
616 SqlExpression::WindowFunction { .. } => true,
618
619 SqlExpression::Between { expr, lower, upper } => {
621 Self::contains_aggregate_function(expr)
622 || Self::contains_aggregate_function(lower)
623 || Self::contains_aggregate_function(upper)
624 }
625
626 SqlExpression::InList { expr, values } | SqlExpression::NotInList { expr, values } => {
628 Self::contains_aggregate_function(expr)
629 || values.iter().any(Self::contains_aggregate_function)
630 }
631
632 SqlExpression::InSubquery { expr, subquery }
634 | SqlExpression::NotInSubquery { expr, subquery } => {
635 Self::contains_aggregate_function(expr)
636 || subquery
637 .having
638 .as_ref()
639 .map_or(false, |h| Self::contains_aggregate_function(h))
640 }
641
642 SqlExpression::Unnest { column, .. } => Self::contains_aggregate_function(column),
644 }
645 }
646
647 fn parse_select_statement(&mut self) -> Result<SelectStatement, String> {
648 self.trace_enter("parse_select_statement");
649 let result = self.parse_select_statement_inner()?;
650
651 self.check_balanced_parentheses()?;
653
654 Ok(result)
655 }
656
657 fn parse_select_statement_inner(&mut self) -> Result<SelectStatement, String> {
658 let leading_comments = if self.mode == ParserMode::PreserveComments {
660 self.collect_leading_comments()
661 } else {
662 vec![]
663 };
664
665 self.parse_select_statement_with_comments(leading_comments)
666 }
667
668 fn parse_select_statement_inner_no_comments(&mut self) -> Result<SelectStatement, String> {
671 self.parse_select_statement_with_comments(vec![])
672 }
673
674 fn parse_select_statement_with_comments(
676 &mut self,
677 leading_comments: Vec<Comment>,
678 ) -> Result<SelectStatement, String> {
679 self.consume(Token::Select)?;
680
681 let distinct = if matches!(self.current_token, Token::Distinct) {
683 self.advance();
684 true
685 } else {
686 false
687 };
688
689 let select_items = self.parse_select_items()?;
691
692 let columns = select_items
694 .iter()
695 .map(|item| match item {
696 SelectItem::Star { .. } => "*".to_string(),
697 SelectItem::Column {
698 column: col_ref, ..
699 } => col_ref.name.clone(),
700 SelectItem::Expression { alias, .. } => alias.clone(),
701 })
702 .collect();
703
704 let into_table = if matches!(self.current_token, Token::Into) {
706 self.advance();
707 Some(self.parse_into_clause()?)
708 } else {
709 None
710 };
711
712 let (from_table, from_subquery, from_function, from_alias) = if matches!(
714 self.current_token,
715 Token::From
716 ) {
717 self.advance();
718
719 if let Token::Identifier(name) = &self.current_token.clone() {
721 let has_paren = self.peek_token() == Some(Token::LeftParen);
725 if self.debug_trace {
726 eprintln!(
727 " Checking {} for table function, has_paren={}",
728 name, has_paren
729 );
730 }
731
732 let is_table_function = if has_paren {
735 if self.debug_trace {
737 eprintln!(" Checking generator registry for {}", name.to_uppercase());
738 }
739 if let Some(_gen) = self.generator_registry.get(&name.to_uppercase()) {
740 if self.debug_trace {
741 eprintln!(" Found {} in generator registry", name);
742 }
743 self.trace_token(&format!("Found generator: {}", name));
744 true
745 } else {
746 if let Some(func) = self.function_registry.get(&name.to_uppercase()) {
748 let sig = func.signature();
749 let is_table_fn = sig.category == FunctionCategory::TableFunction;
750 if self.debug_trace {
751 eprintln!(
752 " Found {} in function registry, is_table_function={}",
753 name, is_table_fn
754 );
755 }
756 if is_table_fn {
757 self.trace_token(&format!(
758 "Found table function in function registry: {}",
759 name
760 ));
761 }
762 is_table_fn
763 } else {
764 if self.debug_trace {
765 eprintln!(" {} not found in either registry", name);
766 self.trace_token(&format!(
767 "Not found as generator or table function: {}",
768 name
769 ));
770 }
771 false
772 }
773 }
774 } else {
775 if self.debug_trace {
776 eprintln!(" No parenthesis after {}, treating as table", name);
777 }
778 false
779 };
780
781 if is_table_function {
782 let function_name = name.clone();
784 self.advance(); self.consume(Token::LeftParen)?;
788 let args = self.parse_argument_list()?;
789 self.consume(Token::RightParen)?;
790
791 let alias = if matches!(self.current_token, Token::As) {
793 self.advance();
794 match &self.current_token {
795 Token::Identifier(name) => {
796 let alias = name.clone();
797 self.advance();
798 Some(alias)
799 }
800 token => {
801 if let Some(keyword) = token.as_keyword_str() {
802 return Err(format!(
803 "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
804 keyword,
805 keyword.to_lowercase()
806 ));
807 } else {
808 return Err("Expected alias name after AS".to_string());
809 }
810 }
811 }
812 } else if let Token::Identifier(name) = &self.current_token {
813 let alias = name.clone();
814 self.advance();
815 Some(alias)
816 } else {
817 None
818 };
819
820 (
821 None,
822 None,
823 Some(TableFunction::Generator {
824 name: function_name,
825 args,
826 }),
827 alias,
828 )
829 } else {
830 let table_name = name.clone();
832 self.advance();
833
834 let alias = self.parse_optional_alias()?;
836
837 (Some(table_name), None, None, alias)
838 }
839 } else if matches!(self.current_token, Token::LeftParen) {
840 self.advance();
842
843 let subquery = if matches!(self.current_token, Token::With) {
845 self.parse_with_clause_inner()?
846 } else {
847 self.parse_select_statement_inner()?
848 };
849
850 self.consume(Token::RightParen)?;
851
852 let alias = if matches!(self.current_token, Token::As) {
854 self.advance();
855 match &self.current_token {
856 Token::Identifier(name) => {
857 let alias = name.clone();
858 self.advance();
859 alias
860 }
861 token => {
862 if let Some(keyword) = token.as_keyword_str() {
863 return Err(format!(
864 "Reserved keyword '{}' cannot be used as subquery alias. Use a different name or quote it with double quotes: \"{}\"",
865 keyword,
866 keyword.to_lowercase()
867 ));
868 } else {
869 return Err("Expected alias name after AS".to_string());
870 }
871 }
872 }
873 } else {
874 match &self.current_token {
876 Token::Identifier(name) => {
877 let alias = name.clone();
878 self.advance();
879 alias
880 }
881 _ => {
882 return Err(
883 "Subquery in FROM must have an alias (e.g., AS t)".to_string()
884 )
885 }
886 }
887 };
888
889 (None, Some(Box::new(subquery)), None, Some(alias))
890 } else {
891 match &self.current_token {
893 Token::Identifier(table) => {
894 let table_name = table.clone();
895 self.advance();
896
897 let alias = self.parse_optional_alias()?;
899
900 (Some(table_name), None, None, alias)
901 }
902 Token::QuotedIdentifier(table) => {
903 let table_name = table.clone();
905 self.advance();
906
907 let alias = self.parse_optional_alias()?;
909
910 (Some(table_name), None, None, alias)
911 }
912 _ => return Err("Expected table name or subquery after FROM".to_string()),
913 }
914 }
915 } else {
916 (None, None, None, None)
917 };
918
919 let mut joins = Vec::new();
921 while self.is_join_token() {
922 joins.push(self.parse_join_clause()?);
923 }
924
925 let where_clause = if matches!(self.current_token, Token::Where) {
926 self.advance();
927 Some(self.parse_where_clause()?)
928 } else {
929 None
930 };
931
932 let group_by = if matches!(self.current_token, Token::GroupBy) {
933 self.advance();
934 Some(self.parse_expression_list()?)
937 } else {
938 None
939 };
940
941 let having = if matches!(self.current_token, Token::Having) {
943 if group_by.is_none() {
944 return Err("HAVING clause requires GROUP BY".to_string());
945 }
946 self.advance();
947 let having_expr = self.parse_expression()?;
948
949 Some(having_expr)
954 } else {
955 None
956 };
957
958 let qualify = if matches!(self.current_token, Token::Qualify) {
962 self.advance();
963 let qualify_expr = self.parse_expression()?;
964
965 Some(qualify_expr)
969 } else {
970 None
971 };
972
973 let order_by = if matches!(self.current_token, Token::OrderBy) {
975 self.trace_token("Found OrderBy token");
976 self.advance();
977 Some(self.parse_order_by_list()?)
978 } else if let Token::Identifier(s) = &self.current_token {
979 if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
982 self.trace_token("Warning: ORDER as identifier instead of OrderBy token");
983 self.advance(); if matches!(&self.current_token, Token::By) {
985 self.advance(); Some(self.parse_order_by_list()?)
987 } else {
988 return Err("Expected BY after ORDER".to_string());
989 }
990 } else {
991 None
992 }
993 } else {
994 None
995 };
996
997 let limit = if matches!(self.current_token, Token::Limit) {
999 self.advance();
1000 match &self.current_token {
1001 Token::NumberLiteral(num) => {
1002 let limit_val = num
1003 .parse::<usize>()
1004 .map_err(|_| format!("Invalid LIMIT value: {num}"))?;
1005 self.advance();
1006 Some(limit_val)
1007 }
1008 _ => return Err("Expected number after LIMIT".to_string()),
1009 }
1010 } else {
1011 None
1012 };
1013
1014 let offset = if matches!(self.current_token, Token::Offset) {
1016 self.advance();
1017 match &self.current_token {
1018 Token::NumberLiteral(num) => {
1019 let offset_val = num
1020 .parse::<usize>()
1021 .map_err(|_| format!("Invalid OFFSET value: {num}"))?;
1022 self.advance();
1023 Some(offset_val)
1024 }
1025 _ => return Err("Expected number after OFFSET".to_string()),
1026 }
1027 } else {
1028 None
1029 };
1030
1031 let into_table = if into_table.is_none() && matches!(self.current_token, Token::Into) {
1035 self.advance();
1036 Some(self.parse_into_clause()?)
1037 } else {
1038 into_table };
1040
1041 let set_operations = self.parse_set_operations()?;
1043
1044 let trailing_comment = if self.mode == ParserMode::PreserveComments {
1046 self.collect_trailing_comment()
1047 } else {
1048 None
1049 };
1050
1051 Ok(SelectStatement {
1052 distinct,
1053 columns,
1054 select_items,
1055 from_table,
1056 from_subquery,
1057 from_function,
1058 from_alias,
1059 joins,
1060 where_clause,
1061 order_by,
1062 group_by,
1063 having,
1064 qualify,
1065 limit,
1066 offset,
1067 ctes: Vec::new(), into_table,
1069 set_operations,
1070 leading_comments,
1071 trailing_comment,
1072 })
1073 }
1074
1075 fn parse_set_operations(
1078 &mut self,
1079 ) -> Result<Vec<(SetOperation, Box<SelectStatement>)>, String> {
1080 let mut operations = Vec::new();
1081
1082 while matches!(
1083 self.current_token,
1084 Token::Union | Token::Intersect | Token::Except
1085 ) {
1086 let operation = match &self.current_token {
1088 Token::Union => {
1089 self.advance();
1090 if let Token::Identifier(id) = &self.current_token {
1092 if id.to_uppercase() == "ALL" {
1093 self.advance();
1094 SetOperation::UnionAll
1095 } else {
1096 SetOperation::Union
1097 }
1098 } else {
1099 SetOperation::Union
1100 }
1101 }
1102 Token::Intersect => {
1103 self.advance();
1104 SetOperation::Intersect
1105 }
1106 Token::Except => {
1107 self.advance();
1108 SetOperation::Except
1109 }
1110 _ => unreachable!(),
1111 };
1112
1113 let next_select = self.parse_select_statement_inner()?;
1115
1116 operations.push((operation, Box::new(next_select)));
1117 }
1118
1119 Ok(operations)
1120 }
1121
1122 fn parse_select_items(&mut self) -> Result<Vec<SelectItem>, String> {
1124 let mut items = Vec::new();
1125
1126 loop {
1127 if let Token::Identifier(name) = &self.current_token.clone() {
1130 let saved_pos = self.lexer.clone();
1132 let saved_token = self.current_token.clone();
1133 let table_name = name.clone();
1134
1135 self.advance();
1136
1137 if matches!(self.current_token, Token::Dot) {
1138 self.advance();
1139 if matches!(self.current_token, Token::Star) {
1140 items.push(SelectItem::Star {
1142 table_prefix: Some(table_name),
1143 leading_comments: vec![],
1144 trailing_comment: None,
1145 });
1146 self.advance();
1147
1148 if matches!(self.current_token, Token::Comma) {
1150 self.advance();
1151 continue;
1152 } else {
1153 break;
1154 }
1155 }
1156 }
1157
1158 self.lexer = saved_pos;
1160 self.current_token = saved_token;
1161 }
1162
1163 if matches!(self.current_token, Token::Star) {
1165 items.push(SelectItem::Star {
1166 table_prefix: None,
1167 leading_comments: vec![],
1168 trailing_comment: None,
1169 });
1170 self.advance();
1171 } else {
1172 let expr = self.parse_comparison()?; let alias = if matches!(self.current_token, Token::As) {
1177 self.advance();
1178 match &self.current_token {
1179 Token::Identifier(alias_name) => {
1180 let alias = alias_name.clone();
1181 self.advance();
1182 alias
1183 }
1184 Token::QuotedIdentifier(alias_name) => {
1185 let alias = alias_name.clone();
1186 self.advance();
1187 alias
1188 }
1189 token => {
1190 if let Some(keyword) = token.as_keyword_str() {
1191 return Err(format!(
1192 "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
1193 keyword,
1194 keyword.to_lowercase()
1195 ));
1196 } else {
1197 return Err("Expected alias name after AS".to_string());
1198 }
1199 }
1200 }
1201 } else {
1202 match &expr {
1204 SqlExpression::Column(col_ref) => col_ref.name.clone(),
1205 _ => format!("expr_{}", items.len() + 1), }
1207 };
1208
1209 let item = match expr {
1211 SqlExpression::Column(col_ref) if alias == col_ref.name => {
1212 SelectItem::Column {
1214 column: col_ref,
1215 leading_comments: vec![],
1216 trailing_comment: None,
1217 }
1218 }
1219 _ => {
1220 SelectItem::Expression {
1222 expr,
1223 alias,
1224 leading_comments: vec![],
1225 trailing_comment: None,
1226 }
1227 }
1228 };
1229
1230 items.push(item);
1231 }
1232
1233 if matches!(self.current_token, Token::Comma) {
1235 self.advance();
1236 } else {
1237 break;
1238 }
1239 }
1240
1241 Ok(items)
1242 }
1243
1244 fn parse_identifier_list(&mut self) -> Result<Vec<String>, String> {
1245 let mut identifiers = Vec::new();
1246
1247 loop {
1248 match &self.current_token {
1249 Token::Identifier(id) => {
1250 if Self::is_identifier_reserved(id) {
1252 break;
1254 }
1255 identifiers.push(id.clone());
1256 self.advance();
1257 }
1258 Token::QuotedIdentifier(id) => {
1259 identifiers.push(id.clone());
1261 self.advance();
1262 }
1263 _ => {
1264 break;
1266 }
1267 }
1268
1269 if matches!(self.current_token, Token::Comma) {
1270 self.advance();
1271 } else {
1272 break;
1273 }
1274 }
1275
1276 if identifiers.is_empty() {
1277 return Err("Expected at least one identifier".to_string());
1278 }
1279
1280 Ok(identifiers)
1281 }
1282
1283 fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
1284 let mut partition_by = Vec::new();
1285 let mut order_by = Vec::new();
1286
1287 if matches!(self.current_token, Token::Partition) {
1289 self.advance(); if !matches!(self.current_token, Token::By) {
1291 return Err("Expected BY after PARTITION".to_string());
1292 }
1293 self.advance(); partition_by = self.parse_identifier_list()?;
1297 }
1298
1299 if matches!(self.current_token, Token::OrderBy) {
1301 self.advance(); order_by = self.parse_order_by_list()?;
1303 } else if let Token::Identifier(s) = &self.current_token {
1304 if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
1305 self.advance(); if !matches!(self.current_token, Token::By) {
1308 return Err("Expected BY after ORDER".to_string());
1309 }
1310 self.advance(); order_by = self.parse_order_by_list()?;
1312 }
1313 }
1314
1315 let frame = self.parse_window_frame()?;
1317
1318 Ok(WindowSpec {
1319 partition_by,
1320 order_by,
1321 frame,
1322 })
1323 }
1324
1325 fn parse_order_by_list(&mut self) -> Result<Vec<OrderByItem>, String> {
1326 let mut order_items = Vec::new();
1327
1328 loop {
1329 let expr = self.parse_expression()?;
1337
1338 let direction = match &self.current_token {
1340 Token::Asc => {
1341 self.advance();
1342 SortDirection::Asc
1343 }
1344 Token::Desc => {
1345 self.advance();
1346 SortDirection::Desc
1347 }
1348 _ => SortDirection::Asc, };
1350
1351 order_items.push(OrderByItem { expr, direction });
1352
1353 if matches!(self.current_token, Token::Comma) {
1354 self.advance();
1355 } else {
1356 break;
1357 }
1358 }
1359
1360 Ok(order_items)
1361 }
1362
1363 fn parse_into_clause(&mut self) -> Result<IntoTable, String> {
1366 let name = match &self.current_token {
1368 Token::Identifier(id) if id.starts_with('#') => {
1369 let table_name = id.clone();
1370 self.advance();
1371 table_name
1372 }
1373 Token::Identifier(id) => {
1374 return Err(format!(
1375 "Temporary table name must start with #, got: {}",
1376 id
1377 ));
1378 }
1379 _ => {
1380 return Err(
1381 "Expected temporary table name (starting with #) after INTO".to_string()
1382 );
1383 }
1384 };
1385
1386 Ok(IntoTable { name })
1387 }
1388
1389 fn parse_window_frame(&mut self) -> Result<Option<WindowFrame>, String> {
1390 let unit = match &self.current_token {
1392 Token::Rows => {
1393 self.advance();
1394 FrameUnit::Rows
1395 }
1396 Token::Identifier(id) if id.to_uppercase() == "RANGE" => {
1397 self.advance();
1399 FrameUnit::Range
1400 }
1401 _ => return Ok(None), };
1403
1404 let (start, end) = if let Token::Between = &self.current_token {
1406 self.advance(); let start = self.parse_frame_bound()?;
1409
1410 if !matches!(&self.current_token, Token::And) {
1412 return Err("Expected AND after window frame start bound".to_string());
1413 }
1414 self.advance();
1415
1416 let end = self.parse_frame_bound()?;
1418 (start, Some(end))
1419 } else {
1420 let bound = self.parse_frame_bound()?;
1422 (bound, None)
1423 };
1424
1425 Ok(Some(WindowFrame { unit, start, end }))
1426 }
1427
1428 fn parse_frame_bound(&mut self) -> Result<FrameBound, String> {
1429 match &self.current_token {
1430 Token::Unbounded => {
1431 self.advance();
1432 match &self.current_token {
1433 Token::Preceding => {
1434 self.advance();
1435 Ok(FrameBound::UnboundedPreceding)
1436 }
1437 Token::Following => {
1438 self.advance();
1439 Ok(FrameBound::UnboundedFollowing)
1440 }
1441 _ => Err("Expected PRECEDING or FOLLOWING after UNBOUNDED".to_string()),
1442 }
1443 }
1444 Token::Current => {
1445 self.advance();
1446 if matches!(&self.current_token, Token::Row) {
1447 self.advance();
1448 return Ok(FrameBound::CurrentRow);
1449 }
1450 Err("Expected ROW after CURRENT".to_string())
1451 }
1452 Token::NumberLiteral(num) => {
1453 let n: i64 = num
1454 .parse()
1455 .map_err(|_| "Invalid number in window frame".to_string())?;
1456 self.advance();
1457 match &self.current_token {
1458 Token::Preceding => {
1459 self.advance();
1460 Ok(FrameBound::Preceding(n))
1461 }
1462 Token::Following => {
1463 self.advance();
1464 Ok(FrameBound::Following(n))
1465 }
1466 _ => Err("Expected PRECEDING or FOLLOWING after number".to_string()),
1467 }
1468 }
1469 _ => Err("Invalid window frame bound".to_string()),
1470 }
1471 }
1472
1473 fn parse_where_clause(&mut self) -> Result<WhereClause, String> {
1474 let expr = self.parse_expression()?;
1477
1478 if matches!(self.current_token, Token::RightParen) && self.paren_depth <= 0 {
1480 return Err(
1481 "Unexpected closing parenthesis - no matching opening parenthesis".to_string(),
1482 );
1483 }
1484
1485 let conditions = vec![Condition {
1487 expr,
1488 connector: None,
1489 }];
1490
1491 Ok(WhereClause { conditions })
1492 }
1493
1494 fn parse_expression(&mut self) -> Result<SqlExpression, String> {
1495 self.trace_enter("parse_expression");
1496 let mut left = self.parse_logical_or()?;
1499
1500 left = parse_in_operator(self, left)?;
1503
1504 let result = Ok(left);
1505 self.trace_exit("parse_expression", &result);
1506 result
1507 }
1508
1509 fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
1510 parse_comparison_expr(self)
1512 }
1513
1514 fn parse_additive(&mut self) -> Result<SqlExpression, String> {
1515 parse_additive_expr(self)
1517 }
1518
1519 fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
1520 parse_multiplicative_expr(self)
1522 }
1523
1524 fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
1525 parse_logical_or_expr(self)
1527 }
1528
1529 fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
1530 parse_logical_and_expr(self)
1532 }
1533
1534 fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
1535 parse_case_expr(self)
1537 }
1538
1539 fn parse_primary(&mut self) -> Result<SqlExpression, String> {
1540 let columns = self.columns.clone();
1543 let in_method_args = self.in_method_args;
1544 let ctx = PrimaryExpressionContext {
1545 columns: &columns,
1546 in_method_args,
1547 };
1548 parse_primary_expr(self, &ctx)
1549 }
1550
1551 fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
1553 self.in_method_args = true;
1555
1556 let args = self.parse_argument_list()?;
1557
1558 self.in_method_args = false;
1560
1561 Ok(args)
1562 }
1563
1564 fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
1565 let mut args = Vec::new();
1566 let mut has_distinct = false;
1567
1568 if !matches!(self.current_token, Token::RightParen) {
1569 if matches!(self.current_token, Token::Distinct) {
1571 self.advance(); has_distinct = true;
1573 }
1574
1575 args.push(self.parse_additive()?);
1577
1578 while matches!(self.current_token, Token::Comma) {
1580 self.advance();
1581 args.push(self.parse_additive()?);
1582 }
1583 }
1584
1585 Ok((args, has_distinct))
1586 }
1587
1588 fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
1589 let mut expressions = Vec::new();
1590
1591 loop {
1592 expressions.push(self.parse_expression()?);
1593
1594 if matches!(self.current_token, Token::Comma) {
1595 self.advance();
1596 } else {
1597 break;
1598 }
1599 }
1600
1601 Ok(expressions)
1602 }
1603
1604 #[must_use]
1605 pub fn get_position(&self) -> usize {
1606 self.lexer.get_position()
1607 }
1608
1609 fn is_join_token(&self) -> bool {
1611 matches!(
1612 self.current_token,
1613 Token::Join | Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
1614 )
1615 }
1616
1617 fn parse_join_clause(&mut self) -> Result<JoinClause, String> {
1619 let join_type = match &self.current_token {
1621 Token::Join => {
1622 self.advance();
1623 JoinType::Inner }
1625 Token::Inner => {
1626 self.advance();
1627 if !matches!(self.current_token, Token::Join) {
1628 return Err("Expected JOIN after INNER".to_string());
1629 }
1630 self.advance();
1631 JoinType::Inner
1632 }
1633 Token::Left => {
1634 self.advance();
1635 if matches!(self.current_token, Token::Outer) {
1637 self.advance();
1638 }
1639 if !matches!(self.current_token, Token::Join) {
1640 return Err("Expected JOIN after LEFT".to_string());
1641 }
1642 self.advance();
1643 JoinType::Left
1644 }
1645 Token::Right => {
1646 self.advance();
1647 if matches!(self.current_token, Token::Outer) {
1649 self.advance();
1650 }
1651 if !matches!(self.current_token, Token::Join) {
1652 return Err("Expected JOIN after RIGHT".to_string());
1653 }
1654 self.advance();
1655 JoinType::Right
1656 }
1657 Token::Full => {
1658 self.advance();
1659 if matches!(self.current_token, Token::Outer) {
1661 self.advance();
1662 }
1663 if !matches!(self.current_token, Token::Join) {
1664 return Err("Expected JOIN after FULL".to_string());
1665 }
1666 self.advance();
1667 JoinType::Full
1668 }
1669 Token::Cross => {
1670 self.advance();
1671 if !matches!(self.current_token, Token::Join) {
1672 return Err("Expected JOIN after CROSS".to_string());
1673 }
1674 self.advance();
1675 JoinType::Cross
1676 }
1677 _ => return Err("Expected JOIN keyword".to_string()),
1678 };
1679
1680 let (table, alias) = self.parse_join_table_source()?;
1682
1683 let condition = if join_type == JoinType::Cross {
1685 JoinCondition { conditions: vec![] }
1687 } else {
1688 if !matches!(self.current_token, Token::On) {
1689 return Err("Expected ON keyword after JOIN table".to_string());
1690 }
1691 self.advance();
1692 self.parse_join_condition()?
1693 };
1694
1695 Ok(JoinClause {
1696 join_type,
1697 table,
1698 alias,
1699 condition,
1700 })
1701 }
1702
1703 fn parse_join_table_source(&mut self) -> Result<(TableSource, Option<String>), String> {
1704 let table = match &self.current_token {
1705 Token::Identifier(name) => {
1706 let table_name = name.clone();
1707 self.advance();
1708 TableSource::Table(table_name)
1709 }
1710 Token::LeftParen => {
1711 self.advance();
1713 let subquery = self.parse_select_statement_inner()?;
1714 if !matches!(self.current_token, Token::RightParen) {
1715 return Err("Expected ')' after subquery".to_string());
1716 }
1717 self.advance();
1718
1719 let alias = match &self.current_token {
1721 Token::Identifier(alias_name) => {
1722 let alias = alias_name.clone();
1723 self.advance();
1724 alias
1725 }
1726 Token::As => {
1727 self.advance();
1728 match &self.current_token {
1729 Token::Identifier(alias_name) => {
1730 let alias = alias_name.clone();
1731 self.advance();
1732 alias
1733 }
1734 _ => return Err("Expected alias after AS keyword".to_string()),
1735 }
1736 }
1737 _ => return Err("Subqueries must have an alias".to_string()),
1738 };
1739
1740 return Ok((
1741 TableSource::DerivedTable {
1742 query: Box::new(subquery),
1743 alias: alias.clone(),
1744 },
1745 Some(alias),
1746 ));
1747 }
1748 _ => return Err("Expected table name or subquery in JOIN clause".to_string()),
1749 };
1750
1751 let alias = match &self.current_token {
1753 Token::Identifier(alias_name) => {
1754 let alias = alias_name.clone();
1755 self.advance();
1756 Some(alias)
1757 }
1758 Token::As => {
1759 self.advance();
1760 match &self.current_token {
1761 Token::Identifier(alias_name) => {
1762 let alias = alias_name.clone();
1763 self.advance();
1764 Some(alias)
1765 }
1766 _ => return Err("Expected alias after AS keyword".to_string()),
1767 }
1768 }
1769 _ => None,
1770 };
1771
1772 Ok((table, alias))
1773 }
1774
1775 fn parse_join_condition(&mut self) -> Result<JoinCondition, String> {
1776 let mut conditions = Vec::new();
1777
1778 conditions.push(self.parse_single_join_condition()?);
1780
1781 while matches!(self.current_token, Token::And) {
1783 self.advance(); conditions.push(self.parse_single_join_condition()?);
1785 }
1786
1787 Ok(JoinCondition { conditions })
1788 }
1789
1790 fn parse_single_join_condition(&mut self) -> Result<SingleJoinCondition, String> {
1791 let left_expr = self.parse_additive()?;
1794
1795 let operator = match &self.current_token {
1797 Token::Equal => JoinOperator::Equal,
1798 Token::NotEqual => JoinOperator::NotEqual,
1799 Token::LessThan => JoinOperator::LessThan,
1800 Token::LessThanOrEqual => JoinOperator::LessThanOrEqual,
1801 Token::GreaterThan => JoinOperator::GreaterThan,
1802 Token::GreaterThanOrEqual => JoinOperator::GreaterThanOrEqual,
1803 _ => return Err("Expected comparison operator in JOIN condition".to_string()),
1804 };
1805 self.advance();
1806
1807 let right_expr = self.parse_additive()?;
1809
1810 Ok(SingleJoinCondition {
1811 left_expr,
1812 operator,
1813 right_expr,
1814 })
1815 }
1816
1817 fn parse_column_reference(&mut self) -> Result<String, String> {
1818 match &self.current_token {
1819 Token::Identifier(name) => {
1820 let mut column_ref = name.clone();
1821 self.advance();
1822
1823 if matches!(self.current_token, Token::Dot) {
1825 self.advance();
1826 match &self.current_token {
1827 Token::Identifier(col_name) => {
1828 column_ref.push('.');
1829 column_ref.push_str(col_name);
1830 self.advance();
1831 }
1832 _ => return Err("Expected column name after '.'".to_string()),
1833 }
1834 }
1835
1836 Ok(column_ref)
1837 }
1838 _ => Err("Expected column reference".to_string()),
1839 }
1840 }
1841}
1842
1843#[derive(Debug, Clone)]
1845pub enum CursorContext {
1846 SelectClause,
1847 FromClause,
1848 WhereClause,
1849 OrderByClause,
1850 AfterColumn(String),
1851 AfterLogicalOp(LogicalOp),
1852 AfterComparisonOp(String, String), InMethodCall(String, String), InExpression,
1855 Unknown,
1856}
1857
/// Returns the prefix of `s` ending at byte offset `pos`, without ever
/// splitting a UTF-8 character.
///
/// If `pos` is at or past the end the whole string is returned; if `pos`
/// falls inside a multi-byte character the cut moves back to the start of
/// that character.
fn safe_slice_to(s: &str, pos: usize) -> &str {
    if pos >= s.len() {
        return s;
    }

    // Offset 0 is always a boundary, so the search cannot fail.
    let end = (0..=pos)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);

    &s[..end]
}
1872
/// Returns the suffix of `s` starting at byte offset `pos`, without ever
/// splitting a UTF-8 character.
///
/// If `pos` is at or past the end an empty string is returned; if `pos`
/// falls inside a multi-byte character the start moves forward to the next
/// character boundary.
fn safe_slice_from(s: &str, pos: usize) -> &str {
    if pos >= s.len() {
        return "";
    }

    // `s.len()` is always a boundary, so the search cannot fail.
    let start = (pos..=s.len())
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(s.len());

    &s[start..]
}
1887
1888#[must_use]
1889pub fn detect_cursor_context(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
1890 let truncated = safe_slice_to(query, cursor_pos);
1891 let mut parser = Parser::new(truncated);
1892
1893 if let Ok(stmt) = parser.parse() {
1895 let (ctx, partial) = analyze_statement(&stmt, truncated, cursor_pos);
1896 #[cfg(test)]
1897 println!("analyze_statement returned: {ctx:?}, {partial:?} for query: '{truncated}'");
1898 (ctx, partial)
1899 } else {
1900 let (ctx, partial) = analyze_partial(truncated, cursor_pos);
1902 #[cfg(test)]
1903 println!("analyze_partial returned: {ctx:?}, {partial:?} for query: '{truncated}'");
1904 (ctx, partial)
1905 }
1906}
1907
1908#[must_use]
1909pub fn tokenize_query(query: &str) -> Vec<String> {
1910 let mut lexer = Lexer::new(query);
1911 let tokens = lexer.tokenize_all();
1912 tokens.iter().map(|t| format!("{t:?}")).collect()
1913}
1914
/// Searches backwards from just before `pos` for an opening double quote.
///
/// Indices `pos - 1` down to `0` are examined; a quote directly preceded by
/// a backslash is treated as escaped and skipped. Returns the index of the
/// first unescaped quote found, or `None` if there is none.
///
/// `pos` is clamped to `bytes.len()`, so an out-of-range position or an
/// empty slice yields a normal result instead of an index panic.
#[must_use]
fn find_quote_start(bytes: &[u8], pos: usize) -> Option<usize> {
    let search_end = pos.min(bytes.len());

    (0..search_end)
        .rev()
        .find(|&idx| bytes[idx] == b'"' && (idx == 0 || bytes[idx - 1] != b'\\'))
}
1937
1938fn handle_method_call_context(col_name: &str, after_dot: &str) -> (CursorContext, Option<String>) {
1940 let partial_method = if after_dot.is_empty() {
1942 None
1943 } else if after_dot.chars().all(|c| c.is_alphanumeric() || c == '_') {
1944 Some(after_dot.to_string())
1945 } else {
1946 None
1947 };
1948
1949 let col_name_for_context =
1951 if col_name.starts_with('"') && col_name.ends_with('"') && col_name.len() > 2 {
1952 col_name[1..col_name.len() - 1].to_string()
1953 } else {
1954 col_name.to_string()
1955 };
1956
1957 (
1958 CursorContext::AfterColumn(col_name_for_context),
1959 partial_method,
1960 )
1961}
1962
1963fn check_after_comparison_operator(query: &str) -> Option<(CursorContext, Option<String>)> {
1965 for op in &Parser::COMPARISON_OPERATORS {
1966 if let Some(op_pos) = query.rfind(op) {
1967 let before_op = safe_slice_to(query, op_pos);
1968 let after_op_start = op_pos + op.len();
1969 let after_op = if after_op_start < query.len() {
1970 &query[after_op_start..]
1971 } else {
1972 ""
1973 };
1974
1975 if let Some(col_name) = before_op.split_whitespace().last() {
1977 if col_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
1978 let after_op_trimmed = after_op.trim();
1980 if after_op_trimmed.is_empty()
1981 || (after_op_trimmed
1982 .chars()
1983 .all(|c| c.is_alphanumeric() || c == '_')
1984 && !after_op_trimmed.contains('('))
1985 {
1986 let partial = if after_op_trimmed.is_empty() {
1987 None
1988 } else {
1989 Some(after_op_trimmed.to_string())
1990 };
1991 return Some((
1992 CursorContext::AfterComparisonOp(
1993 col_name.to_string(),
1994 op.trim().to_string(),
1995 ),
1996 partial,
1997 ));
1998 }
1999 }
2000 }
2001 }
2002 }
2003 None
2004}
2005
2006fn analyze_statement(
2007 stmt: &SelectStatement,
2008 query: &str,
2009 _cursor_pos: usize,
2010) -> (CursorContext, Option<String>) {
2011 let trimmed = query.trim();
2013
2014 if let Some(result) = check_after_comparison_operator(query) {
2016 return result;
2017 }
2018
2019 let ends_with_logical_op = |s: &str| -> bool {
2022 let s_upper = s.to_uppercase();
2023 s_upper.ends_with(" AND") || s_upper.ends_with(" OR")
2024 };
2025
2026 if ends_with_logical_op(trimmed) {
2027 } else {
2029 if let Some(dot_pos) = trimmed.rfind('.') {
2031 let before_dot = safe_slice_to(trimmed, dot_pos);
2033 let after_dot_start = dot_pos + 1;
2034 let after_dot = if after_dot_start < trimmed.len() {
2035 &trimmed[after_dot_start..]
2036 } else {
2037 ""
2038 };
2039
2040 if !after_dot.contains('(') {
2043 let col_name = if before_dot.ends_with('"') {
2045 let bytes = before_dot.as_bytes();
2047 let pos = before_dot.len() - 1; find_quote_start(bytes, pos).map(|start| safe_slice_from(before_dot, start))
2050 } else {
2051 before_dot
2054 .split_whitespace()
2055 .last()
2056 .map(|word| word.trim_start_matches('('))
2057 };
2058
2059 if let Some(col_name) = col_name {
2060 let is_valid = Parser::is_valid_identifier(col_name);
2062
2063 if is_valid {
2064 return handle_method_call_context(col_name, after_dot);
2065 }
2066 }
2067 }
2068 }
2069 }
2070
2071 if let Some(where_clause) = &stmt.where_clause {
2073 let trimmed_upper = trimmed.to_uppercase();
2075 if trimmed_upper.ends_with(" AND") || trimmed_upper.ends_with(" OR") {
2076 let op = if trimmed_upper.ends_with(" AND") {
2077 LogicalOp::And
2078 } else {
2079 LogicalOp::Or
2080 };
2081 return (CursorContext::AfterLogicalOp(op), None);
2082 }
2083
2084 let query_upper = query.to_uppercase();
2086 if let Some(and_pos) = query_upper.rfind(" AND ") {
2087 let after_and = safe_slice_from(query, and_pos + 5);
2088 let partial = extract_partial_at_end(after_and);
2089 if partial.is_some() {
2090 return (CursorContext::AfterLogicalOp(LogicalOp::And), partial);
2091 }
2092 }
2093
2094 if let Some(or_pos) = query_upper.rfind(" OR ") {
2095 let after_or = safe_slice_from(query, or_pos + 4);
2096 let partial = extract_partial_at_end(after_or);
2097 if partial.is_some() {
2098 return (CursorContext::AfterLogicalOp(LogicalOp::Or), partial);
2099 }
2100 }
2101
2102 if let Some(last_condition) = where_clause.conditions.last() {
2103 if let Some(connector) = &last_condition.connector {
2104 return (
2106 CursorContext::AfterLogicalOp(connector.clone()),
2107 extract_partial_at_end(query),
2108 );
2109 }
2110 }
2111 return (CursorContext::WhereClause, extract_partial_at_end(query));
2113 }
2114
2115 let query_upper = query.to_uppercase();
2117 if query_upper.ends_with(" ORDER BY") {
2118 return (CursorContext::OrderByClause, None);
2119 }
2120
2121 if stmt.order_by.is_some() {
2123 return (CursorContext::OrderByClause, extract_partial_at_end(query));
2124 }
2125
2126 if stmt.from_table.is_some() && stmt.where_clause.is_none() && stmt.order_by.is_none() {
2127 return (CursorContext::FromClause, extract_partial_at_end(query));
2128 }
2129
2130 if !stmt.columns.is_empty() && stmt.from_table.is_none() {
2131 return (CursorContext::SelectClause, extract_partial_at_end(query));
2132 }
2133
2134 (CursorContext::Unknown, None)
2135}
2136
2137fn find_last_token(tokens: &[(usize, usize, Token)], target: &Token) -> Option<usize> {
2139 tokens
2140 .iter()
2141 .rposition(|(_, _, t)| t == target)
2142 .map(|idx| tokens[idx].0)
2143}
2144
2145fn find_last_matching_token<F>(
2147 tokens: &[(usize, usize, Token)],
2148 predicate: F,
2149) -> Option<(usize, &Token)>
2150where
2151 F: Fn(&Token) -> bool,
2152{
2153 tokens
2154 .iter()
2155 .rposition(|(_, _, t)| predicate(t))
2156 .map(|idx| (tokens[idx].0, &tokens[idx].2))
2157}
2158
2159fn is_in_clause(
2161 tokens: &[(usize, usize, Token)],
2162 clause_token: Token,
2163 exclude_tokens: &[Token],
2164) -> bool {
2165 if let Some(clause_pos) = find_last_token(tokens, &clause_token) {
2167 for (pos, _, token) in tokens.iter() {
2169 if *pos > clause_pos && exclude_tokens.contains(token) {
2170 return false;
2171 }
2172 }
2173 return true;
2174 }
2175 false
2176}
2177
2178fn analyze_partial(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
2179 let mut lexer = Lexer::new(query);
2181 let tokens = lexer.tokenize_all_with_positions();
2182
2183 let trimmed = query.trim();
2184
2185 #[cfg(test)]
2186 {
2187 if trimmed.contains("\"Last Name\"") {
2188 eprintln!("DEBUG analyze_partial: query='{query}', trimmed='{trimmed}'");
2189 }
2190 }
2191
2192 if let Some(result) = check_after_comparison_operator(query) {
2194 return result;
2195 }
2196
2197 if let Some(dot_pos) = trimmed.rfind('.') {
2200 #[cfg(test)]
2201 {
2202 if trimmed.contains("\"Last Name\"") {
2203 eprintln!("DEBUG: Found dot at position {dot_pos}");
2204 }
2205 }
2206 let before_dot = &trimmed[..dot_pos];
2208 let after_dot = &trimmed[dot_pos + 1..];
2209
2210 if !after_dot.contains('(') {
2213 let col_name = if before_dot.ends_with('"') {
2216 let bytes = before_dot.as_bytes();
2218 let pos = before_dot.len() - 1; #[cfg(test)]
2221 {
2222 if trimmed.contains("\"Last Name\"") {
2223 eprintln!("DEBUG: before_dot='{before_dot}', looking for opening quote");
2224 }
2225 }
2226
2227 let found_start = find_quote_start(bytes, pos);
2228
2229 if let Some(start) = found_start {
2230 let result = safe_slice_from(before_dot, start);
2232 #[cfg(test)]
2233 {
2234 if trimmed.contains("\"Last Name\"") {
2235 eprintln!("DEBUG: Extracted quoted identifier: '{result}'");
2236 }
2237 }
2238 Some(result)
2239 } else {
2240 #[cfg(test)]
2241 {
2242 if trimmed.contains("\"Last Name\"") {
2243 eprintln!("DEBUG: No opening quote found!");
2244 }
2245 }
2246 None
2247 }
2248 } else {
2249 before_dot
2252 .split_whitespace()
2253 .last()
2254 .map(|word| word.trim_start_matches('('))
2255 };
2256
2257 if let Some(col_name) = col_name {
2258 #[cfg(test)]
2259 {
2260 if trimmed.contains("\"Last Name\"") {
2261 eprintln!("DEBUG: col_name = '{col_name}'");
2262 }
2263 }
2264
2265 let is_valid = Parser::is_valid_identifier(col_name);
2267
2268 #[cfg(test)]
2269 {
2270 if trimmed.contains("\"Last Name\"") {
2271 eprintln!("DEBUG: is_valid = {is_valid}");
2272 }
2273 }
2274
2275 if is_valid {
2276 return handle_method_call_context(col_name, after_dot);
2277 }
2278 }
2279 }
2280 }
2281
2282 if let Some((pos, token)) =
2284 find_last_matching_token(&tokens, |t| matches!(t, Token::And | Token::Or))
2285 {
2286 let token_end_pos = if matches!(token, Token::And) {
2288 pos + 3 } else {
2290 pos + 2 };
2292
2293 if cursor_pos > token_end_pos {
2294 let after_op = safe_slice_from(query, token_end_pos + 1); let partial = extract_partial_at_end(after_op);
2297 let op = if matches!(token, Token::And) {
2298 LogicalOp::And
2299 } else {
2300 LogicalOp::Or
2301 };
2302 return (CursorContext::AfterLogicalOp(op), partial);
2303 }
2304 }
2305
2306 if let Some((_, _, last_token)) = tokens.last() {
2308 if matches!(last_token, Token::And | Token::Or) {
2309 let op = if matches!(last_token, Token::And) {
2310 LogicalOp::And
2311 } else {
2312 LogicalOp::Or
2313 };
2314 return (CursorContext::AfterLogicalOp(op), None);
2315 }
2316 }
2317
2318 if let Some(order_pos) = find_last_token(&tokens, &Token::OrderBy) {
2320 let has_by = tokens
2322 .iter()
2323 .any(|(pos, _, t)| *pos > order_pos && matches!(t, Token::By));
2324 if has_by
2325 || tokens
2326 .last()
2327 .map_or(false, |(_, _, t)| matches!(t, Token::OrderBy))
2328 {
2329 return (CursorContext::OrderByClause, extract_partial_at_end(query));
2330 }
2331 }
2332
2333 if is_in_clause(&tokens, Token::Where, &[Token::OrderBy, Token::GroupBy]) {
2335 return (CursorContext::WhereClause, extract_partial_at_end(query));
2336 }
2337
2338 if is_in_clause(
2340 &tokens,
2341 Token::From,
2342 &[Token::Where, Token::OrderBy, Token::GroupBy],
2343 ) {
2344 return (CursorContext::FromClause, extract_partial_at_end(query));
2345 }
2346
2347 if find_last_token(&tokens, &Token::Select).is_some()
2349 && find_last_token(&tokens, &Token::From).is_none()
2350 {
2351 return (CursorContext::SelectClause, extract_partial_at_end(query));
2352 }
2353
2354 (CursorContext::Unknown, None)
2355}
2356
2357fn extract_partial_at_end(query: &str) -> Option<String> {
2358 let trimmed = query.trim();
2359
2360 if let Some(last_word) = trimmed.split_whitespace().last() {
2362 if last_word.starts_with('"') && !last_word.ends_with('"') {
2363 return Some(last_word.to_string());
2365 }
2366 }
2367
2368 let last_word = trimmed.split_whitespace().last()?;
2370
2371 if last_word.chars().all(|c| c.is_alphanumeric() || c == '_') {
2374 if !is_sql_keyword(last_word) {
2376 Some(last_word.to_string())
2377 } else {
2378 None
2379 }
2380 } else {
2381 None
2382 }
2383}
2384
2385impl ParsePrimary for Parser {
2387 fn current_token(&self) -> &Token {
2388 &self.current_token
2389 }
2390
2391 fn advance(&mut self) {
2392 self.advance();
2393 }
2394
2395 fn consume(&mut self, expected: Token) -> Result<(), String> {
2396 self.consume(expected)
2397 }
2398
2399 fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
2400 self.parse_case_expression()
2401 }
2402
2403 fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
2404 self.parse_function_args()
2405 }
2406
2407 fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
2408 self.parse_window_spec()
2409 }
2410
2411 fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
2412 self.parse_logical_or()
2413 }
2414
2415 fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2416 self.parse_comparison()
2417 }
2418
2419 fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2420 self.parse_expression_list()
2421 }
2422
2423 fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2424 if matches!(self.current_token, Token::With) {
2426 self.parse_with_clause_inner()
2427 } else {
2428 self.parse_select_statement_inner()
2429 }
2430 }
2431}
2432
2433impl ExpressionParser for Parser {
2435 fn current_token(&self) -> &Token {
2436 &self.current_token
2437 }
2438
2439 fn advance(&mut self) {
2440 match &self.current_token {
2442 Token::LeftParen => self.paren_depth += 1,
2443 Token::RightParen => {
2444 self.paren_depth -= 1;
2445 }
2446 _ => {}
2447 }
2448 self.current_token = self.lexer.next_token();
2449 }
2450
2451 fn peek(&self) -> Option<&Token> {
2452 None }
2459
2460 fn is_at_end(&self) -> bool {
2461 matches!(self.current_token, Token::Eof)
2462 }
2463
2464 fn consume(&mut self, expected: Token) -> Result<(), String> {
2465 if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
2467 self.update_paren_depth(&expected)?;
2468 self.current_token = self.lexer.next_token();
2469 Ok(())
2470 } else {
2471 Err(format!(
2472 "Expected {:?}, found {:?}",
2473 expected, self.current_token
2474 ))
2475 }
2476 }
2477
2478 fn parse_identifier(&mut self) -> Result<String, String> {
2479 if let Token::Identifier(id) = &self.current_token {
2480 let id = id.clone();
2481 self.advance();
2482 Ok(id)
2483 } else {
2484 Err(format!(
2485 "Expected identifier, found {:?}",
2486 self.current_token
2487 ))
2488 }
2489 }
2490}
2491
2492impl ParseArithmetic for Parser {
2494 fn current_token(&self) -> &Token {
2495 &self.current_token
2496 }
2497
2498 fn advance(&mut self) {
2499 self.advance();
2500 }
2501
2502 fn consume(&mut self, expected: Token) -> Result<(), String> {
2503 self.consume(expected)
2504 }
2505
2506 fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2507 self.parse_primary()
2508 }
2509
2510 fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
2511 self.parse_multiplicative()
2512 }
2513
2514 fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
2515 self.parse_method_args()
2516 }
2517}
2518
2519impl ParseComparison for Parser {
2521 fn current_token(&self) -> &Token {
2522 &self.current_token
2523 }
2524
2525 fn advance(&mut self) {
2526 self.advance();
2527 }
2528
2529 fn consume(&mut self, expected: Token) -> Result<(), String> {
2530 self.consume(expected)
2531 }
2532
2533 fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2534 self.parse_primary()
2535 }
2536
2537 fn parse_additive(&mut self) -> Result<SqlExpression, String> {
2538 self.parse_additive()
2539 }
2540
2541 fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2542 self.parse_expression_list()
2543 }
2544
2545 fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2546 if matches!(self.current_token, Token::With) {
2548 self.parse_with_clause_inner()
2549 } else {
2550 self.parse_select_statement_inner()
2551 }
2552 }
2553}
2554
2555impl ParseLogical for Parser {
2557 fn current_token(&self) -> &Token {
2558 &self.current_token
2559 }
2560
2561 fn advance(&mut self) {
2562 self.advance();
2563 }
2564
2565 fn consume(&mut self, expected: Token) -> Result<(), String> {
2566 self.consume(expected)
2567 }
2568
2569 fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
2570 self.parse_logical_and()
2571 }
2572
2573 fn parse_base_logical_expression(&mut self) -> Result<SqlExpression, String> {
2574 self.parse_comparison()
2577 }
2578
2579 fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2580 self.parse_comparison()
2581 }
2582
2583 fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2584 self.parse_expression_list()
2585 }
2586}
2587
2588impl ParseCase for Parser {
2590 fn current_token(&self) -> &Token {
2591 &self.current_token
2592 }
2593
2594 fn advance(&mut self) {
2595 self.advance();
2596 }
2597
2598 fn consume(&mut self, expected: Token) -> Result<(), String> {
2599 self.consume(expected)
2600 }
2601
2602 fn parse_expression(&mut self) -> Result<SqlExpression, String> {
2603 self.parse_expression()
2604 }
2605}
2606
2607fn is_sql_keyword(word: &str) -> bool {
2608 let mut lexer = Lexer::new(word);
2610 let token = lexer.next_token();
2611
2612 !matches!(token, Token::Identifier(_) | Token::Eof)
2614}
2615
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `sql` with an explicit parser mode, panicking on failure.
    fn parse_in_mode(sql: &str, mode: ParserMode) -> SelectStatement {
        Parser::with_mode(sql, mode).parse().unwrap()
    }

    /// `Parser::new` must not collect comments.
    #[test]
    fn test_parser_mode_default_is_standard() {
        let stmt = Parser::new("-- Leading comment\nSELECT * FROM users")
            .parse()
            .unwrap();

        assert!(stmt.leading_comments.is_empty());
        assert!(stmt.trailing_comment.is_none());
    }

    /// Line comments before the statement are kept, in order.
    #[test]
    fn test_parser_mode_preserve_leading_comments() {
        let stmt = parse_in_mode(
            "-- Important query\n-- Author: Alice\nSELECT id, name FROM users",
            ParserMode::PreserveComments,
        );

        let comments = &stmt.leading_comments;
        assert_eq!(comments.len(), 2);
        assert!(comments[0].is_line_comment);
        assert!(comments[0].text.contains("Important query"));
        assert!(comments[1].text.contains("Author: Alice"));
    }

    /// A line comment after the statement is kept as the trailing comment.
    #[test]
    fn test_parser_mode_preserve_trailing_comment() {
        let stmt = parse_in_mode(
            "SELECT * FROM users -- Fetch all users",
            ParserMode::PreserveComments,
        );

        assert!(stmt.trailing_comment.is_some());
        let comment = stmt.trailing_comment.unwrap();
        assert!(comment.is_line_comment);
        assert!(comment.text.contains("Fetch all users"));
    }

    /// Block comments are kept and flagged as non-line comments.
    #[test]
    fn test_parser_mode_preserve_block_comments() {
        let stmt = parse_in_mode(
            "/* Query explanation */\nSELECT * FROM users",
            ParserMode::PreserveComments,
        );

        assert_eq!(stmt.leading_comments.len(), 1);
        let block = &stmt.leading_comments[0];
        assert!(!block.is_line_comment);
        assert!(block.text.contains("Query explanation"));
    }

    /// Leading and trailing comments can be preserved together.
    #[test]
    fn test_parser_mode_preserve_both_comments() {
        let stmt = parse_in_mode(
            "-- Leading\nSELECT * FROM users -- Trailing",
            ParserMode::PreserveComments,
        );

        assert_eq!(stmt.leading_comments.len(), 1);
        assert!(stmt.leading_comments[0].text.contains("Leading"));
        assert!(stmt.trailing_comment.is_some());
        assert!(stmt.trailing_comment.unwrap().text.contains("Trailing"));
    }

    /// Standard mode drops every comment but still parses the statement.
    #[test]
    fn test_parser_mode_standard_ignores_comments() {
        let stmt = parse_in_mode(
            "-- Comment 1\n/* Comment 2 */\nSELECT * FROM users -- Comment 3",
            ParserMode::Standard,
        );

        assert!(stmt.leading_comments.is_empty());
        assert!(stmt.trailing_comment.is_none());

        assert_eq!(stmt.select_items.len(), 1);
        assert_eq!(stmt.from_table, Some("users".to_string()));
    }

    /// `Parser::new` and an explicit `ParserMode::Standard` must agree.
    #[test]
    fn test_parser_backward_compatibility() {
        let sql = "SELECT id, name FROM users WHERE active = true";

        let via_new = Parser::new(sql).parse().unwrap();
        let via_mode = parse_in_mode(sql, ParserMode::Standard);

        assert_eq!(via_new.select_items.len(), via_mode.select_items.len());
        assert_eq!(via_new.from_table, via_mode.from_table);
        assert_eq!(
            via_new.where_clause.is_some(),
            via_mode.where_clause.is_some()
        );
        assert!(via_new.leading_comments.is_empty());
        assert!(via_mode.leading_comments.is_empty());
    }
}