1pub use super::parser::ast::{
5 CTEType, Comment, Condition, DataFormat, FileCTESpec, FrameBound, FrameUnit, HttpMethod,
6 IntoTable, JoinClause, JoinCondition, JoinOperator, JoinType, LogicalOp, OrderByColumn,
7 OrderByItem, PivotAggregate, SelectItem, SelectStatement, SetOperation, SingleJoinCondition,
8 SortDirection, SqlExpression, TableFunction, TableSource, WebCTESpec, WhenBranch, WhereClause,
9 WindowFrame, WindowSpec, CTE,
10};
11pub use super::parser::legacy::{ParseContext, ParseState, Schema, SqlParser, SqlToken, TableInfo};
12pub use super::parser::lexer::{Lexer, LexerMode, Token};
13pub use super::parser::ParserConfig;
14
15pub use super::parser::formatter::{format_ast_tree, format_sql_pretty, format_sql_pretty_compact};
17
18pub use super::parser::ast_formatter::{format_sql_ast, format_sql_ast_with_config, FormatConfig};
20
21use super::parser::expressions::arithmetic::{
23 parse_additive as parse_additive_expr, parse_multiplicative as parse_multiplicative_expr,
24 ParseArithmetic,
25};
26use super::parser::expressions::case::{parse_case_expression as parse_case_expr, ParseCase};
27use super::parser::expressions::comparison::{
28 parse_comparison as parse_comparison_expr, parse_in_operator, ParseComparison,
29};
30use super::parser::expressions::logical::{
31 parse_logical_and as parse_logical_and_expr, parse_logical_or as parse_logical_or_expr,
32 ParseLogical,
33};
34use super::parser::expressions::primary::{
35 parse_primary as parse_primary_expr, ParsePrimary, PrimaryExpressionContext,
36};
37use super::parser::expressions::ExpressionParser;
38
39use crate::sql::functions::{FunctionCategory, FunctionRegistry};
41use crate::sql::generators::GeneratorRegistry;
42use std::sync::Arc;
43
44use super::parser::file_cte_parser::FileCteParser;
46use super::parser::web_cte_parser::WebCteParser;
47
48#[derive(Debug, Clone, Copy, PartialEq)]
50pub enum ParserMode {
51 Standard,
53 PreserveComments,
55}
56
57impl Default for ParserMode {
58 fn default() -> Self {
59 ParserMode::Standard
60 }
61}
62
63pub struct Parser {
64 lexer: Lexer,
65 pub current_token: Token, in_method_args: bool, columns: Vec<String>, paren_depth: i32, paren_depth_stack: Vec<i32>, _config: ParserConfig, debug_trace: bool, trace_depth: usize, function_registry: Arc<FunctionRegistry>, generator_registry: Arc<GeneratorRegistry>, mode: ParserMode, }
77
78impl Parser {
79 #[must_use]
80 pub fn new(input: &str) -> Self {
81 Self::with_mode(input, ParserMode::default())
82 }
83
84 #[must_use]
86 pub fn with_mode(input: &str, mode: ParserMode) -> Self {
87 let lexer_mode = match mode {
89 ParserMode::Standard => LexerMode::SkipComments,
90 ParserMode::PreserveComments => LexerMode::PreserveComments,
91 };
92
93 let mut lexer = Lexer::with_mode(input, lexer_mode);
94 let current_token = lexer.next_token();
95 Self {
96 lexer,
97 current_token,
98 in_method_args: false,
99 columns: Vec::new(),
100 paren_depth: 0,
101 paren_depth_stack: Vec::new(),
102 _config: ParserConfig::default(),
103 debug_trace: false,
104 trace_depth: 0,
105 function_registry: Arc::new(FunctionRegistry::new()),
106 generator_registry: Arc::new(GeneratorRegistry::new()),
107 mode,
108 }
109 }
110
111 #[must_use]
112 pub fn with_config(input: &str, config: ParserConfig) -> Self {
113 let mut lexer = Lexer::new(input);
114 let current_token = lexer.next_token();
115 Self {
116 lexer,
117 current_token,
118 in_method_args: false,
119 columns: Vec::new(),
120 paren_depth: 0,
121 paren_depth_stack: Vec::new(),
122 _config: config,
123 debug_trace: false,
124 trace_depth: 0,
125 function_registry: Arc::new(FunctionRegistry::new()),
126 generator_registry: Arc::new(GeneratorRegistry::new()),
127 mode: ParserMode::default(),
128 }
129 }
130
131 #[must_use]
132 pub fn with_columns(mut self, columns: Vec<String>) -> Self {
133 self.columns = columns;
134 self
135 }
136
137 #[must_use]
138 pub fn with_debug_trace(mut self, enabled: bool) -> Self {
139 self.debug_trace = enabled;
140 self
141 }
142
143 #[must_use]
144 pub fn with_function_registry(mut self, registry: Arc<FunctionRegistry>) -> Self {
145 self.function_registry = registry;
146 self
147 }
148
149 #[must_use]
150 pub fn with_generator_registry(mut self, registry: Arc<GeneratorRegistry>) -> Self {
151 self.generator_registry = registry;
152 self
153 }
154
155 fn trace_enter(&mut self, context: &str) {
156 if self.debug_trace {
157 let indent = " ".repeat(self.trace_depth);
158 eprintln!("{}→ {} | Token: {:?}", indent, context, self.current_token);
159 self.trace_depth += 1;
160 }
161 }
162
163 fn trace_exit(&mut self, context: &str, result: &Result<impl std::fmt::Debug, String>) {
164 if self.debug_trace {
165 self.trace_depth = self.trace_depth.saturating_sub(1);
166 let indent = " ".repeat(self.trace_depth);
167 match result {
168 Ok(val) => eprintln!("{}← {} ✓ | Result: {:?}", indent, context, val),
169 Err(e) => eprintln!("{}← {} ✗ | Error: {}", indent, context, e),
170 }
171 }
172 }
173
174 fn trace_token(&self, action: &str) {
175 if self.debug_trace {
176 let indent = " ".repeat(self.trace_depth);
177 eprintln!("{} {} | Token: {:?}", indent, action, self.current_token);
178 }
179 }
180
181 #[allow(dead_code)]
182 fn peek_token(&self) -> Option<Token> {
183 let mut temp_lexer = self.lexer.clone();
185 let next_token = temp_lexer.next_token();
186 if matches!(next_token, Token::Eof) {
187 None
188 } else {
189 Some(next_token)
190 }
191 }
192
193 fn is_identifier_reserved(id: &str) -> bool {
198 let id_upper = id.to_uppercase();
199 matches!(
200 id_upper.as_str(),
201 "ORDER" | "HAVING" | "LIMIT" | "OFFSET" | "UNION" | "INTERSECT" | "EXCEPT"
202 )
203 }
204
205 const COMPARISON_OPERATORS: [&'static str; 6] = [" > ", " < ", " >= ", " <= ", " = ", " != "];
207
208 pub fn consume(&mut self, expected: Token) -> Result<(), String> {
209 self.trace_token(&format!("Consuming expected {:?}", expected));
210 if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
211 self.update_paren_depth(&expected)?;
213
214 self.current_token = self.lexer.next_token();
215 Ok(())
216 } else {
217 let error_msg = match (&expected, &self.current_token) {
219 (Token::RightParen, Token::Eof) if self.paren_depth > 0 => {
220 format!(
221 "Unclosed parenthesis - missing {} closing parenthes{}",
222 self.paren_depth,
223 if self.paren_depth == 1 { "is" } else { "es" }
224 )
225 }
226 (Token::RightParen, _) if self.paren_depth > 0 => {
227 format!(
228 "Expected closing parenthesis but found {:?} (currently {} unclosed parenthes{})",
229 self.current_token,
230 self.paren_depth,
231 if self.paren_depth == 1 { "is" } else { "es" }
232 )
233 }
234 _ => format!("Expected {:?}, found {:?}", expected, self.current_token),
235 };
236 Err(error_msg)
237 }
238 }
239
240 pub fn advance(&mut self) {
241 match &self.current_token {
243 Token::LeftParen => self.paren_depth += 1,
244 Token::RightParen => {
245 self.paren_depth -= 1;
246 }
249 _ => {}
250 }
251 let old_token = self.current_token.clone();
252 self.current_token = self.lexer.next_token();
253 if self.debug_trace {
254 let indent = " ".repeat(self.trace_depth);
255 eprintln!(
256 "{} Advanced: {:?} → {:?}",
257 indent, old_token, self.current_token
258 );
259 }
260 }
261
262 fn collect_leading_comments(&mut self) -> Vec<Comment> {
265 let mut comments = Vec::new();
266 loop {
267 match &self.current_token {
268 Token::LineComment(text) => {
269 comments.push(Comment::line(text.clone()));
270 self.advance();
271 }
272 Token::BlockComment(text) => {
273 comments.push(Comment::block(text.clone()));
274 self.advance();
275 }
276 _ => break,
277 }
278 }
279 comments
280 }
281
282 fn collect_trailing_comment(&mut self) -> Option<Comment> {
285 match &self.current_token {
286 Token::LineComment(text) => {
287 let comment = Some(Comment::line(text.clone()));
288 self.advance();
289 comment
290 }
291 Token::BlockComment(text) => {
292 let comment = Some(Comment::block(text.clone()));
293 self.advance();
294 comment
295 }
296 _ => None,
297 }
298 }
299
300 fn push_paren_depth(&mut self) {
301 self.paren_depth_stack.push(self.paren_depth);
302 self.paren_depth = 0;
303 }
304
305 fn pop_paren_depth(&mut self) {
306 if let Some(depth) = self.paren_depth_stack.pop() {
307 self.paren_depth = depth;
309 }
310 }
311
312 pub fn parse(&mut self) -> Result<SelectStatement, String> {
313 self.trace_enter("parse");
314
315 let leading_comments = if self.mode == ParserMode::PreserveComments {
318 self.collect_leading_comments()
319 } else {
320 vec![]
321 };
322
323 let result = if matches!(self.current_token, Token::With) {
325 let mut stmt = self.parse_with_clause()?;
326 stmt.leading_comments = leading_comments;
328 stmt
329 } else {
330 let stmt = self.parse_select_statement_with_comments_public(leading_comments)?;
332 self.check_balanced_parentheses()?;
333 stmt
334 };
335
336 self.trace_exit("parse", &Ok(&result));
337 Ok(result)
338 }
339
340 fn parse_select_statement_with_comments_public(
342 &mut self,
343 comments: Vec<Comment>,
344 ) -> Result<SelectStatement, String> {
345 self.parse_select_statement_with_comments(comments)
346 }
347
348 fn parse_with_clause(&mut self) -> Result<SelectStatement, String> {
349 self.consume(Token::With)?;
350 let ctes = self.parse_cte_list()?;
351
352 let mut main_query = self.parse_select_statement_inner_no_comments()?;
354 main_query.ctes = ctes;
355
356 self.check_balanced_parentheses()?;
358
359 Ok(main_query)
360 }
361
362 fn parse_with_clause_inner(&mut self) -> Result<SelectStatement, String> {
363 self.consume(Token::With)?;
364 let ctes = self.parse_cte_list()?;
365
366 let mut main_query = self.parse_select_statement_inner()?;
368 main_query.ctes = ctes;
369
370 Ok(main_query)
371 }
372
373 fn parse_cte_list(&mut self) -> Result<Vec<CTE>, String> {
375 let mut ctes = Vec::new();
376
377 loop {
379 let is_web = if matches!(&self.current_token, Token::Web) {
383 self.trace_token("Found WEB keyword for CTE");
384 self.advance();
385 true
386 } else {
387 false
388 };
389
390 let name = match &self.current_token {
392 Token::Identifier(name) => name.clone(),
393 token => {
394 if let Some(keyword) = token.as_keyword_str() {
396 keyword.to_lowercase()
398 } else {
399 return Err(format!(
400 "Expected CTE name after {}",
401 if is_web { "WEB" } else { "WITH or comma" }
402 ));
403 }
404 }
405 };
406 self.advance();
407
408 let column_list = if matches!(self.current_token, Token::LeftParen) {
410 self.advance();
411 let cols = self.parse_identifier_list()?;
412 self.consume(Token::RightParen)?;
413 Some(cols)
414 } else {
415 None
416 };
417
418 self.consume(Token::As)?;
420
421 let cte_type = if is_web {
422 self.consume(Token::LeftParen)?;
424 let web_spec = WebCteParser::parse(self)?;
426 self.consume(Token::RightParen)?;
428 CTEType::Web(web_spec)
429 } else {
430 self.push_paren_depth();
434 self.consume(Token::LeftParen)?;
435
436 let result = if matches!(&self.current_token, Token::File) {
437 self.trace_token("Found FILE keyword inside CTE parens");
438 self.advance();
439 let file_spec = FileCteParser::parse(self)?;
440 CTEType::File(file_spec)
441 } else {
442 let query = self.parse_select_statement_inner()?;
443 CTEType::Standard(query)
444 };
445
446 self.consume(Token::RightParen)?;
448 self.pop_paren_depth();
450 result
451 };
452
453 ctes.push(CTE {
454 name,
455 column_list,
456 cte_type,
457 });
458
459 if !matches!(self.current_token, Token::Comma) {
461 break;
462 }
463 self.advance();
464 }
465
466 Ok(ctes)
467 }
468
469 fn parse_optional_alias(&mut self) -> Result<Option<String>, String> {
471 if matches!(self.current_token, Token::As) {
472 self.advance();
473 match &self.current_token {
474 Token::Identifier(name) => {
475 let alias = name.clone();
476 self.advance();
477 Ok(Some(alias))
478 }
479 token => {
480 if let Some(keyword) = token.as_keyword_str() {
482 Err(format!(
483 "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
484 keyword,
485 keyword.to_lowercase()
486 ))
487 } else {
488 Err("Expected alias name after AS".to_string())
489 }
490 }
491 }
492 } else if let Token::Identifier(name) = &self.current_token {
493 let alias = name.clone();
495 self.advance();
496 Ok(Some(alias))
497 } else {
498 Ok(None)
499 }
500 }
501
502 fn is_valid_identifier(name: &str) -> bool {
504 if name.starts_with('"') && name.ends_with('"') {
505 true
507 } else {
508 name.chars().all(|c| c.is_alphanumeric() || c == '_')
510 }
511 }
512
513 fn update_paren_depth(&mut self, token: &Token) -> Result<(), String> {
515 match token {
516 Token::LeftParen => self.paren_depth += 1,
517 Token::RightParen => {
518 self.paren_depth -= 1;
519 if self.paren_depth < 0 {
521 return Err(
522 "Unexpected closing parenthesis - no matching opening parenthesis"
523 .to_string(),
524 );
525 }
526 }
527 _ => {}
528 }
529 Ok(())
530 }
531
532 fn parse_argument_list(&mut self) -> Result<Vec<SqlExpression>, String> {
534 let mut args = Vec::new();
535
536 if !matches!(self.current_token, Token::RightParen) {
537 loop {
538 args.push(self.parse_expression()?);
539
540 if matches!(self.current_token, Token::Comma) {
541 self.advance();
542 } else {
543 break;
544 }
545 }
546 }
547
548 Ok(args)
549 }
550
551 fn check_balanced_parentheses(&self) -> Result<(), String> {
553 if self.paren_depth > 0 {
554 Err(format!(
555 "Unclosed parenthesis - missing {} closing parenthes{}",
556 self.paren_depth,
557 if self.paren_depth == 1 { "is" } else { "es" }
558 ))
559 } else if self.paren_depth < 0 {
560 Err("Extra closing parenthesis found - no matching opening parenthesis".to_string())
561 } else {
562 Ok(())
563 }
564 }
565
566 fn contains_aggregate_function(expr: &SqlExpression) -> bool {
569 match expr {
570 SqlExpression::FunctionCall { name, args, .. } => {
571 let upper_name = name.to_uppercase();
573 let is_aggregate = matches!(
574 upper_name.as_str(),
575 "COUNT" | "SUM" | "AVG" | "MIN" | "MAX" | "GROUP_CONCAT" | "STRING_AGG"
576 );
577
578 is_aggregate || args.iter().any(Self::contains_aggregate_function)
581 }
582 SqlExpression::BinaryOp { left, right, .. } => {
584 Self::contains_aggregate_function(left) || Self::contains_aggregate_function(right)
585 }
586 SqlExpression::Not { expr } => Self::contains_aggregate_function(expr),
587 SqlExpression::MethodCall { args, .. } => {
588 args.iter().any(Self::contains_aggregate_function)
589 }
590 SqlExpression::ChainedMethodCall { base, args, .. } => {
591 Self::contains_aggregate_function(base)
592 || args.iter().any(Self::contains_aggregate_function)
593 }
594 SqlExpression::CaseExpression {
595 when_branches,
596 else_branch,
597 } => {
598 when_branches.iter().any(|branch| {
599 Self::contains_aggregate_function(&branch.condition)
600 || Self::contains_aggregate_function(&branch.result)
601 }) || else_branch
602 .as_ref()
603 .map_or(false, |e| Self::contains_aggregate_function(e))
604 }
605 SqlExpression::SimpleCaseExpression {
606 expr,
607 when_branches,
608 else_branch,
609 } => {
610 Self::contains_aggregate_function(expr)
611 || when_branches.iter().any(|branch| {
612 Self::contains_aggregate_function(&branch.value)
613 || Self::contains_aggregate_function(&branch.result)
614 })
615 || else_branch
616 .as_ref()
617 .map_or(false, |e| Self::contains_aggregate_function(e))
618 }
619 SqlExpression::ScalarSubquery { query } => {
620 query
623 .having
624 .as_ref()
625 .map_or(false, |h| Self::contains_aggregate_function(h))
626 }
627 SqlExpression::Column(_)
629 | SqlExpression::StringLiteral(_)
630 | SqlExpression::NumberLiteral(_)
631 | SqlExpression::BooleanLiteral(_)
632 | SqlExpression::Null
633 | SqlExpression::DateTimeConstructor { .. }
634 | SqlExpression::DateTimeToday { .. } => false,
635
636 SqlExpression::WindowFunction { .. } => true,
638
639 SqlExpression::Between { expr, lower, upper } => {
641 Self::contains_aggregate_function(expr)
642 || Self::contains_aggregate_function(lower)
643 || Self::contains_aggregate_function(upper)
644 }
645
646 SqlExpression::InList { expr, values } | SqlExpression::NotInList { expr, values } => {
648 Self::contains_aggregate_function(expr)
649 || values.iter().any(Self::contains_aggregate_function)
650 }
651
652 SqlExpression::InSubquery { expr, subquery }
654 | SqlExpression::NotInSubquery { expr, subquery } => {
655 Self::contains_aggregate_function(expr)
656 || subquery
657 .having
658 .as_ref()
659 .map_or(false, |h| Self::contains_aggregate_function(h))
660 }
661
662 SqlExpression::InSubqueryTuple { exprs, subquery }
664 | SqlExpression::NotInSubqueryTuple { exprs, subquery } => {
665 exprs.iter().any(Self::contains_aggregate_function)
666 || subquery
667 .having
668 .as_ref()
669 .map_or(false, |h| Self::contains_aggregate_function(h))
670 }
671
672 SqlExpression::Unnest { column, .. } => Self::contains_aggregate_function(column),
674 }
675 }
676
677 fn parse_select_statement(&mut self) -> Result<SelectStatement, String> {
678 self.trace_enter("parse_select_statement");
679 let result = self.parse_select_statement_inner()?;
680
681 self.check_balanced_parentheses()?;
683
684 Ok(result)
685 }
686
687 fn parse_select_statement_inner(&mut self) -> Result<SelectStatement, String> {
688 let leading_comments = if self.mode == ParserMode::PreserveComments {
690 self.collect_leading_comments()
691 } else {
692 vec![]
693 };
694
695 self.parse_select_statement_with_comments(leading_comments)
696 }
697
698 fn parse_select_statement_inner_no_comments(&mut self) -> Result<SelectStatement, String> {
701 self.parse_select_statement_with_comments(vec![])
702 }
703
704 fn parse_select_statement_with_comments(
706 &mut self,
707 leading_comments: Vec<Comment>,
708 ) -> Result<SelectStatement, String> {
709 self.consume(Token::Select)?;
710
711 let distinct = if matches!(self.current_token, Token::Distinct) {
713 self.advance();
714 true
715 } else {
716 false
717 };
718
719 let select_items = self.parse_select_items()?;
721
722 let columns = select_items
724 .iter()
725 .map(|item| match item {
726 SelectItem::Star { .. } => "*".to_string(),
727 SelectItem::StarExclude { .. } => "*".to_string(), SelectItem::Column {
729 column: col_ref, ..
730 } => col_ref.name.clone(),
731 SelectItem::Expression { alias, .. } => alias.clone(),
732 })
733 .collect();
734
735 let into_table = if matches!(self.current_token, Token::Into) {
737 self.advance();
738 Some(self.parse_into_clause()?)
739 } else {
740 None
741 };
742
743 let (from_table, from_subquery, from_function, from_alias) = if matches!(
745 self.current_token,
746 Token::From
747 ) {
748 self.advance();
749
750 let table_or_function_name = match &self.current_token {
753 Token::Identifier(name) => Some(name.clone()),
754 token => {
755 token.as_keyword_str().map(|k| k.to_lowercase())
757 }
758 };
759
760 if let Some(name) = table_or_function_name {
761 let has_paren = self.peek_token() == Some(Token::LeftParen);
765 if self.debug_trace {
766 eprintln!(
767 " Checking {} for table function, has_paren={}",
768 name, has_paren
769 );
770 }
771
772 let is_table_function = if has_paren {
775 if self.debug_trace {
777 eprintln!(" Checking generator registry for {}", name.to_uppercase());
778 }
779 if let Some(_gen) = self.generator_registry.get(&name.to_uppercase()) {
780 if self.debug_trace {
781 eprintln!(" Found {} in generator registry", name);
782 }
783 self.trace_token(&format!("Found generator: {}", name));
784 true
785 } else {
786 if let Some(func) = self.function_registry.get(&name.to_uppercase()) {
788 let sig = func.signature();
789 let is_table_fn = sig.category == FunctionCategory::TableFunction;
790 if self.debug_trace {
791 eprintln!(
792 " Found {} in function registry, is_table_function={}",
793 name, is_table_fn
794 );
795 }
796 if is_table_fn {
797 self.trace_token(&format!(
798 "Found table function in function registry: {}",
799 name
800 ));
801 }
802 is_table_fn
803 } else {
804 if self.debug_trace {
805 eprintln!(" {} not found in either registry", name);
806 self.trace_token(&format!(
807 "Not found as generator or table function: {}",
808 name
809 ));
810 }
811 return Err(format!(
817 "Unknown table function '{}'. Run --list-functions to see available table functions (e.g. READ_CSV, READ_JSON, READ_JSONL, RANGE).",
818 name
819 ));
820 }
821 }
822 } else {
823 if self.debug_trace {
824 eprintln!(" No parenthesis after {}, treating as table", name);
825 }
826 false
827 };
828
829 if is_table_function {
830 let function_name = name.clone();
832 self.advance(); self.consume(Token::LeftParen)?;
836 let args = self.parse_argument_list()?;
837 self.consume(Token::RightParen)?;
838
839 let alias = if matches!(self.current_token, Token::As) {
841 self.advance();
842 match &self.current_token {
843 Token::Identifier(name) => {
844 let alias = name.clone();
845 self.advance();
846 Some(alias)
847 }
848 token => {
849 if let Some(keyword) = token.as_keyword_str() {
850 return Err(format!(
851 "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
852 keyword,
853 keyword.to_lowercase()
854 ));
855 } else {
856 return Err("Expected alias name after AS".to_string());
857 }
858 }
859 }
860 } else if let Token::Identifier(name) = &self.current_token {
861 let alias = name.clone();
862 self.advance();
863 Some(alias)
864 } else {
865 None
866 };
867
868 (
869 None,
870 None,
871 Some(TableFunction::Generator {
872 name: function_name,
873 args,
874 }),
875 alias,
876 )
877 } else {
878 let table_name = name.clone();
880 self.advance();
881
882 let alias = self.parse_optional_alias()?;
884
885 (Some(table_name), None, None, alias)
886 }
887 } else if matches!(self.current_token, Token::LeftParen) {
888 self.advance();
890
891 let subquery = if matches!(self.current_token, Token::With) {
893 self.parse_with_clause_inner()?
894 } else {
895 self.parse_select_statement_inner()?
896 };
897
898 self.consume(Token::RightParen)?;
899
900 let alias = if matches!(self.current_token, Token::As) {
902 self.advance();
903 match &self.current_token {
904 Token::Identifier(name) => {
905 let alias = name.clone();
906 self.advance();
907 alias
908 }
909 token => {
910 if let Some(keyword) = token.as_keyword_str() {
911 return Err(format!(
912 "Reserved keyword '{}' cannot be used as subquery alias. Use a different name or quote it with double quotes: \"{}\"",
913 keyword,
914 keyword.to_lowercase()
915 ));
916 } else {
917 return Err("Expected alias name after AS".to_string());
918 }
919 }
920 }
921 } else {
922 match &self.current_token {
924 Token::Identifier(name) => {
925 let alias = name.clone();
926 self.advance();
927 alias
928 }
929 _ => {
930 return Err(
931 "Subquery in FROM must have an alias (e.g., AS t)".to_string()
932 )
933 }
934 }
935 };
936
937 (None, Some(Box::new(subquery)), None, Some(alias))
938 } else {
939 let table_name = match &self.current_token {
941 Token::Identifier(table) => table.clone(),
942 Token::QuotedIdentifier(table) => table.clone(),
943 token => {
944 if let Some(keyword) = token.as_keyword_str() {
946 keyword.to_lowercase()
947 } else {
948 return Err("Expected table name or subquery after FROM".to_string());
949 }
950 }
951 };
952
953 self.advance();
954
955 let alias = self.parse_optional_alias()?;
957
958 (Some(table_name), None, None, alias)
959 }
960 } else {
961 (None, None, None, None)
962 };
963
964 let pivot_source = if matches!(self.current_token, Token::Pivot) {
968 let source = if let Some(ref table_name) = from_table {
970 TableSource::Table(table_name.clone())
971 } else if let Some(ref subquery) = from_subquery {
972 TableSource::DerivedTable {
973 query: subquery.clone(),
974 alias: from_alias.clone().unwrap_or_default(),
975 }
976 } else {
977 return Err("PIVOT requires a table or subquery source".to_string());
978 };
979
980 let pivoted = self.parse_pivot_clause(source)?;
982 Some(pivoted)
983 } else {
984 None
985 };
986
987 let mut joins = Vec::new();
989 while self.is_join_token() {
990 joins.push(self.parse_join_clause()?);
991 }
992
993 let where_clause = if matches!(self.current_token, Token::Where) {
994 self.advance();
995 Some(self.parse_where_clause()?)
996 } else {
997 None
998 };
999
1000 let group_by = if matches!(self.current_token, Token::GroupBy) {
1001 self.advance();
1002 Some(self.parse_expression_list()?)
1005 } else {
1006 None
1007 };
1008
1009 let having = if matches!(self.current_token, Token::Having) {
1011 if group_by.is_none() {
1012 return Err("HAVING clause requires GROUP BY".to_string());
1013 }
1014 self.advance();
1015 let having_expr = self.parse_expression()?;
1016
1017 Some(having_expr)
1022 } else {
1023 None
1024 };
1025
1026 let qualify = if matches!(self.current_token, Token::Qualify) {
1030 self.advance();
1031 let qualify_expr = self.parse_expression()?;
1032
1033 Some(qualify_expr)
1037 } else {
1038 None
1039 };
1040
1041 let order_by = if matches!(self.current_token, Token::OrderBy) {
1043 self.trace_token("Found OrderBy token");
1044 self.advance();
1045 Some(self.parse_order_by_list()?)
1046 } else if let Token::Identifier(s) = &self.current_token {
1047 if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
1050 self.trace_token("Warning: ORDER as identifier instead of OrderBy token");
1051 self.advance(); if matches!(&self.current_token, Token::By) {
1053 self.advance(); Some(self.parse_order_by_list()?)
1055 } else {
1056 return Err("Expected BY after ORDER".to_string());
1057 }
1058 } else {
1059 None
1060 }
1061 } else {
1062 None
1063 };
1064
1065 let limit = if matches!(self.current_token, Token::Limit) {
1067 self.advance();
1068 match &self.current_token {
1069 Token::NumberLiteral(num) => {
1070 let limit_val = num
1071 .parse::<usize>()
1072 .map_err(|_| format!("Invalid LIMIT value: {num}"))?;
1073 self.advance();
1074 Some(limit_val)
1075 }
1076 _ => return Err("Expected number after LIMIT".to_string()),
1077 }
1078 } else {
1079 None
1080 };
1081
1082 let offset = if matches!(self.current_token, Token::Offset) {
1084 self.advance();
1085 match &self.current_token {
1086 Token::NumberLiteral(num) => {
1087 let offset_val = num
1088 .parse::<usize>()
1089 .map_err(|_| format!("Invalid OFFSET value: {num}"))?;
1090 self.advance();
1091 Some(offset_val)
1092 }
1093 _ => return Err("Expected number after OFFSET".to_string()),
1094 }
1095 } else {
1096 None
1097 };
1098
1099 let into_table = if into_table.is_none() && matches!(self.current_token, Token::Into) {
1103 self.advance();
1104 Some(self.parse_into_clause()?)
1105 } else {
1106 into_table };
1108
1109 let set_operations = self.parse_set_operations()?;
1111
1112 let trailing_comment = if self.mode == ParserMode::PreserveComments {
1114 self.collect_trailing_comment()
1115 } else {
1116 None
1117 };
1118
1119 let from_source = if let Some(pivot) = pivot_source {
1122 Some(pivot)
1123 } else if let Some(ref table_name) = from_table {
1124 Some(TableSource::Table(table_name.clone()))
1125 } else if let Some(ref subquery) = from_subquery {
1126 Some(TableSource::DerivedTable {
1127 query: subquery.clone(),
1128 alias: from_alias.clone().unwrap_or_default(),
1129 })
1130 } else if let Some(ref _func) = from_function {
1131 None
1134 } else {
1135 None
1136 };
1137
1138 Ok(SelectStatement {
1139 distinct,
1140 columns,
1141 select_items,
1142 from_source,
1143 #[allow(deprecated)]
1144 from_table,
1145 #[allow(deprecated)]
1146 from_subquery,
1147 #[allow(deprecated)]
1148 from_function,
1149 #[allow(deprecated)]
1150 from_alias,
1151 joins,
1152 where_clause,
1153 order_by,
1154 group_by,
1155 having,
1156 qualify,
1157 limit,
1158 offset,
1159 ctes: Vec::new(), into_table,
1161 set_operations,
1162 leading_comments,
1163 trailing_comment,
1164 })
1165 }
1166
1167 fn parse_set_operations(
1170 &mut self,
1171 ) -> Result<Vec<(SetOperation, Box<SelectStatement>)>, String> {
1172 let mut operations = Vec::new();
1173
1174 while matches!(
1175 self.current_token,
1176 Token::Union | Token::Intersect | Token::Except
1177 ) {
1178 let operation = match &self.current_token {
1180 Token::Union => {
1181 self.advance();
1182 if let Token::Identifier(id) = &self.current_token {
1184 if id.to_uppercase() == "ALL" {
1185 self.advance();
1186 SetOperation::UnionAll
1187 } else {
1188 SetOperation::Union
1189 }
1190 } else {
1191 SetOperation::Union
1192 }
1193 }
1194 Token::Intersect => {
1195 self.advance();
1196 SetOperation::Intersect
1197 }
1198 Token::Except => {
1199 self.advance();
1200 SetOperation::Except
1201 }
1202 _ => unreachable!(),
1203 };
1204
1205 let next_select = self.parse_select_statement_inner()?;
1207
1208 operations.push((operation, Box::new(next_select)));
1209 }
1210
1211 Ok(operations)
1212 }
1213
1214 fn parse_select_items(&mut self) -> Result<Vec<SelectItem>, String> {
1216 let mut items = Vec::new();
1217
1218 loop {
1219 if let Token::Identifier(name) = &self.current_token.clone() {
1222 let saved_pos = self.lexer.clone();
1224 let saved_token = self.current_token.clone();
1225 let table_name = name.clone();
1226
1227 self.advance();
1228
1229 if matches!(self.current_token, Token::Dot) {
1230 self.advance();
1231 if matches!(self.current_token, Token::Star) {
1232 items.push(SelectItem::Star {
1234 table_prefix: Some(table_name),
1235 leading_comments: vec![],
1236 trailing_comment: None,
1237 });
1238 self.advance();
1239
1240 if matches!(self.current_token, Token::Comma) {
1242 self.advance();
1243 continue;
1244 } else {
1245 break;
1246 }
1247 }
1248 }
1249
1250 self.lexer = saved_pos;
1252 self.current_token = saved_token;
1253 }
1254
1255 if matches!(self.current_token, Token::Star) {
1257 self.advance(); if matches!(self.current_token, Token::Exclude) {
1261 self.advance(); if !matches!(self.current_token, Token::LeftParen) {
1265 return Err("Expected '(' after EXCLUDE".to_string());
1266 }
1267 self.advance(); let mut excluded_columns = Vec::new();
1271 loop {
1272 match &self.current_token {
1273 Token::Identifier(col_name) | Token::QuotedIdentifier(col_name) => {
1274 excluded_columns.push(col_name.clone());
1275 self.advance();
1276 }
1277 _ => return Err("Expected column name in EXCLUDE list".to_string()),
1278 }
1279
1280 if matches!(self.current_token, Token::Comma) {
1282 self.advance();
1283 } else if matches!(self.current_token, Token::RightParen) {
1284 self.advance(); break;
1286 } else {
1287 return Err("Expected ',' or ')' in EXCLUDE list".to_string());
1288 }
1289 }
1290
1291 if excluded_columns.is_empty() {
1292 return Err("EXCLUDE list cannot be empty".to_string());
1293 }
1294
1295 items.push(SelectItem::StarExclude {
1296 table_prefix: None,
1297 excluded_columns,
1298 leading_comments: vec![],
1299 trailing_comment: None,
1300 });
1301 } else {
1302 items.push(SelectItem::Star {
1304 table_prefix: None,
1305 leading_comments: vec![],
1306 trailing_comment: None,
1307 });
1308 }
1309 } else {
1310 let expr = self.parse_comparison()?; let alias = if matches!(self.current_token, Token::As) {
1315 self.advance();
1316 match &self.current_token {
1317 Token::Identifier(alias_name) => {
1318 let alias = alias_name.clone();
1319 self.advance();
1320 alias
1321 }
1322 Token::QuotedIdentifier(alias_name) => {
1323 let alias = alias_name.clone();
1324 self.advance();
1325 alias
1326 }
1327 token => {
1328 if let Some(keyword) = token.as_keyword_str() {
1329 return Err(format!(
1330 "Reserved keyword '{}' cannot be used as column alias. Use a different name or quote it with double quotes: \"{}\"",
1331 keyword,
1332 keyword.to_lowercase()
1333 ));
1334 } else {
1335 return Err("Expected alias name after AS".to_string());
1336 }
1337 }
1338 }
1339 } else {
1340 match &expr {
1342 SqlExpression::Column(col_ref) => col_ref.name.clone(),
1343 _ => format!("expr_{}", items.len() + 1), }
1345 };
1346
1347 let item = match expr {
1349 SqlExpression::Column(col_ref) if alias == col_ref.name => {
1350 SelectItem::Column {
1352 column: col_ref,
1353 leading_comments: vec![],
1354 trailing_comment: None,
1355 }
1356 }
1357 _ => {
1358 SelectItem::Expression {
1360 expr,
1361 alias,
1362 leading_comments: vec![],
1363 trailing_comment: None,
1364 }
1365 }
1366 };
1367
1368 items.push(item);
1369 }
1370
1371 if matches!(self.current_token, Token::Comma) {
1373 self.advance();
1374 } else {
1375 break;
1376 }
1377 }
1378
1379 Ok(items)
1380 }
1381
1382 fn parse_identifier_list(&mut self) -> Result<Vec<String>, String> {
1383 let mut identifiers = Vec::new();
1384
1385 loop {
1386 match &self.current_token {
1387 Token::Identifier(id) => {
1388 if Self::is_identifier_reserved(id) {
1390 break;
1392 }
1393 let mut name = id.clone();
1394 self.advance();
1395
1396 if matches!(self.current_token, Token::Dot) {
1398 self.advance(); match &self.current_token {
1400 Token::Identifier(col) => {
1401 name = format!("{}.{}", name, col);
1402 self.advance();
1403 }
1404 Token::QuotedIdentifier(col) => {
1405 name = format!("{}.{}", name, col);
1406 self.advance();
1407 }
1408 _ => {
1409 return Err("Expected identifier after '.'".to_string());
1410 }
1411 }
1412 }
1413
1414 identifiers.push(name);
1415 }
1416 Token::QuotedIdentifier(id) => {
1417 identifiers.push(id.clone());
1419 self.advance();
1420 }
1421 _ => {
1422 break;
1424 }
1425 }
1426
1427 if matches!(self.current_token, Token::Comma) {
1428 self.advance();
1429 } else {
1430 break;
1431 }
1432 }
1433
1434 if identifiers.is_empty() {
1435 return Err("Expected at least one identifier".to_string());
1436 }
1437
1438 Ok(identifiers)
1439 }
1440
1441 fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
1442 let mut partition_by = Vec::new();
1443 let mut order_by = Vec::new();
1444
1445 if matches!(self.current_token, Token::Partition) {
1447 self.advance(); if !matches!(self.current_token, Token::By) {
1449 return Err("Expected BY after PARTITION".to_string());
1450 }
1451 self.advance(); partition_by = self.parse_identifier_list()?;
1455 }
1456
1457 if matches!(self.current_token, Token::OrderBy) {
1459 self.advance(); order_by = self.parse_order_by_list()?;
1461 } else if let Token::Identifier(s) = &self.current_token {
1462 if Self::is_identifier_reserved(s) && s.to_uppercase() == "ORDER" {
1463 self.advance(); if !matches!(self.current_token, Token::By) {
1466 return Err("Expected BY after ORDER".to_string());
1467 }
1468 self.advance(); order_by = self.parse_order_by_list()?;
1470 }
1471 }
1472
1473 let mut frame = self.parse_window_frame()?;
1475
1476 if !order_by.is_empty() && frame.is_none() {
1480 frame = Some(WindowFrame {
1481 unit: FrameUnit::Range,
1482 start: FrameBound::UnboundedPreceding,
1483 end: Some(FrameBound::CurrentRow),
1484 });
1485 }
1486
1487 Ok(WindowSpec {
1488 partition_by,
1489 order_by,
1490 frame,
1491 })
1492 }
1493
1494 fn parse_order_by_list(&mut self) -> Result<Vec<OrderByItem>, String> {
1495 let mut order_items = Vec::new();
1496
1497 loop {
1498 let expr = self.parse_expression()?;
1506
1507 let direction = match &self.current_token {
1509 Token::Asc => {
1510 self.advance();
1511 SortDirection::Asc
1512 }
1513 Token::Desc => {
1514 self.advance();
1515 SortDirection::Desc
1516 }
1517 _ => SortDirection::Asc, };
1519
1520 order_items.push(OrderByItem { expr, direction });
1521
1522 if matches!(self.current_token, Token::Comma) {
1523 self.advance();
1524 } else {
1525 break;
1526 }
1527 }
1528
1529 Ok(order_items)
1530 }
1531
1532 fn parse_into_clause(&mut self) -> Result<IntoTable, String> {
1535 let name = match &self.current_token {
1537 Token::Identifier(id) if id.starts_with('#') => {
1538 let table_name = id.clone();
1539 self.advance();
1540 table_name
1541 }
1542 Token::Identifier(id) => {
1543 return Err(format!(
1544 "Temporary table name must start with #, got: {}",
1545 id
1546 ));
1547 }
1548 _ => {
1549 return Err(
1550 "Expected temporary table name (starting with #) after INTO".to_string()
1551 );
1552 }
1553 };
1554
1555 Ok(IntoTable { name })
1556 }
1557
1558 fn parse_window_frame(&mut self) -> Result<Option<WindowFrame>, String> {
1559 let unit = match &self.current_token {
1561 Token::Rows => {
1562 self.advance();
1563 FrameUnit::Rows
1564 }
1565 Token::Identifier(id) if id.to_uppercase() == "RANGE" => {
1566 self.advance();
1568 FrameUnit::Range
1569 }
1570 _ => return Ok(None), };
1572
1573 let (start, end) = if let Token::Between = &self.current_token {
1575 self.advance(); let start = self.parse_frame_bound()?;
1578
1579 if !matches!(&self.current_token, Token::And) {
1581 return Err("Expected AND after window frame start bound".to_string());
1582 }
1583 self.advance();
1584
1585 let end = self.parse_frame_bound()?;
1587 (start, Some(end))
1588 } else {
1589 let bound = self.parse_frame_bound()?;
1591 (bound, None)
1592 };
1593
1594 Ok(Some(WindowFrame { unit, start, end }))
1595 }
1596
1597 fn parse_frame_bound(&mut self) -> Result<FrameBound, String> {
1598 match &self.current_token {
1599 Token::Unbounded => {
1600 self.advance();
1601 match &self.current_token {
1602 Token::Preceding => {
1603 self.advance();
1604 Ok(FrameBound::UnboundedPreceding)
1605 }
1606 Token::Following => {
1607 self.advance();
1608 Ok(FrameBound::UnboundedFollowing)
1609 }
1610 _ => Err("Expected PRECEDING or FOLLOWING after UNBOUNDED".to_string()),
1611 }
1612 }
1613 Token::Current => {
1614 self.advance();
1615 if matches!(&self.current_token, Token::Row) {
1616 self.advance();
1617 return Ok(FrameBound::CurrentRow);
1618 }
1619 Err("Expected ROW after CURRENT".to_string())
1620 }
1621 Token::NumberLiteral(num) => {
1622 let n: i64 = num
1623 .parse()
1624 .map_err(|_| "Invalid number in window frame".to_string())?;
1625 self.advance();
1626 match &self.current_token {
1627 Token::Preceding => {
1628 self.advance();
1629 Ok(FrameBound::Preceding(n))
1630 }
1631 Token::Following => {
1632 self.advance();
1633 Ok(FrameBound::Following(n))
1634 }
1635 _ => Err("Expected PRECEDING or FOLLOWING after number".to_string()),
1636 }
1637 }
1638 _ => Err("Invalid window frame bound".to_string()),
1639 }
1640 }
1641
1642 fn parse_where_clause(&mut self) -> Result<WhereClause, String> {
1643 let expr = self.parse_expression()?;
1646
1647 if matches!(self.current_token, Token::RightParen) && self.paren_depth <= 0 {
1649 return Err(
1650 "Unexpected closing parenthesis - no matching opening parenthesis".to_string(),
1651 );
1652 }
1653
1654 let conditions = vec![Condition {
1656 expr,
1657 connector: None,
1658 }];
1659
1660 Ok(WhereClause { conditions })
1661 }
1662
1663 fn parse_expression(&mut self) -> Result<SqlExpression, String> {
1664 self.trace_enter("parse_expression");
1665 let mut left = self.parse_logical_or()?;
1668
1669 left = parse_in_operator(self, left)?;
1672
1673 let result = Ok(left);
1674 self.trace_exit("parse_expression", &result);
1675 result
1676 }
1677
1678 fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
1679 parse_comparison_expr(self)
1681 }
1682
1683 fn parse_additive(&mut self) -> Result<SqlExpression, String> {
1684 parse_additive_expr(self)
1686 }
1687
1688 fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
1689 parse_multiplicative_expr(self)
1691 }
1692
1693 fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
1694 parse_logical_or_expr(self)
1696 }
1697
1698 fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
1699 parse_logical_and_expr(self)
1701 }
1702
1703 fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
1704 parse_case_expr(self)
1706 }
1707
1708 fn parse_primary(&mut self) -> Result<SqlExpression, String> {
1709 let columns = self.columns.clone();
1712 let in_method_args = self.in_method_args;
1713 let ctx = PrimaryExpressionContext {
1714 columns: &columns,
1715 in_method_args,
1716 };
1717 parse_primary_expr(self, &ctx)
1718 }
1719
1720 fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
1722 self.in_method_args = true;
1724
1725 let args = self.parse_argument_list()?;
1726
1727 self.in_method_args = false;
1729
1730 Ok(args)
1731 }
1732
1733 fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
1734 let mut args = Vec::new();
1735 let mut has_distinct = false;
1736
1737 if !matches!(self.current_token, Token::RightParen) {
1738 if matches!(self.current_token, Token::Distinct) {
1740 self.advance(); has_distinct = true;
1742 }
1743
1744 args.push(self.parse_logical_or()?);
1747
1748 while matches!(self.current_token, Token::Comma) {
1750 self.advance();
1751 args.push(self.parse_logical_or()?);
1752 }
1753 }
1754
1755 Ok((args, has_distinct))
1756 }
1757
1758 fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
1759 let mut expressions = Vec::new();
1760
1761 loop {
1762 expressions.push(self.parse_expression()?);
1763
1764 if matches!(self.current_token, Token::Comma) {
1765 self.advance();
1766 } else {
1767 break;
1768 }
1769 }
1770
1771 Ok(expressions)
1772 }
1773
1774 #[must_use]
1775 pub fn get_position(&self) -> usize {
1776 self.lexer.get_position()
1777 }
1778
1779 fn is_join_token(&self) -> bool {
1781 matches!(
1782 self.current_token,
1783 Token::Join | Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
1784 )
1785 }
1786
1787 fn parse_join_clause(&mut self) -> Result<JoinClause, String> {
1789 let join_type = match &self.current_token {
1791 Token::Join => {
1792 self.advance();
1793 JoinType::Inner }
1795 Token::Inner => {
1796 self.advance();
1797 if !matches!(self.current_token, Token::Join) {
1798 return Err("Expected JOIN after INNER".to_string());
1799 }
1800 self.advance();
1801 JoinType::Inner
1802 }
1803 Token::Left => {
1804 self.advance();
1805 if matches!(self.current_token, Token::Outer) {
1807 self.advance();
1808 }
1809 if !matches!(self.current_token, Token::Join) {
1810 return Err("Expected JOIN after LEFT".to_string());
1811 }
1812 self.advance();
1813 JoinType::Left
1814 }
1815 Token::Right => {
1816 self.advance();
1817 if matches!(self.current_token, Token::Outer) {
1819 self.advance();
1820 }
1821 if !matches!(self.current_token, Token::Join) {
1822 return Err("Expected JOIN after RIGHT".to_string());
1823 }
1824 self.advance();
1825 JoinType::Right
1826 }
1827 Token::Full => {
1828 self.advance();
1829 if matches!(self.current_token, Token::Outer) {
1831 self.advance();
1832 }
1833 if !matches!(self.current_token, Token::Join) {
1834 return Err("Expected JOIN after FULL".to_string());
1835 }
1836 self.advance();
1837 JoinType::Full
1838 }
1839 Token::Cross => {
1840 self.advance();
1841 if !matches!(self.current_token, Token::Join) {
1842 return Err("Expected JOIN after CROSS".to_string());
1843 }
1844 self.advance();
1845 JoinType::Cross
1846 }
1847 _ => return Err("Expected JOIN keyword".to_string()),
1848 };
1849
1850 let (table, alias) = self.parse_join_table_source()?;
1852
1853 let condition = if join_type == JoinType::Cross {
1855 JoinCondition { conditions: vec![] }
1857 } else {
1858 if !matches!(self.current_token, Token::On) {
1859 return Err("Expected ON keyword after JOIN table".to_string());
1860 }
1861 self.advance();
1862 self.parse_join_condition()?
1863 };
1864
1865 Ok(JoinClause {
1866 join_type,
1867 table,
1868 alias,
1869 condition,
1870 })
1871 }
1872
1873 fn parse_join_table_source(&mut self) -> Result<(TableSource, Option<String>), String> {
1874 let table = match &self.current_token {
1875 Token::Identifier(name) => {
1876 let table_name = name.clone();
1877 self.advance();
1878 TableSource::Table(table_name)
1879 }
1880 Token::LeftParen => {
1881 self.advance();
1883 let subquery = self.parse_select_statement_inner()?;
1884 if !matches!(self.current_token, Token::RightParen) {
1885 return Err("Expected ')' after subquery".to_string());
1886 }
1887 self.advance();
1888
1889 let alias = match &self.current_token {
1891 Token::Identifier(alias_name) => {
1892 let alias = alias_name.clone();
1893 self.advance();
1894 alias
1895 }
1896 Token::As => {
1897 self.advance();
1898 match &self.current_token {
1899 Token::Identifier(alias_name) => {
1900 let alias = alias_name.clone();
1901 self.advance();
1902 alias
1903 }
1904 _ => return Err("Expected alias after AS keyword".to_string()),
1905 }
1906 }
1907 _ => return Err("Subqueries must have an alias".to_string()),
1908 };
1909
1910 return Ok((
1911 TableSource::DerivedTable {
1912 query: Box::new(subquery),
1913 alias: alias.clone(),
1914 },
1915 Some(alias),
1916 ));
1917 }
1918 _ => return Err("Expected table name or subquery in JOIN clause".to_string()),
1919 };
1920
1921 let alias = match &self.current_token {
1923 Token::Identifier(alias_name) => {
1924 let alias = alias_name.clone();
1925 self.advance();
1926 Some(alias)
1927 }
1928 Token::As => {
1929 self.advance();
1930 match &self.current_token {
1931 Token::Identifier(alias_name) => {
1932 let alias = alias_name.clone();
1933 self.advance();
1934 Some(alias)
1935 }
1936 _ => return Err("Expected alias after AS keyword".to_string()),
1937 }
1938 }
1939 _ => None,
1940 };
1941
1942 Ok((table, alias))
1943 }
1944
1945 fn parse_join_condition(&mut self) -> Result<JoinCondition, String> {
1946 let mut conditions = Vec::new();
1947
1948 conditions.push(self.parse_single_join_condition()?);
1950
1951 while matches!(self.current_token, Token::And) {
1953 self.advance(); conditions.push(self.parse_single_join_condition()?);
1955 }
1956
1957 Ok(JoinCondition { conditions })
1958 }
1959
1960 fn parse_single_join_condition(&mut self) -> Result<SingleJoinCondition, String> {
1961 let left_expr = self.parse_additive()?;
1964
1965 let operator = match &self.current_token {
1967 Token::Equal => JoinOperator::Equal,
1968 Token::NotEqual => JoinOperator::NotEqual,
1969 Token::LessThan => JoinOperator::LessThan,
1970 Token::LessThanOrEqual => JoinOperator::LessThanOrEqual,
1971 Token::GreaterThan => JoinOperator::GreaterThan,
1972 Token::GreaterThanOrEqual => JoinOperator::GreaterThanOrEqual,
1973 _ => return Err("Expected comparison operator in JOIN condition".to_string()),
1974 };
1975 self.advance();
1976
1977 let right_expr = self.parse_additive()?;
1979
1980 Ok(SingleJoinCondition {
1981 left_expr,
1982 operator,
1983 right_expr,
1984 })
1985 }
1986
1987 fn parse_column_reference(&mut self) -> Result<String, String> {
1988 match &self.current_token {
1989 Token::Identifier(name) => {
1990 let mut column_ref = name.clone();
1991 self.advance();
1992
1993 if matches!(self.current_token, Token::Dot) {
1995 self.advance();
1996 match &self.current_token {
1997 Token::Identifier(col_name) => {
1998 column_ref.push('.');
1999 column_ref.push_str(col_name);
2000 self.advance();
2001 }
2002 _ => return Err("Expected column name after '.'".to_string()),
2003 }
2004 }
2005
2006 Ok(column_ref)
2007 }
2008 _ => Err("Expected column reference".to_string()),
2009 }
2010 }
2011
2012 fn parse_pivot_clause(&mut self, source: TableSource) -> Result<TableSource, String> {
2017 self.consume(Token::Pivot)?;
2019
2020 self.consume(Token::LeftParen)?;
2022
2023 let aggregate = self.parse_pivot_aggregate()?;
2025
2026 self.consume(Token::For)?;
2028
2029 let pivot_column = match &self.current_token {
2031 Token::Identifier(col) => {
2032 let column = col.clone();
2033 self.advance();
2034 column
2035 }
2036 Token::QuotedIdentifier(col) => {
2037 let column = col.clone();
2038 self.advance();
2039 column
2040 }
2041 _ => return Err("Expected column name after FOR in PIVOT".to_string()),
2042 };
2043
2044 if !matches!(self.current_token, Token::In) {
2046 return Err("Expected IN keyword in PIVOT clause".to_string());
2047 }
2048 self.advance();
2049
2050 let pivot_values = self.parse_pivot_in_clause()?;
2052
2053 self.consume(Token::RightParen)?;
2055
2056 let alias = self.parse_optional_alias()?;
2058
2059 Ok(TableSource::Pivot {
2060 source: Box::new(source),
2061 aggregate,
2062 pivot_column,
2063 pivot_values,
2064 alias,
2065 })
2066 }
2067
2068 fn parse_pivot_aggregate(&mut self) -> Result<PivotAggregate, String> {
2071 let function = match &self.current_token {
2073 Token::Identifier(name) => {
2074 let func_name = name.to_uppercase();
2075 match func_name.as_str() {
2077 "MAX" | "MIN" | "SUM" | "AVG" | "COUNT" => {
2078 self.advance();
2079 func_name
2080 }
2081 _ => {
2082 return Err(format!(
2083 "Expected aggregate function (MAX, MIN, SUM, AVG, COUNT), got {}",
2084 func_name
2085 ))
2086 }
2087 }
2088 }
2089 _ => return Err("Expected aggregate function in PIVOT".to_string()),
2090 };
2091
2092 self.consume(Token::LeftParen)?;
2094
2095 let column = match &self.current_token {
2097 Token::Identifier(col) => {
2098 let column = col.clone();
2099 self.advance();
2100 column
2101 }
2102 Token::QuotedIdentifier(col) => {
2103 let column = col.clone();
2104 self.advance();
2105 column
2106 }
2107 Token::Star => {
2108 if function == "COUNT" {
2110 self.advance();
2111 "*".to_string()
2112 } else {
2113 return Err(format!("Only COUNT can use *, not {}", function));
2114 }
2115 }
2116 _ => return Err("Expected column name in aggregate function".to_string()),
2117 };
2118
2119 self.consume(Token::RightParen)?;
2121
2122 Ok(PivotAggregate { function, column })
2123 }
2124
2125 fn parse_pivot_in_clause(&mut self) -> Result<Vec<String>, String> {
2129 self.consume(Token::LeftParen)?;
2131
2132 let mut values = Vec::new();
2133
2134 match &self.current_token {
2136 Token::StringLiteral(val) => {
2137 values.push(val.clone());
2138 self.advance();
2139 }
2140 Token::Identifier(val) => {
2141 values.push(val.clone());
2143 self.advance();
2144 }
2145 Token::NumberLiteral(val) => {
2146 values.push(val.clone());
2148 self.advance();
2149 }
2150 _ => return Err("Expected value in PIVOT IN clause".to_string()),
2151 }
2152
2153 while matches!(self.current_token, Token::Comma) {
2155 self.advance(); match &self.current_token {
2158 Token::StringLiteral(val) => {
2159 values.push(val.clone());
2160 self.advance();
2161 }
2162 Token::Identifier(val) => {
2163 values.push(val.clone());
2164 self.advance();
2165 }
2166 Token::NumberLiteral(val) => {
2167 values.push(val.clone());
2168 self.advance();
2169 }
2170 _ => return Err("Expected value after comma in PIVOT IN clause".to_string()),
2171 }
2172 }
2173
2174 self.consume(Token::RightParen)?;
2176
2177 if values.is_empty() {
2178 return Err("PIVOT IN clause must have at least one value".to_string());
2179 }
2180
2181 Ok(values)
2182 }
2183}
2184
2185#[derive(Debug, Clone)]
2187pub enum CursorContext {
2188 SelectClause,
2189 FromClause,
2190 WhereClause,
2191 OrderByClause,
2192 AfterColumn(String),
2193 AfterLogicalOp(LogicalOp),
2194 AfterComparisonOp(String, String), InMethodCall(String, String), InExpression,
2197 Unknown,
2198}
2199
2200fn safe_slice_to(s: &str, pos: usize) -> &str {
2202 if pos >= s.len() {
2203 return s;
2204 }
2205
2206 let mut safe_pos = pos;
2208 while safe_pos > 0 && !s.is_char_boundary(safe_pos) {
2209 safe_pos -= 1;
2210 }
2211
2212 &s[..safe_pos]
2213}
2214
2215fn safe_slice_from(s: &str, pos: usize) -> &str {
2217 if pos >= s.len() {
2218 return "";
2219 }
2220
2221 let mut safe_pos = pos;
2223 while safe_pos < s.len() && !s.is_char_boundary(safe_pos) {
2224 safe_pos += 1;
2225 }
2226
2227 &s[safe_pos..]
2228}
2229
2230#[must_use]
2231pub fn detect_cursor_context(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
2232 let truncated = safe_slice_to(query, cursor_pos);
2233 let mut parser = Parser::new(truncated);
2234
2235 if let Ok(stmt) = parser.parse() {
2237 let (ctx, partial) = analyze_statement(&stmt, truncated, cursor_pos);
2238 #[cfg(test)]
2239 println!("analyze_statement returned: {ctx:?}, {partial:?} for query: '{truncated}'");
2240 (ctx, partial)
2241 } else {
2242 let (ctx, partial) = analyze_partial(truncated, cursor_pos);
2244 #[cfg(test)]
2245 println!("analyze_partial returned: {ctx:?}, {partial:?} for query: '{truncated}'");
2246 (ctx, partial)
2247 }
2248}
2249
2250#[must_use]
2251pub fn tokenize_query(query: &str) -> Vec<String> {
2252 let mut lexer = Lexer::new(query);
2253 let tokens = lexer.tokenize_all();
2254 tokens.iter().map(|t| format!("{t:?}")).collect()
2255}
2256
2257#[must_use]
2258fn find_quote_start(bytes: &[u8], mut pos: usize) -> Option<usize> {
2260 if pos > 0 {
2262 pos -= 1;
2263 while pos > 0 {
2264 if bytes[pos] == b'"' {
2265 if pos == 0 || bytes[pos - 1] != b'\\' {
2267 return Some(pos);
2268 }
2269 }
2270 pos -= 1;
2271 }
2272 if bytes[0] == b'"' {
2274 return Some(0);
2275 }
2276 }
2277 None
2278}
2279
2280fn handle_method_call_context(col_name: &str, after_dot: &str) -> (CursorContext, Option<String>) {
2282 let partial_method = if after_dot.is_empty() {
2284 None
2285 } else if after_dot.chars().all(|c| c.is_alphanumeric() || c == '_') {
2286 Some(after_dot.to_string())
2287 } else {
2288 None
2289 };
2290
2291 let col_name_for_context =
2293 if col_name.starts_with('"') && col_name.ends_with('"') && col_name.len() > 2 {
2294 col_name[1..col_name.len() - 1].to_string()
2295 } else {
2296 col_name.to_string()
2297 };
2298
2299 (
2300 CursorContext::AfterColumn(col_name_for_context),
2301 partial_method,
2302 )
2303}
2304
2305fn check_after_comparison_operator(query: &str) -> Option<(CursorContext, Option<String>)> {
2307 for op in &Parser::COMPARISON_OPERATORS {
2308 if let Some(op_pos) = query.rfind(op) {
2309 let before_op = safe_slice_to(query, op_pos);
2310 let after_op_start = op_pos + op.len();
2311 let after_op = if after_op_start < query.len() {
2312 &query[after_op_start..]
2313 } else {
2314 ""
2315 };
2316
2317 if let Some(col_name) = before_op.split_whitespace().last() {
2319 if col_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
2320 let after_op_trimmed = after_op.trim();
2322 if after_op_trimmed.is_empty()
2323 || (after_op_trimmed
2324 .chars()
2325 .all(|c| c.is_alphanumeric() || c == '_')
2326 && !after_op_trimmed.contains('('))
2327 {
2328 let partial = if after_op_trimmed.is_empty() {
2329 None
2330 } else {
2331 Some(after_op_trimmed.to_string())
2332 };
2333 return Some((
2334 CursorContext::AfterComparisonOp(
2335 col_name.to_string(),
2336 op.trim().to_string(),
2337 ),
2338 partial,
2339 ));
2340 }
2341 }
2342 }
2343 }
2344 }
2345 None
2346}
2347
2348fn analyze_statement(
2349 stmt: &SelectStatement,
2350 query: &str,
2351 _cursor_pos: usize,
2352) -> (CursorContext, Option<String>) {
2353 let trimmed = query.trim();
2355
2356 if let Some(result) = check_after_comparison_operator(query) {
2358 return result;
2359 }
2360
2361 let ends_with_logical_op = |s: &str| -> bool {
2364 let s_upper = s.to_uppercase();
2365 s_upper.ends_with(" AND") || s_upper.ends_with(" OR")
2366 };
2367
2368 if ends_with_logical_op(trimmed) {
2369 } else {
2371 if let Some(dot_pos) = trimmed.rfind('.') {
2373 let before_dot = safe_slice_to(trimmed, dot_pos);
2375 let after_dot_start = dot_pos + 1;
2376 let after_dot = if after_dot_start < trimmed.len() {
2377 &trimmed[after_dot_start..]
2378 } else {
2379 ""
2380 };
2381
2382 if !after_dot.contains('(') {
2385 let col_name = if before_dot.ends_with('"') {
2387 let bytes = before_dot.as_bytes();
2389 let pos = before_dot.len() - 1; find_quote_start(bytes, pos).map(|start| safe_slice_from(before_dot, start))
2392 } else {
2393 before_dot
2396 .split_whitespace()
2397 .last()
2398 .map(|word| word.trim_start_matches('('))
2399 };
2400
2401 if let Some(col_name) = col_name {
2402 let is_valid = Parser::is_valid_identifier(col_name);
2404
2405 if is_valid {
2406 return handle_method_call_context(col_name, after_dot);
2407 }
2408 }
2409 }
2410 }
2411 }
2412
2413 if let Some(where_clause) = &stmt.where_clause {
2415 let trimmed_upper = trimmed.to_uppercase();
2417 if trimmed_upper.ends_with(" AND") || trimmed_upper.ends_with(" OR") {
2418 let op = if trimmed_upper.ends_with(" AND") {
2419 LogicalOp::And
2420 } else {
2421 LogicalOp::Or
2422 };
2423 return (CursorContext::AfterLogicalOp(op), None);
2424 }
2425
2426 let query_upper = query.to_uppercase();
2428 if let Some(and_pos) = query_upper.rfind(" AND ") {
2429 let after_and = safe_slice_from(query, and_pos + 5);
2430 let partial = extract_partial_at_end(after_and);
2431 if partial.is_some() {
2432 return (CursorContext::AfterLogicalOp(LogicalOp::And), partial);
2433 }
2434 }
2435
2436 if let Some(or_pos) = query_upper.rfind(" OR ") {
2437 let after_or = safe_slice_from(query, or_pos + 4);
2438 let partial = extract_partial_at_end(after_or);
2439 if partial.is_some() {
2440 return (CursorContext::AfterLogicalOp(LogicalOp::Or), partial);
2441 }
2442 }
2443
2444 if let Some(last_condition) = where_clause.conditions.last() {
2445 if let Some(connector) = &last_condition.connector {
2446 return (
2448 CursorContext::AfterLogicalOp(connector.clone()),
2449 extract_partial_at_end(query),
2450 );
2451 }
2452 }
2453 return (CursorContext::WhereClause, extract_partial_at_end(query));
2455 }
2456
2457 let query_upper = query.to_uppercase();
2459 if query_upper.ends_with(" ORDER BY") {
2460 return (CursorContext::OrderByClause, None);
2461 }
2462
2463 if stmt.order_by.is_some() {
2465 return (CursorContext::OrderByClause, extract_partial_at_end(query));
2466 }
2467
2468 if stmt.from_table.is_some() && stmt.where_clause.is_none() && stmt.order_by.is_none() {
2469 return (CursorContext::FromClause, extract_partial_at_end(query));
2470 }
2471
2472 if !stmt.columns.is_empty() && stmt.from_table.is_none() {
2473 return (CursorContext::SelectClause, extract_partial_at_end(query));
2474 }
2475
2476 (CursorContext::Unknown, None)
2477}
2478
2479fn find_last_token(tokens: &[(usize, usize, Token)], target: &Token) -> Option<usize> {
2481 tokens
2482 .iter()
2483 .rposition(|(_, _, t)| t == target)
2484 .map(|idx| tokens[idx].0)
2485}
2486
2487fn find_last_matching_token<F>(
2489 tokens: &[(usize, usize, Token)],
2490 predicate: F,
2491) -> Option<(usize, &Token)>
2492where
2493 F: Fn(&Token) -> bool,
2494{
2495 tokens
2496 .iter()
2497 .rposition(|(_, _, t)| predicate(t))
2498 .map(|idx| (tokens[idx].0, &tokens[idx].2))
2499}
2500
2501fn is_in_clause(
2503 tokens: &[(usize, usize, Token)],
2504 clause_token: Token,
2505 exclude_tokens: &[Token],
2506) -> bool {
2507 if let Some(clause_pos) = find_last_token(tokens, &clause_token) {
2509 for (pos, _, token) in tokens.iter() {
2511 if *pos > clause_pos && exclude_tokens.contains(token) {
2512 return false;
2513 }
2514 }
2515 return true;
2516 }
2517 false
2518}
2519
2520fn analyze_partial(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
2521 let mut lexer = Lexer::new(query);
2523 let tokens = lexer.tokenize_all_with_positions();
2524
2525 let trimmed = query.trim();
2526
2527 #[cfg(test)]
2528 {
2529 if trimmed.contains("\"Last Name\"") {
2530 eprintln!("DEBUG analyze_partial: query='{query}', trimmed='{trimmed}'");
2531 }
2532 }
2533
2534 if let Some(result) = check_after_comparison_operator(query) {
2536 return result;
2537 }
2538
2539 if let Some(dot_pos) = trimmed.rfind('.') {
2542 #[cfg(test)]
2543 {
2544 if trimmed.contains("\"Last Name\"") {
2545 eprintln!("DEBUG: Found dot at position {dot_pos}");
2546 }
2547 }
2548 let before_dot = &trimmed[..dot_pos];
2550 let after_dot = &trimmed[dot_pos + 1..];
2551
2552 if !after_dot.contains('(') {
2555 let col_name = if before_dot.ends_with('"') {
2558 let bytes = before_dot.as_bytes();
2560 let pos = before_dot.len() - 1; #[cfg(test)]
2563 {
2564 if trimmed.contains("\"Last Name\"") {
2565 eprintln!("DEBUG: before_dot='{before_dot}', looking for opening quote");
2566 }
2567 }
2568
2569 let found_start = find_quote_start(bytes, pos);
2570
2571 if let Some(start) = found_start {
2572 let result = safe_slice_from(before_dot, start);
2574 #[cfg(test)]
2575 {
2576 if trimmed.contains("\"Last Name\"") {
2577 eprintln!("DEBUG: Extracted quoted identifier: '{result}'");
2578 }
2579 }
2580 Some(result)
2581 } else {
2582 #[cfg(test)]
2583 {
2584 if trimmed.contains("\"Last Name\"") {
2585 eprintln!("DEBUG: No opening quote found!");
2586 }
2587 }
2588 None
2589 }
2590 } else {
2591 before_dot
2594 .split_whitespace()
2595 .last()
2596 .map(|word| word.trim_start_matches('('))
2597 };
2598
2599 if let Some(col_name) = col_name {
2600 #[cfg(test)]
2601 {
2602 if trimmed.contains("\"Last Name\"") {
2603 eprintln!("DEBUG: col_name = '{col_name}'");
2604 }
2605 }
2606
2607 let is_valid = Parser::is_valid_identifier(col_name);
2609
2610 #[cfg(test)]
2611 {
2612 if trimmed.contains("\"Last Name\"") {
2613 eprintln!("DEBUG: is_valid = {is_valid}");
2614 }
2615 }
2616
2617 if is_valid {
2618 return handle_method_call_context(col_name, after_dot);
2619 }
2620 }
2621 }
2622 }
2623
2624 if let Some((pos, token)) =
2626 find_last_matching_token(&tokens, |t| matches!(t, Token::And | Token::Or))
2627 {
2628 let token_end_pos = if matches!(token, Token::And) {
2630 pos + 3 } else {
2632 pos + 2 };
2634
2635 if cursor_pos > token_end_pos {
2636 let after_op = safe_slice_from(query, token_end_pos + 1); let partial = extract_partial_at_end(after_op);
2639 let op = if matches!(token, Token::And) {
2640 LogicalOp::And
2641 } else {
2642 LogicalOp::Or
2643 };
2644 return (CursorContext::AfterLogicalOp(op), partial);
2645 }
2646 }
2647
2648 if let Some((_, _, last_token)) = tokens.last() {
2650 if matches!(last_token, Token::And | Token::Or) {
2651 let op = if matches!(last_token, Token::And) {
2652 LogicalOp::And
2653 } else {
2654 LogicalOp::Or
2655 };
2656 return (CursorContext::AfterLogicalOp(op), None);
2657 }
2658 }
2659
2660 if let Some(order_pos) = find_last_token(&tokens, &Token::OrderBy) {
2662 let has_by = tokens
2664 .iter()
2665 .any(|(pos, _, t)| *pos > order_pos && matches!(t, Token::By));
2666 if has_by
2667 || tokens
2668 .last()
2669 .map_or(false, |(_, _, t)| matches!(t, Token::OrderBy))
2670 {
2671 return (CursorContext::OrderByClause, extract_partial_at_end(query));
2672 }
2673 }
2674
2675 if is_in_clause(&tokens, Token::Where, &[Token::OrderBy, Token::GroupBy]) {
2677 return (CursorContext::WhereClause, extract_partial_at_end(query));
2678 }
2679
2680 if is_in_clause(
2682 &tokens,
2683 Token::From,
2684 &[Token::Where, Token::OrderBy, Token::GroupBy],
2685 ) {
2686 return (CursorContext::FromClause, extract_partial_at_end(query));
2687 }
2688
2689 if find_last_token(&tokens, &Token::Select).is_some()
2691 && find_last_token(&tokens, &Token::From).is_none()
2692 {
2693 return (CursorContext::SelectClause, extract_partial_at_end(query));
2694 }
2695
2696 (CursorContext::Unknown, None)
2697}
2698
2699fn extract_partial_at_end(query: &str) -> Option<String> {
2700 let trimmed = query.trim();
2701
2702 if let Some(last_word) = trimmed.split_whitespace().last() {
2704 if last_word.starts_with('"') && !last_word.ends_with('"') {
2705 return Some(last_word.to_string());
2707 }
2708 }
2709
2710 let last_word = trimmed.split_whitespace().last()?;
2712
2713 if last_word.chars().all(|c| c.is_alphanumeric() || c == '_') {
2716 if !is_sql_keyword(last_word) {
2718 Some(last_word.to_string())
2719 } else {
2720 None
2721 }
2722 } else {
2723 None
2724 }
2725}
2726
2727impl ParsePrimary for Parser {
2729 fn current_token(&self) -> &Token {
2730 &self.current_token
2731 }
2732
2733 fn advance(&mut self) {
2734 self.advance();
2735 }
2736
2737 fn consume(&mut self, expected: Token) -> Result<(), String> {
2738 self.consume(expected)
2739 }
2740
2741 fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
2742 self.parse_case_expression()
2743 }
2744
2745 fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
2746 self.parse_function_args()
2747 }
2748
2749 fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
2750 self.parse_window_spec()
2751 }
2752
2753 fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
2754 self.parse_logical_or()
2755 }
2756
2757 fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2758 self.parse_comparison()
2759 }
2760
2761 fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2762 self.parse_expression_list()
2763 }
2764
2765 fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2766 if matches!(self.current_token, Token::With) {
2768 self.parse_with_clause_inner()
2769 } else {
2770 self.parse_select_statement_inner()
2771 }
2772 }
2773}
2774
2775impl ExpressionParser for Parser {
2777 fn current_token(&self) -> &Token {
2778 &self.current_token
2779 }
2780
2781 fn advance(&mut self) {
2782 match &self.current_token {
2784 Token::LeftParen => self.paren_depth += 1,
2785 Token::RightParen => {
2786 self.paren_depth -= 1;
2787 }
2788 _ => {}
2789 }
2790 self.current_token = self.lexer.next_token();
2791 }
2792
2793 fn peek(&self) -> Option<&Token> {
2794 None }
2801
2802 fn is_at_end(&self) -> bool {
2803 matches!(self.current_token, Token::Eof)
2804 }
2805
2806 fn consume(&mut self, expected: Token) -> Result<(), String> {
2807 if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
2809 self.update_paren_depth(&expected)?;
2810 self.current_token = self.lexer.next_token();
2811 Ok(())
2812 } else {
2813 Err(format!(
2814 "Expected {:?}, found {:?}",
2815 expected, self.current_token
2816 ))
2817 }
2818 }
2819
2820 fn parse_identifier(&mut self) -> Result<String, String> {
2821 if let Token::Identifier(id) = &self.current_token {
2822 let id = id.clone();
2823 self.advance();
2824 Ok(id)
2825 } else {
2826 Err(format!(
2827 "Expected identifier, found {:?}",
2828 self.current_token
2829 ))
2830 }
2831 }
2832}
2833
2834impl ParseArithmetic for Parser {
2836 fn current_token(&self) -> &Token {
2837 &self.current_token
2838 }
2839
2840 fn advance(&mut self) {
2841 self.advance();
2842 }
2843
2844 fn consume(&mut self, expected: Token) -> Result<(), String> {
2845 self.consume(expected)
2846 }
2847
2848 fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2849 self.parse_primary()
2850 }
2851
2852 fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
2853 self.parse_multiplicative()
2854 }
2855
2856 fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
2857 self.parse_method_args()
2858 }
2859}
2860
2861impl ParseComparison for Parser {
2863 fn current_token(&self) -> &Token {
2864 &self.current_token
2865 }
2866
2867 fn advance(&mut self) {
2868 self.advance();
2869 }
2870
2871 fn consume(&mut self, expected: Token) -> Result<(), String> {
2872 self.consume(expected)
2873 }
2874
2875 fn parse_primary(&mut self) -> Result<SqlExpression, String> {
2876 self.parse_primary()
2877 }
2878
2879 fn parse_additive(&mut self) -> Result<SqlExpression, String> {
2880 self.parse_additive()
2881 }
2882
2883 fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2884 self.parse_expression_list()
2885 }
2886
2887 fn parse_subquery(&mut self) -> Result<SelectStatement, String> {
2888 if matches!(self.current_token, Token::With) {
2890 self.parse_with_clause_inner()
2891 } else {
2892 self.parse_select_statement_inner()
2893 }
2894 }
2895}
2896
2897impl ParseLogical for Parser {
2899 fn current_token(&self) -> &Token {
2900 &self.current_token
2901 }
2902
2903 fn advance(&mut self) {
2904 self.advance();
2905 }
2906
2907 fn consume(&mut self, expected: Token) -> Result<(), String> {
2908 self.consume(expected)
2909 }
2910
2911 fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
2912 self.parse_logical_and()
2913 }
2914
2915 fn parse_base_logical_expression(&mut self) -> Result<SqlExpression, String> {
2916 self.parse_comparison()
2919 }
2920
2921 fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
2922 self.parse_comparison()
2923 }
2924
2925 fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
2926 self.parse_expression_list()
2927 }
2928}
2929
2930impl ParseCase for Parser {
2932 fn current_token(&self) -> &Token {
2933 &self.current_token
2934 }
2935
2936 fn advance(&mut self) {
2937 self.advance();
2938 }
2939
2940 fn consume(&mut self, expected: Token) -> Result<(), String> {
2941 self.consume(expected)
2942 }
2943
2944 fn parse_expression(&mut self) -> Result<SqlExpression, String> {
2945 self.parse_expression()
2946 }
2947}
2948
2949fn is_sql_keyword(word: &str) -> bool {
2950 let mut lexer = Lexer::new(word);
2952 let token = lexer.next_token();
2953
2954 !matches!(token, Token::Identifier(_) | Token::Eof)
2956}
2957
2958#[cfg(test)]
2959mod tests {
2960 use super::*;
2961
2962 #[test]
2964 fn test_parser_mode_default_is_standard() {
2965 let sql = "-- Leading comment\nSELECT * FROM users";
2966 let mut parser = Parser::new(sql);
2967 let stmt = parser.parse().unwrap();
2968
2969 assert!(stmt.leading_comments.is_empty());
2971 assert!(stmt.trailing_comment.is_none());
2972 }
2973
2974 #[test]
2976 fn test_parser_mode_preserve_leading_comments() {
2977 let sql = "-- Important query\n-- Author: Alice\nSELECT id, name FROM users";
2978 let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
2979 let stmt = parser.parse().unwrap();
2980
2981 assert_eq!(stmt.leading_comments.len(), 2);
2983 assert!(stmt.leading_comments[0].is_line_comment);
2984 assert!(stmt.leading_comments[0].text.contains("Important query"));
2985 assert!(stmt.leading_comments[1].text.contains("Author: Alice"));
2986 }
2987
2988 #[test]
2990 fn test_parser_mode_preserve_trailing_comment() {
2991 let sql = "SELECT * FROM users -- Fetch all users";
2992 let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
2993 let stmt = parser.parse().unwrap();
2994
2995 assert!(stmt.trailing_comment.is_some());
2997 let comment = stmt.trailing_comment.unwrap();
2998 assert!(comment.is_line_comment);
2999 assert!(comment.text.contains("Fetch all users"));
3000 }
3001
3002 #[test]
3004 fn test_parser_mode_preserve_block_comments() {
3005 let sql = "/* Query explanation */\nSELECT * FROM users";
3006 let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
3007 let stmt = parser.parse().unwrap();
3008
3009 assert_eq!(stmt.leading_comments.len(), 1);
3011 assert!(!stmt.leading_comments[0].is_line_comment); assert!(stmt.leading_comments[0].text.contains("Query explanation"));
3013 }
3014
3015 #[test]
3017 fn test_parser_mode_preserve_both_comments() {
3018 let sql = "-- Leading\nSELECT * FROM users -- Trailing";
3019 let mut parser = Parser::with_mode(sql, ParserMode::PreserveComments);
3020 let stmt = parser.parse().unwrap();
3021
3022 assert_eq!(stmt.leading_comments.len(), 1);
3024 assert!(stmt.leading_comments[0].text.contains("Leading"));
3025 assert!(stmt.trailing_comment.is_some());
3026 assert!(stmt.trailing_comment.unwrap().text.contains("Trailing"));
3027 }
3028
3029 #[test]
3031 fn test_parser_mode_standard_ignores_comments() {
3032 let sql = "-- Comment 1\n/* Comment 2 */\nSELECT * FROM users -- Comment 3";
3033 let mut parser = Parser::with_mode(sql, ParserMode::Standard);
3034 let stmt = parser.parse().unwrap();
3035
3036 assert!(stmt.leading_comments.is_empty());
3038 assert!(stmt.trailing_comment.is_none());
3039
3040 assert_eq!(stmt.select_items.len(), 1);
3042 assert_eq!(stmt.from_table, Some("users".to_string()));
3043 }
3044
3045 #[test]
3047 fn test_parser_backward_compatibility() {
3048 let sql = "SELECT id, name FROM users WHERE active = true";
3049
3050 let mut parser1 = Parser::new(sql);
3052 let stmt1 = parser1.parse().unwrap();
3053
3054 let mut parser2 = Parser::with_mode(sql, ParserMode::Standard);
3056 let stmt2 = parser2.parse().unwrap();
3057
3058 assert_eq!(stmt1.select_items.len(), stmt2.select_items.len());
3060 assert_eq!(stmt1.from_table, stmt2.from_table);
3061 assert_eq!(stmt1.where_clause.is_some(), stmt2.where_clause.is_some());
3062 assert!(stmt1.leading_comments.is_empty());
3063 assert!(stmt2.leading_comments.is_empty());
3064 }
3065
3066 #[test]
3068 fn test_pivot_parsing_not_yet_supported() {
3069 let sql = "SELECT * FROM food_eaten PIVOT (MAX(AmountEaten) FOR FoodName IN ('Sammich', 'Pickle', 'Apple'))";
3070 let mut parser = Parser::new(sql);
3071 let result = parser.parse();
3072
3073 assert!(result.is_ok());
3075 let stmt = result.unwrap();
3076
3077 assert!(stmt.from_source.is_some());
3079 if let Some(crate::sql::parser::ast::TableSource::Pivot { .. }) = stmt.from_source {
3080 } else {
3082 panic!("Expected from_source to be a Pivot variant");
3083 }
3084 }
3085
3086 #[test]
3088 fn test_pivot_aggregate_functions() {
3089 let sql = "SELECT * FROM sales PIVOT (SUM(amount) FOR month IN ('Jan', 'Feb', 'Mar'))";
3091 let mut parser = Parser::new(sql);
3092 let result = parser.parse();
3093 assert!(result.is_ok());
3094
3095 let sql2 = "SELECT * FROM sales PIVOT (COUNT(*) FOR month IN ('Jan', 'Feb'))";
3097 let mut parser2 = Parser::new(sql2);
3098 let result2 = parser2.parse();
3099 assert!(result2.is_ok());
3100
3101 let sql3 = "SELECT * FROM sales PIVOT (AVG(price) FOR category IN ('A', 'B'))";
3103 let mut parser3 = Parser::new(sql3);
3104 let result3 = parser3.parse();
3105 assert!(result3.is_ok());
3106 }
3107
3108 #[test]
3110 fn test_pivot_with_subquery() {
3111 let sql = "SELECT * FROM (SELECT * FROM food_eaten WHERE Id > 5) AS t \
3112 PIVOT (MAX(AmountEaten) FOR FoodName IN ('Sammich', 'Pickle'))";
3113 let mut parser = Parser::new(sql);
3114 let result = parser.parse();
3115
3116 assert!(result.is_ok());
3118 let stmt = result.unwrap();
3119 assert!(stmt.from_source.is_some());
3120 }
3121
3122 #[test]
3124 fn test_pivot_with_alias() {
3125 let sql =
3126 "SELECT * FROM sales PIVOT (SUM(amount) FOR month IN ('Jan', 'Feb')) AS pivot_table";
3127 let mut parser = Parser::new(sql);
3128 let result = parser.parse();
3129
3130 assert!(result.is_ok());
3132 let stmt = result.unwrap();
3133 assert!(stmt.from_source.is_some());
3134 }
3135
3136 fn extract_web_spec(
3139 stmt: &crate::sql::parser::ast::SelectStatement,
3140 ) -> &crate::sql::parser::ast::WebCTESpec {
3141 use crate::sql::parser::ast::CTEType;
3142 assert!(!stmt.ctes.is_empty(), "statement should have CTEs");
3143 match &stmt.ctes[0].cte_type {
3144 CTEType::Web(spec) => spec,
3145 other => panic!("expected Web CTE, got {:?}", other),
3146 }
3147 }
3148
3149 #[test]
3150 fn test_web_cte_delimiter_pipe() {
3151 let sql = "WITH WEB foo AS (URL 'file:///tmp/missing.dat' FORMAT CSV DELIMITER '|') \
3152 SELECT * FROM foo";
3153 let mut parser = Parser::new(sql);
3154 let stmt = parser.parse().expect("parse failed");
3155 let spec = extract_web_spec(&stmt);
3156 assert_eq!(spec.delimiter, Some(b'|'));
3157 }
3158
3159 #[test]
3160 fn test_web_cte_delimiter_tab_via_escape() {
3161 let sql = "WITH WEB foo AS (URL 'file:///tmp/missing.dat' FORMAT CSV DELIMITER '\\t') \
3162 SELECT * FROM foo";
3163 let mut parser = Parser::new(sql);
3164 let stmt = parser.parse().expect("parse failed");
3165 let spec = extract_web_spec(&stmt);
3166 assert_eq!(spec.delimiter, Some(b'\t'));
3167 }
3168
3169 #[test]
3170 fn test_web_cte_no_delimiter_defaults_to_none() {
3171 let sql = "WITH WEB foo AS (URL 'file:///tmp/missing.dat' FORMAT CSV) SELECT * FROM foo";
3172 let mut parser = Parser::new(sql);
3173 let stmt = parser.parse().expect("parse failed");
3174 let spec = extract_web_spec(&stmt);
3175 assert!(spec.delimiter.is_none());
3176 }
3177
3178 #[test]
3179 fn test_web_cte_delimiter_rejects_multi_char() {
3180 let sql = "WITH WEB foo AS (URL 'file:///tmp/missing.dat' FORMAT CSV DELIMITER '||') \
3181 SELECT * FROM foo";
3182 let mut parser = Parser::new(sql);
3183 let err = parser.parse().unwrap_err();
3184 let msg = err.to_string();
3185 assert!(
3186 msg.contains("DELIMITER") || msg.contains("single ASCII"),
3187 "should reject multi-char delimiter: {}",
3188 msg
3189 );
3190 }
3191}