1pub use super::parser::ast::{
5 Condition, JoinClause, JoinCondition, JoinOperator, JoinType, LogicalOp, OrderByColumn,
6 SelectItem, SelectStatement, SortDirection, SqlExpression, TableFunction, TableSource,
7 WhenBranch, WhereClause, WindowSpec, CTE,
8};
9pub use super::parser::legacy::{ParseContext, ParseState, Schema, SqlParser, SqlToken, TableInfo};
10pub use super::parser::lexer::{Lexer, Token};
11pub use super::parser::ParserConfig;
12
13pub use super::parser::formatter::{format_ast_tree, format_sql_pretty, format_sql_pretty_compact};
15
16use super::parser::expressions::arithmetic::{
18 parse_additive as parse_additive_expr, parse_multiplicative as parse_multiplicative_expr,
19 ParseArithmetic,
20};
21use super::parser::expressions::case::{parse_case_expression as parse_case_expr, ParseCase};
22use super::parser::expressions::comparison::{
23 parse_comparison as parse_comparison_expr, parse_in_operator, ParseComparison,
24};
25use super::parser::expressions::logical::{
26 parse_logical_and as parse_logical_and_expr, parse_logical_or as parse_logical_or_expr,
27 ParseLogical,
28};
29use super::parser::expressions::primary::{
30 parse_primary as parse_primary_expr, ParsePrimary, PrimaryExpressionContext,
31};
/// Recursive-descent parser state for SQL `SELECT` statements.
///
/// The parser keeps exactly one token of lookahead (`current_token`) and
/// pulls further tokens from `lexer` on demand.
pub struct Parser {
    /// Token source; `next_token` is called each time a token is consumed.
    lexer: Lexer,
    /// The lookahead token every parse method inspects before deciding what to do.
    current_token: Token,
    /// Flag set while method-call arguments are parsed; it is copied into the
    /// `PrimaryExpressionContext` handed to primary-expression parsing.
    in_method_args: bool,
    /// Caller-supplied column names. They are forwarded to primary-expression
    /// parsing and let ORDER BY accept a numeric literal that matches one of them.
    columns: Vec<String>,
    /// Running balance of parentheses: `consume`/`advance` add one for `(` and
    /// subtract one for `)`. Signed so an excess `)` shows up as a negative value.
    paren_depth: i32,
    // Stored by the constructors but not read anywhere in this impl, hence the
    // lint allowance.
    #[allow(dead_code)]
    config: ParserConfig,
}
41
42impl Parser {
43 #[must_use]
44 pub fn new(input: &str) -> Self {
45 let mut lexer = Lexer::new(input);
46 let current_token = lexer.next_token();
47 Self {
48 lexer,
49 current_token,
50 in_method_args: false,
51 columns: Vec::new(),
52 paren_depth: 0,
53 config: ParserConfig::default(),
54 }
55 }
56
57 #[must_use]
58 pub fn with_config(input: &str, config: ParserConfig) -> Self {
59 let mut lexer = Lexer::new(input);
60 let current_token = lexer.next_token();
61 Self {
62 lexer,
63 current_token,
64 in_method_args: false,
65 columns: Vec::new(),
66 paren_depth: 0,
67 config,
68 }
69 }
70
    /// Builder-style setter that replaces the parser's column list and hands
    /// the parser back.
    ///
    /// The list is passed on to primary-expression parsing and is consulted by
    /// ORDER BY parsing when it encounters a numeric literal.
    #[must_use]
    pub fn with_columns(mut self, columns: Vec<String>) -> Self {
        self.columns = columns;
        self
    }
76
77 fn consume(&mut self, expected: Token) -> Result<(), String> {
78 if std::mem::discriminant(&self.current_token) == std::mem::discriminant(&expected) {
79 match &expected {
81 Token::LeftParen => self.paren_depth += 1,
82 Token::RightParen => {
83 self.paren_depth -= 1;
84 if self.paren_depth < 0 {
86 return Err(
87 "Unexpected closing parenthesis - no matching opening parenthesis"
88 .to_string(),
89 );
90 }
91 }
92 _ => {}
93 }
94
95 self.current_token = self.lexer.next_token();
96 Ok(())
97 } else {
98 let error_msg = match (&expected, &self.current_token) {
100 (Token::RightParen, Token::Eof) if self.paren_depth > 0 => {
101 format!(
102 "Unclosed parenthesis - missing {} closing parenthes{}",
103 self.paren_depth,
104 if self.paren_depth == 1 { "is" } else { "es" }
105 )
106 }
107 (Token::RightParen, _) if self.paren_depth > 0 => {
108 format!(
109 "Expected closing parenthesis but found {:?} (currently {} unclosed parenthes{})",
110 self.current_token,
111 self.paren_depth,
112 if self.paren_depth == 1 { "is" } else { "es" }
113 )
114 }
115 _ => format!("Expected {:?}, found {:?}", expected, self.current_token),
116 };
117 Err(error_msg)
118 }
119 }
120
121 fn advance(&mut self) {
122 match &self.current_token {
124 Token::LeftParen => self.paren_depth += 1,
125 Token::RightParen => {
126 self.paren_depth -= 1;
127 }
130 _ => {}
131 }
132 self.current_token = self.lexer.next_token();
133 }
134
135 pub fn parse(&mut self) -> Result<SelectStatement, String> {
136 if matches!(self.current_token, Token::With) {
138 self.parse_with_clause()
139 } else {
140 self.parse_select_statement()
141 }
142 }
143
144 fn parse_with_clause(&mut self) -> Result<SelectStatement, String> {
145 self.consume(Token::With)?;
146
147 let mut ctes = Vec::new();
148
149 loop {
151 let name = match &self.current_token {
153 Token::Identifier(name) => name.clone(),
154 _ => return Err("Expected CTE name after WITH".to_string()),
155 };
156 self.advance();
157
158 let column_list = if matches!(self.current_token, Token::LeftParen) {
160 self.advance();
161 let cols = self.parse_identifier_list()?;
162 self.consume(Token::RightParen)?;
163 Some(cols)
164 } else {
165 None
166 };
167
168 self.consume(Token::As)?;
170
171 self.consume(Token::LeftParen)?;
173
174 let query = self.parse_select_statement_inner()?;
176
177 self.consume(Token::RightParen)?;
179
180 ctes.push(CTE {
181 name,
182 column_list,
183 query,
184 });
185
186 if !matches!(self.current_token, Token::Comma) {
188 break;
189 }
190 self.advance();
191 }
192
193 let mut main_query = self.parse_select_statement()?;
195 main_query.ctes = ctes;
196
197 Ok(main_query)
198 }
199
200 fn parse_select_statement(&mut self) -> Result<SelectStatement, String> {
201 let result = self.parse_select_statement_inner()?;
202
203 if self.paren_depth > 0 {
205 return Err(format!(
206 "Unclosed parenthesis - missing {} closing parenthes{}",
207 self.paren_depth,
208 if self.paren_depth == 1 { "is" } else { "es" }
209 ));
210 } else if self.paren_depth < 0 {
211 return Err(
212 "Extra closing parenthesis found - no matching opening parenthesis".to_string(),
213 );
214 }
215
216 Ok(result)
217 }
218
219 fn parse_select_statement_inner(&mut self) -> Result<SelectStatement, String> {
220 self.consume(Token::Select)?;
221
222 let distinct = if matches!(self.current_token, Token::Distinct) {
224 self.advance();
225 true
226 } else {
227 false
228 };
229
230 let select_items = self.parse_select_items()?;
232
233 let columns = select_items
235 .iter()
236 .map(|item| match item {
237 SelectItem::Star => "*".to_string(),
238 SelectItem::Column(name) => name.clone(),
239 SelectItem::Expression { alias, .. } => alias.clone(),
240 })
241 .collect();
242
243 let (from_table, from_subquery, from_function, from_alias) =
245 if matches!(self.current_token, Token::From) {
246 self.advance();
247
248 if let Token::Identifier(name) = &self.current_token.clone() {
250 if name.to_uppercase() == "RANGE" {
251 self.advance();
252 self.consume(Token::LeftParen)?;
254
255 let start = self.parse_expression()?;
257 self.consume(Token::Comma)?;
258
259 let end = self.parse_expression()?;
261
262 let step = if matches!(self.current_token, Token::Comma) {
264 self.advance();
265 Some(self.parse_expression()?)
266 } else {
267 None
268 };
269
270 self.consume(Token::RightParen)?;
271
272 let alias = if matches!(self.current_token, Token::As) {
274 self.advance();
275 match &self.current_token {
276 Token::Identifier(name) => {
277 let alias = name.clone();
278 self.advance();
279 Some(alias)
280 }
281 _ => return Err("Expected alias name after AS".to_string()),
282 }
283 } else if let Token::Identifier(name) = &self.current_token {
284 let alias = name.clone();
285 self.advance();
286 Some(alias)
287 } else {
288 None
289 };
290
291 (
292 None,
293 None,
294 Some(TableFunction::Range { start, end, step }),
295 alias,
296 )
297 } else {
298 let table_name = name.clone();
300 self.advance();
301
302 let alias = if matches!(self.current_token, Token::As) {
304 self.advance();
305 match &self.current_token {
306 Token::Identifier(name) => {
307 let alias = name.clone();
308 self.advance();
309 Some(alias)
310 }
311 _ => return Err("Expected alias name after AS".to_string()),
312 }
313 } else if let Token::Identifier(name) = &self.current_token {
314 let alias = name.clone();
316 self.advance();
317 Some(alias)
318 } else {
319 None
320 };
321
322 (Some(table_name), None, None, alias)
323 }
324 } else if matches!(self.current_token, Token::LeftParen) {
325 self.advance();
327
328 let subquery = self.parse_select_statement_inner()?;
330
331 self.consume(Token::RightParen)?;
332
333 let alias = if matches!(self.current_token, Token::As) {
335 self.advance();
336 match &self.current_token {
337 Token::Identifier(name) => {
338 let alias = name.clone();
339 self.advance();
340 alias
341 }
342 _ => return Err("Expected alias name after AS".to_string()),
343 }
344 } else {
345 match &self.current_token {
347 Token::Identifier(name) => {
348 let alias = name.clone();
349 self.advance();
350 alias
351 }
352 _ => {
353 return Err(
354 "Subquery in FROM must have an alias (e.g., AS t)".to_string()
355 )
356 }
357 }
358 };
359
360 (None, Some(Box::new(subquery)), None, Some(alias))
361 } else {
362 match &self.current_token {
364 Token::Identifier(table) => {
365 let table_name = table.clone();
366 self.advance();
367
368 let alias = if matches!(self.current_token, Token::As) {
370 self.advance();
371 match &self.current_token {
372 Token::Identifier(name) => {
373 let alias = name.clone();
374 self.advance();
375 Some(alias)
376 }
377 _ => return Err("Expected alias name after AS".to_string()),
378 }
379 } else if let Token::Identifier(name) = &self.current_token {
380 let alias = name.clone();
382 self.advance();
383 Some(alias)
384 } else {
385 None
386 };
387
388 (Some(table_name), None, None, alias)
389 }
390 Token::QuotedIdentifier(table) => {
391 let table_name = table.clone();
393 self.advance();
394
395 let alias = if matches!(self.current_token, Token::As) {
397 self.advance();
398 match &self.current_token {
399 Token::Identifier(name) => {
400 let alias = name.clone();
401 self.advance();
402 Some(alias)
403 }
404 _ => return Err("Expected alias name after AS".to_string()),
405 }
406 } else if let Token::Identifier(name) = &self.current_token {
407 let alias = name.clone();
409 self.advance();
410 Some(alias)
411 } else {
412 None
413 };
414
415 (Some(table_name), None, None, alias)
416 }
417 _ => return Err("Expected table name or subquery after FROM".to_string()),
418 }
419 }
420 } else {
421 (None, None, None, None)
422 };
423
424 let mut joins = Vec::new();
426 while self.is_join_token() {
427 joins.push(self.parse_join_clause()?);
428 }
429
430 let where_clause = if matches!(self.current_token, Token::Where) {
431 self.advance();
432 Some(self.parse_where_clause()?)
433 } else {
434 None
435 };
436
437 let group_by = if matches!(self.current_token, Token::GroupBy) {
438 self.advance();
439 Some(self.parse_identifier_list()?)
440 } else {
441 None
442 };
443
444 let having = if matches!(self.current_token, Token::Having) {
446 if group_by.is_none() {
447 return Err("HAVING clause requires GROUP BY".to_string());
448 }
449 self.advance();
450 Some(self.parse_expression()?)
451 } else {
452 None
453 };
454
455 let order_by = if matches!(self.current_token, Token::OrderBy) {
457 self.advance();
458 Some(self.parse_order_by_list()?)
459 } else if let Token::Identifier(s) = &self.current_token {
460 if s.to_uppercase() == "ORDER" {
461 self.advance(); if matches!(&self.current_token, Token::Identifier(by_token) if by_token.to_uppercase() == "BY")
464 {
465 self.advance(); Some(self.parse_order_by_list()?)
467 } else {
468 return Err("Expected BY after ORDER".to_string());
469 }
470 } else {
471 None
472 }
473 } else {
474 None
475 };
476
477 let limit = if matches!(self.current_token, Token::Limit) {
479 self.advance();
480 match &self.current_token {
481 Token::NumberLiteral(num) => {
482 let limit_val = num
483 .parse::<usize>()
484 .map_err(|_| format!("Invalid LIMIT value: {num}"))?;
485 self.advance();
486 Some(limit_val)
487 }
488 _ => return Err("Expected number after LIMIT".to_string()),
489 }
490 } else {
491 None
492 };
493
494 let offset = if matches!(self.current_token, Token::Offset) {
496 self.advance();
497 match &self.current_token {
498 Token::NumberLiteral(num) => {
499 let offset_val = num
500 .parse::<usize>()
501 .map_err(|_| format!("Invalid OFFSET value: {num}"))?;
502 self.advance();
503 Some(offset_val)
504 }
505 _ => return Err("Expected number after OFFSET".to_string()),
506 }
507 } else {
508 None
509 };
510
511 Ok(SelectStatement {
512 distinct,
513 columns,
514 select_items,
515 from_table,
516 from_subquery,
517 from_function,
518 from_alias,
519 joins,
520 where_clause,
521 order_by,
522 group_by,
523 having,
524 limit,
525 offset,
526 ctes: Vec::new(), })
528 }
529
530 fn parse_select_list(&mut self) -> Result<Vec<String>, String> {
531 let mut columns = Vec::new();
532
533 if matches!(self.current_token, Token::Star) {
534 columns.push("*".to_string());
535 self.advance();
536 } else {
537 loop {
538 match &self.current_token {
539 Token::Identifier(col) => {
540 columns.push(col.clone());
541 self.advance();
542 }
543 Token::QuotedIdentifier(col) => {
544 columns.push(col.clone());
546 self.advance();
547 }
548 _ => return Err("Expected column name".to_string()),
549 }
550
551 if matches!(self.current_token, Token::Comma) {
552 self.advance();
553 } else {
554 break;
555 }
556 }
557 }
558
559 Ok(columns)
560 }
561
562 fn parse_select_items(&mut self) -> Result<Vec<SelectItem>, String> {
564 let mut items = Vec::new();
565
566 loop {
567 if matches!(self.current_token, Token::Star) {
570 items.push(SelectItem::Star);
578 self.advance();
579 } else {
580 let expr = self.parse_comparison()?; let alias = if matches!(self.current_token, Token::As) {
585 self.advance();
586 match &self.current_token {
587 Token::Identifier(alias_name) => {
588 let alias = alias_name.clone();
589 self.advance();
590 alias
591 }
592 Token::QuotedIdentifier(alias_name) => {
593 let alias = alias_name.clone();
594 self.advance();
595 alias
596 }
597 _ => return Err("Expected alias name after AS".to_string()),
598 }
599 } else {
600 match &expr {
602 SqlExpression::Column(col_name) => col_name.clone(),
603 _ => format!("expr_{}", items.len() + 1), }
605 };
606
607 let item = match expr {
609 SqlExpression::Column(col_name) if alias == col_name => {
610 SelectItem::Column(col_name)
612 }
613 _ => {
614 SelectItem::Expression { expr, alias }
616 }
617 };
618
619 items.push(item);
620 }
621
622 if matches!(self.current_token, Token::Comma) {
624 self.advance();
625 } else {
626 break;
627 }
628 }
629
630 Ok(items)
631 }
632
633 fn parse_identifier_list(&mut self) -> Result<Vec<String>, String> {
634 let mut identifiers = Vec::new();
635
636 loop {
637 match &self.current_token {
638 Token::Identifier(id) => {
639 let id_upper = id.to_uppercase();
641 if matches!(
642 id_upper.as_str(),
643 "ORDER" | "HAVING" | "LIMIT" | "OFFSET" | "UNION" | "INTERSECT" | "EXCEPT"
644 ) {
645 break;
647 }
648 identifiers.push(id.clone());
649 self.advance();
650 }
651 Token::QuotedIdentifier(id) => {
652 identifiers.push(id.clone());
654 self.advance();
655 }
656 _ => {
657 break;
659 }
660 }
661
662 if matches!(self.current_token, Token::Comma) {
663 self.advance();
664 } else {
665 break;
666 }
667 }
668
669 if identifiers.is_empty() {
670 return Err("Expected at least one identifier".to_string());
671 }
672
673 Ok(identifiers)
674 }
675
676 fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
677 let mut partition_by = Vec::new();
678 let mut order_by = Vec::new();
679
680 if matches!(self.current_token, Token::Partition) {
682 self.advance(); if !matches!(self.current_token, Token::By) {
684 return Err("Expected BY after PARTITION".to_string());
685 }
686 self.advance(); partition_by = self.parse_identifier_list()?;
690 }
691
692 if matches!(self.current_token, Token::OrderBy) {
694 self.advance(); order_by = self.parse_order_by_list()?;
696 } else if let Token::Identifier(s) = &self.current_token {
697 if s.to_uppercase() == "ORDER" {
698 self.advance(); if !matches!(self.current_token, Token::By) {
701 return Err("Expected BY after ORDER".to_string());
702 }
703 self.advance(); order_by = self.parse_order_by_list()?;
705 }
706 }
707
708 Ok(WindowSpec {
709 partition_by,
710 order_by,
711 })
712 }
713
714 fn parse_order_by_list(&mut self) -> Result<Vec<OrderByColumn>, String> {
715 let mut order_columns = Vec::new();
716
717 loop {
718 let column = match &self.current_token {
719 Token::Identifier(id) => {
720 let col = id.clone();
721 self.advance();
722 col
723 }
724 Token::QuotedIdentifier(id) => {
725 let col = id.clone();
726 self.advance();
727 col
728 }
729 Token::NumberLiteral(num) if self.columns.iter().any(|col| col == num) => {
730 let col = num.clone();
732 self.advance();
733 col
734 }
735 _ => return Err("Expected column name in ORDER BY".to_string()),
736 };
737
738 let direction = match &self.current_token {
740 Token::Asc => {
741 self.advance();
742 SortDirection::Asc
743 }
744 Token::Desc => {
745 self.advance();
746 SortDirection::Desc
747 }
748 _ => SortDirection::Asc, };
750
751 order_columns.push(OrderByColumn { column, direction });
752
753 if matches!(self.current_token, Token::Comma) {
754 self.advance();
755 } else {
756 break;
757 }
758 }
759
760 Ok(order_columns)
761 }
762
763 fn parse_where_clause(&mut self) -> Result<WhereClause, String> {
764 let expr = self.parse_expression()?;
767
768 if matches!(self.current_token, Token::RightParen) && self.paren_depth <= 0 {
770 return Err(
771 "Unexpected closing parenthesis - no matching opening parenthesis".to_string(),
772 );
773 }
774
775 let conditions = vec![Condition {
777 expr,
778 connector: None,
779 }];
780
781 Ok(WhereClause { conditions })
782 }
783
784 fn parse_expression(&mut self) -> Result<SqlExpression, String> {
785 let mut left = self.parse_logical_or()?;
788
789 left = parse_in_operator(self, left)?;
792
793 Ok(left)
794 }
795
    /// Parses a comparison-level expression by delegating to the
    /// `parse_comparison` routine imported from `expressions::comparison`.
    fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
        parse_comparison_expr(self)
    }
800
    /// Parses an additive-level expression by delegating to the
    /// `parse_additive` routine imported from `expressions::arithmetic`.
    fn parse_additive(&mut self) -> Result<SqlExpression, String> {
        parse_additive_expr(self)
    }
805
    /// Parses a multiplicative-level expression by delegating to the
    /// `parse_multiplicative` routine imported from `expressions::arithmetic`.
    fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
        parse_multiplicative_expr(self)
    }
810
    /// Parses a logical-OR level expression by delegating to the
    /// `parse_logical_or` routine imported from `expressions::logical`.
    fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
        parse_logical_or_expr(self)
    }
815
    /// Parses a logical-AND level expression by delegating to the
    /// `parse_logical_and` routine imported from `expressions::logical`.
    fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
        parse_logical_and_expr(self)
    }
820
    /// Parses a `CASE` expression by delegating to the
    /// `parse_case_expression` routine imported from `expressions::case`.
    fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
        parse_case_expr(self)
    }
825
    /// Parses a primary expression by delegating to the `parse_primary`
    /// routine imported from `expressions::primary`, passing along the known
    /// columns and the current `in_method_args` flag.
    fn parse_primary(&mut self) -> Result<SqlExpression, String> {
        // Cloned so the context can borrow the list while `self` is lent out
        // mutably to the delegate below.
        let columns = self.columns.clone();
        let in_method_args = self.in_method_args;
        let ctx = PrimaryExpressionContext {
            columns: &columns,
            in_method_args,
        };
        parse_primary_expr(self, &ctx)
    }
837
838 fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
840 let mut args = Vec::new();
841
842 self.in_method_args = true;
844
845 if !matches!(self.current_token, Token::RightParen) {
846 loop {
847 args.push(self.parse_expression()?);
848
849 if matches!(self.current_token, Token::Comma) {
850 self.advance();
851 } else {
852 break;
853 }
854 }
855 }
856
857 self.in_method_args = false;
859
860 Ok(args)
861 }
862
863 fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
864 let mut args = Vec::new();
865 let mut has_distinct = false;
866
867 if !matches!(self.current_token, Token::RightParen) {
868 if matches!(self.current_token, Token::Distinct) {
870 self.advance(); has_distinct = true;
872 }
873
874 args.push(self.parse_additive()?);
876
877 while matches!(self.current_token, Token::Comma) {
879 self.advance();
880 args.push(self.parse_additive()?);
881 }
882 }
883
884 Ok((args, has_distinct))
885 }
886
887 fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
888 let mut expressions = Vec::new();
889
890 loop {
891 expressions.push(self.parse_expression()?);
892
893 if matches!(self.current_token, Token::Comma) {
894 self.advance();
895 } else {
896 break;
897 }
898 }
899
900 Ok(expressions)
901 }
902
903 fn get_binary_op(&self) -> Option<String> {
904 match &self.current_token {
905 Token::Equal => Some("=".to_string()),
906 Token::NotEqual => Some("!=".to_string()),
907 Token::LessThan => Some("<".to_string()),
908 Token::GreaterThan => Some(">".to_string()),
909 Token::LessThanOrEqual => Some("<=".to_string()),
910 Token::GreaterThanOrEqual => Some(">=".to_string()),
911 Token::Like => Some("LIKE".to_string()),
912 _ => None,
913 }
914 }
915
916 fn get_arithmetic_op(&self) -> Option<String> {
917 match &self.current_token {
918 Token::Plus => Some("+".to_string()),
919 Token::Minus => Some("-".to_string()),
920 Token::Star => Some("*".to_string()), Token::Divide => Some("/".to_string()),
922 Token::Modulo => Some("%".to_string()),
923 _ => None,
924 }
925 }
926
    /// Returns the position reported by the underlying lexer's
    /// `get_position`.
    // NOTE(review): presumably a byte/char offset just past the lookahead
    // token — confirm against the lexer implementation.
    #[must_use]
    pub fn get_position(&self) -> usize {
        self.lexer.get_position()
    }
931
    /// Reports whether the lookahead token can start a join clause:
    /// `JOIN`, `INNER`, `LEFT`, `RIGHT`, `FULL` or `CROSS`.
    fn is_join_token(&self) -> bool {
        matches!(
            self.current_token,
            Token::Join | Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
        )
    }
939
940 fn parse_join_clause(&mut self) -> Result<JoinClause, String> {
942 let join_type = match &self.current_token {
944 Token::Join => {
945 self.advance();
946 JoinType::Inner }
948 Token::Inner => {
949 self.advance();
950 if !matches!(self.current_token, Token::Join) {
951 return Err("Expected JOIN after INNER".to_string());
952 }
953 self.advance();
954 JoinType::Inner
955 }
956 Token::Left => {
957 self.advance();
958 if matches!(self.current_token, Token::Outer) {
960 self.advance();
961 }
962 if !matches!(self.current_token, Token::Join) {
963 return Err("Expected JOIN after LEFT".to_string());
964 }
965 self.advance();
966 JoinType::Left
967 }
968 Token::Right => {
969 self.advance();
970 if matches!(self.current_token, Token::Outer) {
972 self.advance();
973 }
974 if !matches!(self.current_token, Token::Join) {
975 return Err("Expected JOIN after RIGHT".to_string());
976 }
977 self.advance();
978 JoinType::Right
979 }
980 Token::Full => {
981 self.advance();
982 if matches!(self.current_token, Token::Outer) {
984 self.advance();
985 }
986 if !matches!(self.current_token, Token::Join) {
987 return Err("Expected JOIN after FULL".to_string());
988 }
989 self.advance();
990 JoinType::Full
991 }
992 Token::Cross => {
993 self.advance();
994 if !matches!(self.current_token, Token::Join) {
995 return Err("Expected JOIN after CROSS".to_string());
996 }
997 self.advance();
998 JoinType::Cross
999 }
1000 _ => return Err("Expected JOIN keyword".to_string()),
1001 };
1002
1003 let (table, alias) = self.parse_join_table_source()?;
1005
1006 let condition = if join_type == JoinType::Cross {
1008 JoinCondition {
1010 left_column: String::new(),
1011 operator: JoinOperator::Equal,
1012 right_column: String::new(),
1013 }
1014 } else {
1015 if !matches!(self.current_token, Token::On) {
1016 return Err("Expected ON keyword after JOIN table".to_string());
1017 }
1018 self.advance();
1019 self.parse_join_condition()?
1020 };
1021
1022 Ok(JoinClause {
1023 join_type,
1024 table,
1025 alias,
1026 condition,
1027 })
1028 }
1029
1030 fn parse_join_table_source(&mut self) -> Result<(TableSource, Option<String>), String> {
1031 let table = match &self.current_token {
1032 Token::Identifier(name) => {
1033 let table_name = name.clone();
1034 self.advance();
1035 TableSource::Table(table_name)
1036 }
1037 Token::LeftParen => {
1038 self.advance();
1040 let subquery = self.parse_select_statement_inner()?;
1041 if !matches!(self.current_token, Token::RightParen) {
1042 return Err("Expected ')' after subquery".to_string());
1043 }
1044 self.advance();
1045
1046 let alias = match &self.current_token {
1048 Token::Identifier(alias_name) => {
1049 let alias = alias_name.clone();
1050 self.advance();
1051 alias
1052 }
1053 Token::As => {
1054 self.advance();
1055 match &self.current_token {
1056 Token::Identifier(alias_name) => {
1057 let alias = alias_name.clone();
1058 self.advance();
1059 alias
1060 }
1061 _ => return Err("Expected alias after AS keyword".to_string()),
1062 }
1063 }
1064 _ => return Err("Subqueries must have an alias".to_string()),
1065 };
1066
1067 return Ok((
1068 TableSource::DerivedTable {
1069 query: Box::new(subquery),
1070 alias: alias.clone(),
1071 },
1072 Some(alias),
1073 ));
1074 }
1075 _ => return Err("Expected table name or subquery in JOIN clause".to_string()),
1076 };
1077
1078 let alias = match &self.current_token {
1080 Token::Identifier(alias_name) => {
1081 let alias = alias_name.clone();
1082 self.advance();
1083 Some(alias)
1084 }
1085 Token::As => {
1086 self.advance();
1087 match &self.current_token {
1088 Token::Identifier(alias_name) => {
1089 let alias = alias_name.clone();
1090 self.advance();
1091 Some(alias)
1092 }
1093 _ => return Err("Expected alias after AS keyword".to_string()),
1094 }
1095 }
1096 _ => None,
1097 };
1098
1099 Ok((table, alias))
1100 }
1101
1102 fn parse_join_condition(&mut self) -> Result<JoinCondition, String> {
1103 let left_column = self.parse_column_reference()?;
1105
1106 let operator = match &self.current_token {
1108 Token::Equal => JoinOperator::Equal,
1109 Token::NotEqual => JoinOperator::NotEqual,
1110 Token::LessThan => JoinOperator::LessThan,
1111 Token::LessThanOrEqual => JoinOperator::LessThanOrEqual,
1112 Token::GreaterThan => JoinOperator::GreaterThan,
1113 Token::GreaterThanOrEqual => JoinOperator::GreaterThanOrEqual,
1114 _ => return Err("Expected comparison operator in JOIN condition".to_string()),
1115 };
1116 self.advance();
1117
1118 let right_column = self.parse_column_reference()?;
1120
1121 Ok(JoinCondition {
1122 left_column,
1123 operator,
1124 right_column,
1125 })
1126 }
1127
1128 fn parse_column_reference(&mut self) -> Result<String, String> {
1129 match &self.current_token {
1130 Token::Identifier(name) => {
1131 let mut column_ref = name.clone();
1132 self.advance();
1133
1134 if matches!(self.current_token, Token::Dot) {
1136 self.advance();
1137 match &self.current_token {
1138 Token::Identifier(col_name) => {
1139 column_ref.push('.');
1140 column_ref.push_str(col_name);
1141 self.advance();
1142 }
1143 _ => return Err("Expected column name after '.'".to_string()),
1144 }
1145 }
1146
1147 Ok(column_ref)
1148 }
1149 _ => Err("Expected column reference".to_string()),
1150 }
1151 }
1152}
1153
/// Classification of where the cursor sits inside a (possibly incomplete)
/// query, as produced by `detect_cursor_context`.
#[derive(Debug, Clone)]
pub enum CursorContext {
    /// Cursor is in the select list (items present, no FROM table yet).
    SelectClause,
    /// Cursor is in or right after the FROM clause.
    FromClause,
    /// Cursor is inside the WHERE clause.
    WhereClause,
    /// Cursor is inside the ORDER BY clause.
    OrderByClause,
    /// Cursor follows `column.`; carries the column name with any surrounding
    /// double quotes removed.
    AfterColumn(String),
    /// Cursor follows a logical connector (`AND` / `OR`).
    AfterLogicalOp(LogicalOp),
    /// Cursor follows a comparison operator; carries the column name and the
    /// operator text (e.g. `">="`).
    AfterComparisonOp(String, String),
    // NOTE(review): not constructed in the visible part of this file —
    // presumably (receiver, method name); confirm against the producer.
    InMethodCall(String, String),
    // NOTE(review): not constructed in the visible part of this file.
    InExpression,
    /// No context could be determined.
    Unknown,
}
1168
/// Returns the prefix of `s` ending at byte offset `pos`, without ever
/// splitting a UTF-8 character.
///
/// When `pos` is at or past the end the whole string is returned. When `pos`
/// falls inside a multi-byte character the cut moves back to the start of
/// that character.
fn safe_slice_to(s: &str, pos: usize) -> &str {
    if pos >= s.len() {
        return s;
    }

    // Offset 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=pos)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);

    &s[..cut]
}
1183
/// Returns the suffix of `s` starting at byte offset `pos`, without ever
/// splitting a UTF-8 character.
///
/// When `pos` is at or past the end the result is empty. When `pos` falls
/// inside a multi-byte character the start moves forward to the next
/// character.
fn safe_slice_from(s: &str, pos: usize) -> &str {
    if pos >= s.len() {
        return "";
    }

    // `s.len()` is always a boundary, which covers a trailing partial character.
    let start = (pos..s.len())
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(s.len());

    &s[start..]
}
1198
1199#[must_use]
1200pub fn detect_cursor_context(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
1201 let truncated = safe_slice_to(query, cursor_pos);
1202 let mut parser = Parser::new(truncated);
1203
1204 if let Ok(stmt) = parser.parse() {
1206 let (ctx, partial) = analyze_statement(&stmt, truncated, cursor_pos);
1207 #[cfg(test)]
1208 println!("analyze_statement returned: {ctx:?}, {partial:?} for query: '{truncated}'");
1209 (ctx, partial)
1210 } else {
1211 let (ctx, partial) = analyze_partial(truncated, cursor_pos);
1213 #[cfg(test)]
1214 println!("analyze_partial returned: {ctx:?}, {partial:?} for query: '{truncated}'");
1215 (ctx, partial)
1216 }
1217}
1218
1219#[must_use]
1220pub fn tokenize_query(query: &str) -> Vec<String> {
1221 let mut lexer = Lexer::new(query);
1222 let tokens = lexer.tokenize_all();
1223 tokens.iter().map(|t| format!("{t:?}")).collect()
1224}
1225
1226#[must_use]
1227fn analyze_statement(
1228 stmt: &SelectStatement,
1229 query: &str,
1230 _cursor_pos: usize,
1231) -> (CursorContext, Option<String>) {
1232 let trimmed = query.trim();
1234
1235 let comparison_ops = [" > ", " < ", " >= ", " <= ", " = ", " != "];
1237 for op in &comparison_ops {
1238 if let Some(op_pos) = query.rfind(op) {
1239 let before_op = safe_slice_to(query, op_pos);
1240 let after_op_start = op_pos + op.len();
1241 let after_op = if after_op_start < query.len() {
1242 &query[after_op_start..]
1243 } else {
1244 ""
1245 };
1246
1247 if let Some(col_name) = before_op.split_whitespace().last() {
1249 if col_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
1250 let after_op_trimmed = after_op.trim();
1252 if after_op_trimmed.is_empty()
1253 || (after_op_trimmed
1254 .chars()
1255 .all(|c| c.is_alphanumeric() || c == '_')
1256 && !after_op_trimmed.contains('('))
1257 {
1258 let partial = if after_op_trimmed.is_empty() {
1259 None
1260 } else {
1261 Some(after_op_trimmed.to_string())
1262 };
1263 return (
1264 CursorContext::AfterComparisonOp(
1265 col_name.to_string(),
1266 op.trim().to_string(),
1267 ),
1268 partial,
1269 );
1270 }
1271 }
1272 }
1273 }
1274 }
1275
1276 if trimmed.to_uppercase().ends_with(" AND")
1278 || trimmed.to_uppercase().ends_with(" OR")
1279 || trimmed.to_uppercase().ends_with(" AND ")
1280 || trimmed.to_uppercase().ends_with(" OR ")
1281 {
1282 } else {
1284 if let Some(dot_pos) = trimmed.rfind('.') {
1286 let before_dot = safe_slice_to(trimmed, dot_pos);
1288 let after_dot_start = dot_pos + 1;
1289 let after_dot = if after_dot_start < trimmed.len() {
1290 &trimmed[after_dot_start..]
1291 } else {
1292 ""
1293 };
1294
1295 if !after_dot.contains('(') {
1298 let col_name = if before_dot.ends_with('"') {
1300 let bytes = before_dot.as_bytes();
1302 let mut pos = before_dot.len() - 1; let mut found_start = None;
1304
1305 if pos > 0 {
1307 pos -= 1;
1308 while pos > 0 {
1309 if bytes[pos] == b'"' {
1310 if pos == 0 || bytes[pos - 1] != b'\\' {
1312 found_start = Some(pos);
1313 break;
1314 }
1315 }
1316 pos -= 1;
1317 }
1318 if found_start.is_none() && bytes[0] == b'"' {
1320 found_start = Some(0);
1321 }
1322 }
1323
1324 found_start.map(|start| safe_slice_from(before_dot, start))
1325 } else {
1326 before_dot
1329 .split_whitespace()
1330 .last()
1331 .map(|word| word.trim_start_matches('('))
1332 };
1333
1334 if let Some(col_name) = col_name {
1335 let is_valid = if col_name.starts_with('"') && col_name.ends_with('"') {
1337 true
1339 } else {
1340 col_name.chars().all(|c| c.is_alphanumeric() || c == '_')
1342 };
1343
1344 if is_valid {
1345 let partial_method = if after_dot.is_empty() {
1348 None
1349 } else if after_dot.chars().all(|c| c.is_alphanumeric() || c == '_') {
1350 Some(after_dot.to_string())
1351 } else {
1352 None
1353 };
1354
1355 let col_name_for_context = if col_name.starts_with('"')
1357 && col_name.ends_with('"')
1358 && col_name.len() > 2
1359 {
1360 col_name[1..col_name.len() - 1].to_string()
1361 } else {
1362 col_name.to_string()
1363 };
1364
1365 return (
1366 CursorContext::AfterColumn(col_name_for_context),
1367 partial_method,
1368 );
1369 }
1370 }
1371 }
1372 }
1373 }
1374
1375 if let Some(where_clause) = &stmt.where_clause {
1377 if trimmed.to_uppercase().ends_with(" AND") || trimmed.to_uppercase().ends_with(" OR") {
1379 let op = if trimmed.to_uppercase().ends_with(" AND") {
1380 LogicalOp::And
1381 } else {
1382 LogicalOp::Or
1383 };
1384 return (CursorContext::AfterLogicalOp(op), None);
1385 }
1386
1387 if let Some(and_pos) = query.to_uppercase().rfind(" AND ") {
1389 let after_and = safe_slice_from(query, and_pos + 5);
1390 let partial = extract_partial_at_end(after_and);
1391 if partial.is_some() {
1392 return (CursorContext::AfterLogicalOp(LogicalOp::And), partial);
1393 }
1394 }
1395
1396 if let Some(or_pos) = query.to_uppercase().rfind(" OR ") {
1397 let after_or = safe_slice_from(query, or_pos + 4);
1398 let partial = extract_partial_at_end(after_or);
1399 if partial.is_some() {
1400 return (CursorContext::AfterLogicalOp(LogicalOp::Or), partial);
1401 }
1402 }
1403
1404 if let Some(last_condition) = where_clause.conditions.last() {
1405 if let Some(connector) = &last_condition.connector {
1406 return (
1408 CursorContext::AfterLogicalOp(connector.clone()),
1409 extract_partial_at_end(query),
1410 );
1411 }
1412 }
1413 return (CursorContext::WhereClause, extract_partial_at_end(query));
1415 }
1416
1417 if query.to_uppercase().ends_with(" ORDER BY ") || query.to_uppercase().ends_with(" ORDER BY") {
1419 return (CursorContext::OrderByClause, None);
1420 }
1421
1422 if stmt.order_by.is_some() {
1424 return (CursorContext::OrderByClause, extract_partial_at_end(query));
1425 }
1426
1427 if stmt.from_table.is_some() && stmt.where_clause.is_none() && stmt.order_by.is_none() {
1428 return (CursorContext::FromClause, extract_partial_at_end(query));
1429 }
1430
1431 if !stmt.columns.is_empty() && stmt.from_table.is_none() {
1432 return (CursorContext::SelectClause, extract_partial_at_end(query));
1433 }
1434
1435 (CursorContext::Unknown, None)
1436}
1437
1438fn analyze_partial(query: &str, cursor_pos: usize) -> (CursorContext, Option<String>) {
1439 let upper = query.to_uppercase();
1440
1441 let trimmed = query.trim();
1443
1444 #[cfg(test)]
1445 {
1446 if trimmed.contains("\"Last Name\"") {
1447 eprintln!("DEBUG analyze_partial: query='{query}', trimmed='{trimmed}'");
1448 }
1449 }
1450
1451 let comparison_ops = [" > ", " < ", " >= ", " <= ", " = ", " != "];
1453 for op in &comparison_ops {
1454 if let Some(op_pos) = query.rfind(op) {
1455 let before_op = safe_slice_to(query, op_pos);
1456 let after_op_start = op_pos + op.len();
1457 let after_op = if after_op_start < query.len() {
1458 &query[after_op_start..]
1459 } else {
1460 ""
1461 };
1462
1463 if let Some(col_name) = before_op.split_whitespace().last() {
1465 if col_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
1466 let after_op_trimmed = after_op.trim();
1468 if after_op_trimmed.is_empty()
1469 || (after_op_trimmed
1470 .chars()
1471 .all(|c| c.is_alphanumeric() || c == '_')
1472 && !after_op_trimmed.contains('('))
1473 {
1474 let partial = if after_op_trimmed.is_empty() {
1475 None
1476 } else {
1477 Some(after_op_trimmed.to_string())
1478 };
1479 return (
1480 CursorContext::AfterComparisonOp(
1481 col_name.to_string(),
1482 op.trim().to_string(),
1483 ),
1484 partial,
1485 );
1486 }
1487 }
1488 }
1489 }
1490 }
1491
1492 if let Some(dot_pos) = trimmed.rfind('.') {
1495 #[cfg(test)]
1496 {
1497 if trimmed.contains("\"Last Name\"") {
1498 eprintln!("DEBUG: Found dot at position {dot_pos}");
1499 }
1500 }
1501 let before_dot = &trimmed[..dot_pos];
1503 let after_dot = &trimmed[dot_pos + 1..];
1504
1505 if !after_dot.contains('(') {
1508 let col_name = if before_dot.ends_with('"') {
1511 let bytes = before_dot.as_bytes();
1513 let mut pos = before_dot.len() - 1; let mut found_start = None;
1515
1516 #[cfg(test)]
1517 {
1518 if trimmed.contains("\"Last Name\"") {
1519 eprintln!("DEBUG: before_dot='{before_dot}', looking for opening quote");
1520 }
1521 }
1522
1523 if pos > 0 {
1525 pos -= 1;
1526 while pos > 0 {
1527 if bytes[pos] == b'"' {
1528 if pos == 0 || bytes[pos - 1] != b'\\' {
1530 found_start = Some(pos);
1531 break;
1532 }
1533 }
1534 pos -= 1;
1535 }
1536 if found_start.is_none() && bytes[0] == b'"' {
1538 found_start = Some(0);
1539 }
1540 }
1541
1542 if let Some(start) = found_start {
1543 let result = safe_slice_from(before_dot, start);
1545 #[cfg(test)]
1546 {
1547 if trimmed.contains("\"Last Name\"") {
1548 eprintln!("DEBUG: Extracted quoted identifier: '{result}'");
1549 }
1550 }
1551 Some(result)
1552 } else {
1553 #[cfg(test)]
1554 {
1555 if trimmed.contains("\"Last Name\"") {
1556 eprintln!("DEBUG: No opening quote found!");
1557 }
1558 }
1559 None
1560 }
1561 } else {
1562 before_dot
1565 .split_whitespace()
1566 .last()
1567 .map(|word| word.trim_start_matches('('))
1568 };
1569
1570 if let Some(col_name) = col_name {
1571 #[cfg(test)]
1572 {
1573 if trimmed.contains("\"Last Name\"") {
1574 eprintln!("DEBUG: col_name = '{col_name}'");
1575 }
1576 }
1577
1578 let is_valid = if col_name.starts_with('"') && col_name.ends_with('"') {
1580 true
1582 } else {
1583 col_name.chars().all(|c| c.is_alphanumeric() || c == '_')
1585 };
1586
1587 #[cfg(test)]
1588 {
1589 if trimmed.contains("\"Last Name\"") {
1590 eprintln!("DEBUG: is_valid = {is_valid}");
1591 }
1592 }
1593
1594 if is_valid {
1595 let partial_method = if after_dot.is_empty() {
1598 None
1599 } else if after_dot.chars().all(|c| c.is_alphanumeric() || c == '_') {
1600 Some(after_dot.to_string())
1601 } else {
1602 None
1603 };
1604
1605 let col_name_for_context = if col_name.starts_with('"')
1607 && col_name.ends_with('"')
1608 && col_name.len() > 2
1609 {
1610 col_name[1..col_name.len() - 1].to_string()
1611 } else {
1612 col_name.to_string()
1613 };
1614
1615 return (
1616 CursorContext::AfterColumn(col_name_for_context),
1617 partial_method,
1618 );
1619 }
1620 }
1621 }
1622 }
1623
1624 if let Some(and_pos) = upper.rfind(" AND ") {
1626 if cursor_pos >= and_pos + 5 {
1628 let after_and = safe_slice_from(query, and_pos + 5);
1630 let partial = extract_partial_at_end(after_and);
1631 return (CursorContext::AfterLogicalOp(LogicalOp::And), partial);
1632 }
1633 }
1634
1635 if let Some(or_pos) = upper.rfind(" OR ") {
1636 if cursor_pos >= or_pos + 4 {
1638 let after_or = safe_slice_from(query, or_pos + 4);
1640 let partial = extract_partial_at_end(after_or);
1641 return (CursorContext::AfterLogicalOp(LogicalOp::Or), partial);
1642 }
1643 }
1644
1645 if trimmed.to_uppercase().ends_with(" AND") || trimmed.to_uppercase().ends_with(" OR") {
1647 let op = if trimmed.to_uppercase().ends_with(" AND") {
1648 LogicalOp::And
1649 } else {
1650 LogicalOp::Or
1651 };
1652 return (CursorContext::AfterLogicalOp(op), None);
1653 }
1654
1655 if upper.ends_with(" ORDER BY ") || upper.ends_with(" ORDER BY") || upper.contains("ORDER BY ")
1657 {
1658 return (CursorContext::OrderByClause, extract_partial_at_end(query));
1659 }
1660
1661 if upper.contains("WHERE") && !upper.contains("ORDER") && !upper.contains("GROUP") {
1662 return (CursorContext::WhereClause, extract_partial_at_end(query));
1663 }
1664
1665 if upper.contains("FROM") && !upper.contains("WHERE") && !upper.contains("ORDER") {
1666 return (CursorContext::FromClause, extract_partial_at_end(query));
1667 }
1668
1669 if upper.contains("SELECT") && !upper.contains("FROM") {
1670 return (CursorContext::SelectClause, extract_partial_at_end(query));
1671 }
1672
1673 (CursorContext::Unknown, None)
1674}
1675
1676fn extract_partial_at_end(query: &str) -> Option<String> {
1677 let trimmed = query.trim();
1678
1679 if let Some(last_word) = trimmed.split_whitespace().last() {
1681 if last_word.starts_with('"') && !last_word.ends_with('"') {
1682 return Some(last_word.to_string());
1684 }
1685 }
1686
1687 let last_word = trimmed.split_whitespace().last()?;
1689
1690 if last_word.chars().all(|c| c.is_alphanumeric() || c == '_') && !is_sql_keyword(last_word) {
1692 Some(last_word.to_string())
1693 } else {
1694 None
1695 }
1696}
1697
1698impl ParsePrimary for Parser {
1700 fn current_token(&self) -> &Token {
1701 &self.current_token
1702 }
1703
1704 fn advance(&mut self) {
1705 self.advance();
1706 }
1707
1708 fn consume(&mut self, expected: Token) -> Result<(), String> {
1709 self.consume(expected)
1710 }
1711
1712 fn parse_case_expression(&mut self) -> Result<SqlExpression, String> {
1713 self.parse_case_expression()
1714 }
1715
1716 fn parse_function_args(&mut self) -> Result<(Vec<SqlExpression>, bool), String> {
1717 self.parse_function_args()
1718 }
1719
1720 fn parse_window_spec(&mut self) -> Result<WindowSpec, String> {
1721 self.parse_window_spec()
1722 }
1723
1724 fn parse_logical_or(&mut self) -> Result<SqlExpression, String> {
1725 self.parse_logical_or()
1726 }
1727
1728 fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
1729 self.parse_comparison()
1730 }
1731
1732 fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
1733 self.parse_expression_list()
1734 }
1735}
1736
1737impl ParseArithmetic for Parser {
1739 fn current_token(&self) -> &Token {
1740 &self.current_token
1741 }
1742
1743 fn advance(&mut self) {
1744 self.advance();
1745 }
1746
1747 fn consume(&mut self, expected: Token) -> Result<(), String> {
1748 self.consume(expected)
1749 }
1750
1751 fn parse_primary(&mut self) -> Result<SqlExpression, String> {
1752 self.parse_primary()
1753 }
1754
1755 fn parse_multiplicative(&mut self) -> Result<SqlExpression, String> {
1756 self.parse_multiplicative()
1757 }
1758
1759 fn parse_method_args(&mut self) -> Result<Vec<SqlExpression>, String> {
1760 self.parse_method_args()
1761 }
1762}
1763
1764impl ParseComparison for Parser {
1766 fn current_token(&self) -> &Token {
1767 &self.current_token
1768 }
1769
1770 fn advance(&mut self) {
1771 self.advance();
1772 }
1773
1774 fn consume(&mut self, expected: Token) -> Result<(), String> {
1775 self.consume(expected)
1776 }
1777
1778 fn parse_primary(&mut self) -> Result<SqlExpression, String> {
1779 self.parse_primary()
1780 }
1781
1782 fn parse_additive(&mut self) -> Result<SqlExpression, String> {
1783 self.parse_additive()
1784 }
1785
1786 fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
1787 self.parse_expression_list()
1788 }
1789}
1790
1791impl ParseLogical for Parser {
1793 fn current_token(&self) -> &Token {
1794 &self.current_token
1795 }
1796
1797 fn advance(&mut self) {
1798 self.advance();
1799 }
1800
1801 fn consume(&mut self, expected: Token) -> Result<(), String> {
1802 self.consume(expected)
1803 }
1804
1805 fn parse_logical_and(&mut self) -> Result<SqlExpression, String> {
1806 self.parse_logical_and()
1807 }
1808
1809 fn parse_base_logical_expression(&mut self) -> Result<SqlExpression, String> {
1810 self.parse_comparison()
1813 }
1814
1815 fn parse_comparison(&mut self) -> Result<SqlExpression, String> {
1816 self.parse_comparison()
1817 }
1818
1819 fn parse_expression_list(&mut self) -> Result<Vec<SqlExpression>, String> {
1820 self.parse_expression_list()
1821 }
1822}
1823
1824impl ParseCase for Parser {
1826 fn current_token(&self) -> &Token {
1827 &self.current_token
1828 }
1829
1830 fn advance(&mut self) {
1831 self.advance();
1832 }
1833
1834 fn consume(&mut self, expected: Token) -> Result<(), String> {
1835 self.consume(expected)
1836 }
1837
1838 fn parse_expression(&mut self) -> Result<SqlExpression, String> {
1839 self.parse_expression()
1840 }
1841}
1842
/// Reports whether `word` is one of the SQL keywords that must never be offered as a
/// partial identifier. The comparison ignores case by upper-casing `word` first.
fn is_sql_keyword(word: &str) -> bool {
    const KEYWORDS: &[&str] = &[
        "SELECT", "FROM", "WHERE", "AND", "OR", "IN", "ORDER", "BY", "GROUP", "HAVING", "ASC",
        "DESC", "DISTINCT",
    ];

    let upper = word.to_uppercase();
    KEYWORDS.iter().any(|&keyword| keyword == upper)
}