1use crate::error::{HematiteError, Result};
22use crate::parser::ast::*;
23use crate::parser::lexer::{Lexer, Token};
24use crate::parser::types::{normalize_float_literal, LiteralValue, SqlTypeName};
25
/// Recursive-descent SQL parser over a pre-lexed token stream.
pub struct Parser {
    // Token stream produced by the lexer, consumed front to back.
    tokens: Vec<Token>,
    // Index of the next token to read from `tokens`.
    position: usize,
    // Running count of parameters; presumably incremented per `?`
    // placeholder encountered — TODO confirm against placeholder parsing.
    parameter_count: usize,
}
31
32impl Parser {
33 pub fn new(tokens: Vec<Token>) -> Self {
34 Self {
35 tokens,
36 position: 0,
37 parameter_count: 0,
38 }
39 }
40
    /// Parses a single top-level statement, dispatching on the leading
    /// keyword token.
    ///
    /// # Errors
    /// Returns a `ParseError` on empty input, on an identifier that
    /// `uppercase_keyword_match` maps to a top-level keyword (i.e. a
    /// miscapitalized keyword), or on any other unexpected leading token.
    pub fn parse(&mut self) -> Result<Statement> {
        if self.tokens.is_empty() {
            return Err(HematiteError::ParseError("Empty input".to_string()));
        }

        let token = self.peek_token()?;

        // A bare identifier matching a top-level keyword is rejected with a
        // capitalization hint rather than falling through to the catch-all.
        if let Token::Identifier(name) = &token {
            if let Some(keyword) = uppercase_keyword_match(name, &TOP_LEVEL_KEYWORDS) {
                return Err(HematiteError::ParseError(format!(
                    "Keyword '{}' must be capitalized as '{}'",
                    name, keyword
                )));
            }
        }

        // Each arm delegates to the dedicated sub-parser for that statement
        // kind; WITH is routed through SELECT since a CTE prefixes a query.
        match token {
            Token::Begin => self.parse_begin(),
            Token::Commit => self.parse_commit(),
            Token::Rollback => self.parse_rollback(),
            Token::Savepoint => self.parse_savepoint(),
            Token::Release => self.parse_release_savepoint(),
            Token::Explain => self.parse_explain(),
            Token::Describe => self.parse_describe(),
            Token::Show => self.parse_show(),
            Token::Select | Token::With => self.parse_select(),
            Token::Update => self.parse_update(),
            Token::Insert => self.parse_insert(),
            Token::Delete => self.parse_delete(),
            Token::Create => self.parse_create(),
            Token::Alter => self.parse_alter(),
            Token::Drop => self.parse_drop(),
            _ => Err(HematiteError::ParseError(format!(
                "Expected BEGIN, COMMIT, ROLLBACK, SAVEPOINT, RELEASE, EXPLAIN, DESCRIBE, SHOW, SELECT, UPDATE, INSERT, DELETE, CREATE, ALTER, or DROP, found: {:?}",
                token
            ))),
        }
    }
79
80 fn parse_begin(&mut self) -> Result<Statement> {
81 self.consume_token(&Token::Begin)?;
82 self.consume_token(&Token::Semicolon)?;
83 Ok(Statement::Begin)
84 }
85
86 fn parse_commit(&mut self) -> Result<Statement> {
87 self.consume_token(&Token::Commit)?;
88 self.consume_token(&Token::Semicolon)?;
89 Ok(Statement::Commit)
90 }
91
92 fn parse_rollback(&mut self) -> Result<Statement> {
93 self.consume_token(&Token::Rollback)?;
94 if matches!(self.peek_token(), Ok(Token::To)) {
95 self.consume_token(&Token::To)?;
96 if matches!(self.peek_token(), Ok(Token::Savepoint)) {
97 self.consume_token(&Token::Savepoint)?;
98 }
99 let name = self.parse_identifier()?;
100 self.consume_token(&Token::Semicolon)?;
101 return Ok(Statement::RollbackToSavepoint(name));
102 }
103 self.consume_token(&Token::Semicolon)?;
104 Ok(Statement::Rollback)
105 }
106
107 fn parse_savepoint(&mut self) -> Result<Statement> {
108 self.consume_token(&Token::Savepoint)?;
109 let name = self.parse_identifier()?;
110 self.consume_token(&Token::Semicolon)?;
111 Ok(Statement::Savepoint(name))
112 }
113
114 fn parse_release_savepoint(&mut self) -> Result<Statement> {
115 self.consume_token(&Token::Release)?;
116 if matches!(self.peek_token(), Ok(Token::Savepoint)) {
117 self.consume_token(&Token::Savepoint)?;
118 }
119 let name = self.parse_identifier()?;
120 self.consume_token(&Token::Semicolon)?;
121 Ok(Statement::ReleaseSavepoint(name))
122 }
123
124 fn parse_select(&mut self) -> Result<Statement> {
125 let (query, into_table) = self.parse_query_statement(true, true)?;
126 match into_table {
127 Some(table) => Ok(Statement::SelectInto(SelectIntoStatement { table, query })),
128 None => Ok(Statement::Select(query)),
129 }
130 }
131
132 fn parse_explain(&mut self) -> Result<Statement> {
133 self.consume_token(&Token::Explain)?;
134 Ok(Statement::Explain(ExplainStatement {
135 statement: Box::new(self.parse()?),
136 }))
137 }
138
139 fn parse_describe(&mut self) -> Result<Statement> {
140 self.consume_token(&Token::Describe)?;
141 let table = self.parse_identifier()?;
142 self.consume_token(&Token::Semicolon)?;
143 Ok(Statement::Describe(DescribeStatement { table }))
144 }
145
    /// Parses the SHOW family of statements:
    /// `SHOW TABLES;`, `SHOW VIEWS;`, `SHOW INDEXES [FROM table];`,
    /// `SHOW TRIGGERS [FROM table];`, `SHOW CREATE TABLE t;`,
    /// `SHOW CREATE VIEW v;`.
    fn parse_show(&mut self) -> Result<Statement> {
        self.consume_token(&Token::Show)?;
        match self.peek_token()? {
            Token::Tables => {
                self.consume_token(&Token::Tables)?;
                self.consume_token(&Token::Semicolon)?;
                Ok(Statement::ShowTables)
            }
            Token::Views => {
                self.consume_token(&Token::Views)?;
                self.consume_token(&Token::Semicolon)?;
                Ok(Statement::ShowViews)
            }
            Token::Indexes => {
                self.consume_token(&Token::Indexes)?;
                // Optional FROM <table> narrows the listing to one table.
                let table = if matches!(self.peek_token(), Ok(Token::From)) {
                    self.consume_token(&Token::From)?;
                    Some(self.parse_identifier()?)
                } else {
                    None
                };
                self.consume_token(&Token::Semicolon)?;
                Ok(Statement::ShowIndexes(table))
            }
            Token::Triggers => {
                self.consume_token(&Token::Triggers)?;
                // Optional FROM <table>, same shape as SHOW INDEXES.
                let table = if matches!(self.peek_token(), Ok(Token::From)) {
                    self.consume_token(&Token::From)?;
                    Some(self.parse_identifier()?)
                } else {
                    None
                };
                self.consume_token(&Token::Semicolon)?;
                Ok(Statement::ShowTriggers(table))
            }
            Token::Create => {
                // SHOW CREATE requires a TABLE or VIEW qualifier.
                self.consume_token(&Token::Create)?;
                match self.peek_token()? {
                    Token::Table => {
                        self.consume_token(&Token::Table)?;
                        let table = self.parse_identifier()?;
                        self.consume_token(&Token::Semicolon)?;
                        Ok(Statement::ShowCreateTable(table))
                    }
                    Token::View => {
                        self.consume_token(&Token::View)?;
                        let view = self.parse_identifier()?;
                        self.consume_token(&Token::Semicolon)?;
                        Ok(Statement::ShowCreateView(view))
                    }
                    token => Err(HematiteError::ParseError(format!(
                        "Expected TABLE or VIEW after SHOW CREATE, found: {:?}",
                        token
                    ))),
                }
            }
            token => Err(HematiteError::ParseError(format!(
                "Expected TABLES, VIEWS, INDEXES, TRIGGERS, or CREATE after SHOW, found: {:?}",
                token
            ))),
        }
    }
208
209 fn parse_query_statement(
210 &mut self,
211 expect_semicolon: bool,
212 allow_into: bool,
213 ) -> Result<(SelectStatement, Option<String>)> {
214 let with_clause = if matches!(self.peek_token(), Ok(Token::With)) {
215 self.parse_with_clause()?
216 } else {
217 Vec::new()
218 };
219
220 let (mut statement, into_table) =
221 self.parse_select_statement(expect_semicolon, allow_into)?;
222 statement.with_clause = with_clause;
223 Ok((statement, into_table))
224 }
225
226 fn parse_select_statement(
227 &mut self,
228 expect_semicolon: bool,
229 allow_into: bool,
230 ) -> Result<(SelectStatement, Option<String>)> {
231 self.consume_token(&Token::Select)?;
232 let distinct = if matches!(self.peek_token(), Ok(Token::Distinct)) {
233 self.consume_token(&Token::Distinct)?;
234 true
235 } else {
236 false
237 };
238
239 let (columns, column_aliases) = self.parse_select_columns()?;
240
241 let into_table = if allow_into && matches!(self.peek_token(), Ok(Token::Into)) {
242 self.consume_token(&Token::Into)?;
243 Some(self.parse_identifier()?)
244 } else {
245 None
246 };
247
248 self.consume_token(&Token::From)?;
249
250 let from = self.parse_from_clause()?;
251
252 let where_clause = if self.peek_token()? == Token::Where {
253 Some(self.parse_where_clause()?)
254 } else {
255 None
256 };
257
258 let group_by = if matches!(self.peek_token(), Ok(Token::Group)) {
259 self.parse_group_by_clause()?
260 } else {
261 Vec::new()
262 };
263
264 let having_clause = if matches!(self.peek_token(), Ok(Token::Having)) {
265 Some(self.parse_having_clause()?)
266 } else {
267 None
268 };
269
270 let order_by = if matches!(self.peek_token(), Ok(Token::Order)) {
271 self.parse_order_by_clause()?
272 } else {
273 Vec::new()
274 };
275
276 let (limit, limit_offset) = if matches!(self.peek_token(), Ok(Token::Limit)) {
277 self.parse_limit_clause()?
278 } else {
279 (None, None)
280 };
281
282 let offset = if limit_offset.is_some() {
283 limit_offset
284 } else if matches!(self.peek_token(), Ok(Token::Offset)) {
285 Some(self.parse_offset_clause()?)
286 } else {
287 None
288 };
289
290 let set_operation = if matches!(
291 self.peek_token(),
292 Ok(Token::Union | Token::Intersect | Token::Except)
293 ) {
294 let operator = match self.peek_token()? {
295 Token::Union => {
296 self.consume_token(&Token::Union)?;
297 if matches!(self.peek_token(), Ok(Token::All)) {
298 self.consume_token(&Token::All)?;
299 SetOperator::UnionAll
300 } else {
301 SetOperator::Union
302 }
303 }
304 Token::Intersect => {
305 self.consume_token(&Token::Intersect)?;
306 SetOperator::Intersect
307 }
308 Token::Except => {
309 self.consume_token(&Token::Except)?;
310 SetOperator::Except
311 }
312 token => {
313 return Err(HematiteError::ParseError(format!(
314 "Expected set operation, found: {:?}",
315 token
316 )))
317 }
318 };
319
320 Some(SetOperation {
321 operator,
322 right: Box::new(self.parse_select_statement(false, false)?.0),
323 })
324 } else {
325 None
326 };
327
328 if expect_semicolon {
329 self.consume_token(&Token::Semicolon)?;
330 }
331
332 Ok((
333 SelectStatement {
334 with_clause: Vec::new(),
335 distinct,
336 columns,
337 column_aliases,
338 from,
339 where_clause,
340 group_by,
341 having_clause,
342 order_by,
343 limit,
344 offset,
345 set_operation,
346 },
347 into_table,
348 ))
349 }
350
351 fn parse_with_clause(&mut self) -> Result<Vec<CommonTableExpression>> {
352 self.consume_token(&Token::With)?;
353 let recursive = if matches!(self.peek_token(), Ok(Token::Recursive)) {
354 self.consume_token(&Token::Recursive)?;
355 true
356 } else {
357 false
358 };
359 let mut ctes = Vec::new();
360
361 loop {
362 let name = self.parse_identifier()?;
363 self.consume_token(&Token::As)?;
364 self.consume_token(&Token::LeftParen)?;
365 let query = self.parse_query_statement(false, false)?.0;
366 self.consume_token(&Token::RightParen)?;
367 ctes.push(CommonTableExpression {
368 name,
369 recursive,
370 query: Box::new(query),
371 });
372
373 if matches!(self.peek_token(), Ok(Token::Comma)) {
374 self.consume_token(&Token::Comma)?;
375 continue;
376 }
377
378 break;
379 }
380
381 Ok(ctes)
382 }
383
384 fn parse_select_columns(&mut self) -> Result<(Vec<SelectItem>, Vec<Option<String>>)> {
385 let mut columns = Vec::new();
386 let mut aliases = Vec::new();
387
388 let token = self.peek_token()?;
389
390 if token == Token::Asterisk {
391 self.consume_token(&Token::Asterisk)?;
392 columns.push(SelectItem::Wildcard);
393 aliases.push(None);
394 } else {
395 loop {
396 columns.push(self.parse_select_item()?);
397 aliases.push(self.parse_optional_alias()?);
398
399 if self.peek_token()? == Token::Comma {
400 self.consume_token(&Token::Comma)?;
401 continue;
402 } else {
403 break;
404 }
405 }
406 }
407
408 Ok((columns, aliases))
409 }
410
    /// Parses one item of the select list.
    ///
    /// Dispatch order: aggregate keywords first (optionally followed by an
    /// OVER clause, making them window items); then identifiers that name a
    /// window-only ranking function and are followed by `(`; then any token
    /// that can begin a general expression.
    fn parse_select_item(&mut self) -> Result<SelectItem> {
        let token = self.peek_token()?;
        match token {
            Token::Count | Token::Sum | Token::Avg | Token::Min | Token::Max => {
                let expr = self.parse_aggregate_expression()?;
                // `AGG(...) OVER (...)` becomes a window item; otherwise a
                // plain aggregate select item.
                if matches!(self.peek_token(), Ok(Token::Over)) {
                    self.parse_window_select_item(expr)
                } else {
                    self.aggregate_expression_to_select_item(expr)
                }
            }
            Token::Identifier(ref name)
                if self.next_token_is(&Token::LeftParen) && is_window_only_function_name(name) =>
            {
                // e.g. ROW_NUMBER() / RANK() / DENSE_RANK(), which require
                // an OVER clause.
                self.parse_window_only_select_item()
            }
            // Any token that can start a general expression.
            Token::Identifier(_)
            | Token::StringLiteral(_)
            | Token::BlobLiteral(_)
            | Token::NumberLiteral(_)
            | Token::BooleanLiteral(_)
            | Token::Null
            | Token::NullLiteral
            | Token::Placeholder
            | Token::LeftParen
            | Token::Case
            | Token::Cast
            | Token::Interval
            | Token::Date
            | Token::Time
            | Token::DateTime
            | Token::Left
            | Token::Right
            | Token::Minus => {
                let expr = self.parse_expression()?;
                // A bare column reference keeps its dedicated variant so
                // later stages can treat it specially.
                Ok(match expr {
                    Expression::Column(name) => SelectItem::Column(name),
                    expr => SelectItem::Expression(expr),
                })
            }
            _ => Err(HematiteError::ParseError(format!(
                "Expected select item or aggregate, found: {:?}",
                token
            ))),
        }
    }
457
    /// Parses an aggregate call: `COUNT(*)`, or `FN(column)` for
    /// COUNT/SUM/AVG/MIN/MAX. Only `COUNT` accepts the `*` target.
    fn parse_aggregate_expression(&mut self) -> Result<Expression> {
        let function = match self.peek_token()? {
            Token::Count => {
                self.consume_token(&Token::Count)?;
                AggregateFunction::Count
            }
            Token::Sum => {
                self.consume_token(&Token::Sum)?;
                AggregateFunction::Sum
            }
            Token::Avg => {
                self.consume_token(&Token::Avg)?;
                AggregateFunction::Avg
            }
            Token::Min => {
                self.consume_token(&Token::Min)?;
                AggregateFunction::Min
            }
            Token::Max => {
                self.consume_token(&Token::Max)?;
                AggregateFunction::Max
            }
            token => {
                return Err(HematiteError::ParseError(format!(
                    "Expected aggregate function, found: {:?}",
                    token
                )))
            }
        };

        self.consume_token(&Token::LeftParen)?;
        // COUNT(*) is the only form targeting all rows rather than a column.
        if function == AggregateFunction::Count && matches!(self.peek_token(), Ok(Token::Asterisk))
        {
            self.consume_token(&Token::Asterisk)?;
            self.consume_token(&Token::RightParen)?;
            return Ok(Expression::AggregateCall {
                function,
                target: AggregateTarget::All,
            });
        }

        let column = self.parse_identifier_reference()?;
        self.consume_token(&Token::RightParen)?;

        Ok(Expression::AggregateCall {
            function,
            target: AggregateTarget::Column(column),
        })
    }
507
508 fn aggregate_expression_to_select_item(&self, expression: Expression) -> Result<SelectItem> {
509 match expression {
510 Expression::AggregateCall {
511 function: AggregateFunction::Count,
512 target: AggregateTarget::All,
513 } => Ok(SelectItem::CountAll),
514 Expression::AggregateCall {
515 function,
516 target: AggregateTarget::Column(column),
517 } => Ok(SelectItem::Aggregate { function, column }),
518 _ => Err(HematiteError::InternalError(
519 "aggregate expression parser returned a non-aggregate expression".to_string(),
520 )),
521 }
522 }
523
524 fn parse_window_only_select_item(&mut self) -> Result<SelectItem> {
525 let function_name = self.parse_identifier()?;
526 self.consume_token(&Token::LeftParen)?;
527 self.consume_token(&Token::RightParen)?;
528 let function = match function_name.to_ascii_uppercase().as_str() {
529 "ROW_NUMBER" => WindowFunction::RowNumber,
530 "RANK" => WindowFunction::Rank,
531 "DENSE_RANK" => WindowFunction::DenseRank,
532 _ => {
533 return Err(HematiteError::ParseError(format!(
534 "Unsupported window function '{}'",
535 function_name
536 )))
537 }
538 };
539 self.parse_window_item(function)
540 }
541
542 fn parse_window_select_item(&mut self, expression: Expression) -> Result<SelectItem> {
543 let Expression::AggregateCall { function, target } = expression else {
544 return Err(HematiteError::ParseError(
545 "OVER(...) currently requires a ranking or aggregate function".to_string(),
546 ));
547 };
548 self.parse_window_item(WindowFunction::Aggregate { function, target })
549 }
550
551 fn parse_window_item(&mut self, function: WindowFunction) -> Result<SelectItem> {
552 self.consume_token(&Token::Over)?;
553 self.consume_token(&Token::LeftParen)?;
554 let partition_by = if matches!(self.peek_token(), Ok(Token::Partition)) {
555 self.consume_token(&Token::Partition)?;
556 self.consume_token(&Token::By)?;
557 let mut exprs = Vec::new();
558 loop {
559 exprs.push(self.parse_expression()?);
560 if matches!(self.peek_token(), Ok(Token::Comma)) {
561 self.consume_token(&Token::Comma)?;
562 continue;
563 }
564 break;
565 }
566 exprs
567 } else {
568 Vec::new()
569 };
570 let order_by = if matches!(self.peek_token(), Ok(Token::Order)) {
571 self.parse_order_by_clause()?
572 } else {
573 Vec::new()
574 };
575 self.consume_token(&Token::RightParen)?;
576 Ok(SelectItem::Window {
577 function,
578 window: WindowSpec {
579 partition_by,
580 order_by,
581 },
582 })
583 }
584
585 fn parse_from_clause(&mut self) -> Result<TableReference> {
586 let from = self.parse_table_reference()?;
587 self.parse_join_chain(from)
588 }
589
590 fn parse_table_reference(&mut self) -> Result<TableReference> {
591 match self.peek_token()? {
592 Token::Identifier(_) => {
593 let table_name = self.parse_identifier()?;
594 let alias = self.parse_optional_alias()?;
595 Ok(TableReference::Table(table_name, alias))
596 }
597 Token::LeftParen => {
598 self.consume_token(&Token::LeftParen)?;
599 let subquery = self.parse_query_statement(false, false)?.0;
600 self.consume_token(&Token::RightParen)?;
601 let alias = self.parse_required_alias("derived table")?;
602 Ok(TableReference::Derived {
603 subquery: Box::new(subquery),
604 alias,
605 })
606 }
607 _ => Err(HematiteError::ParseError(format!(
608 "Expected table name, found: {:?}",
609 self.peek_token()?
610 ))),
611 }
612 }
613
614 fn parse_join_chain(&mut self, mut from: TableReference) -> Result<TableReference> {
615 loop {
616 match self.peek_token()? {
617 Token::Comma => {
618 self.consume_token(&Token::Comma)?;
619 let right = self.parse_table_reference()?;
620 from = TableReference::CrossJoin(Box::new(from), Box::new(right));
621 }
622 Token::Join => {
623 self.consume_token(&Token::Join)?;
624 let right = self.parse_table_reference()?;
625 self.consume_token(&Token::On)?;
626 let on = self.parse_or_condition()?;
627 from = TableReference::InnerJoin {
628 left: Box::new(from),
629 right: Box::new(right),
630 on,
631 };
632 }
633 Token::Inner => {
634 self.consume_token(&Token::Inner)?;
635 self.consume_token(&Token::Join)?;
636 let right = self.parse_table_reference()?;
637 self.consume_token(&Token::On)?;
638 let on = self.parse_or_condition()?;
639 from = TableReference::InnerJoin {
640 left: Box::new(from),
641 right: Box::new(right),
642 on,
643 };
644 }
645 Token::Left => {
646 self.consume_token(&Token::Left)?;
647 if matches!(self.peek_token(), Ok(Token::Outer)) {
648 self.consume_token(&Token::Outer)?;
649 }
650 self.consume_token(&Token::Join)?;
651 let right = self.parse_table_reference()?;
652 self.consume_token(&Token::On)?;
653 let on = self.parse_or_condition()?;
654 from = TableReference::LeftJoin {
655 left: Box::new(from),
656 right: Box::new(right),
657 on,
658 };
659 }
660 Token::Right => {
661 self.consume_token(&Token::Right)?;
662 if matches!(self.peek_token(), Ok(Token::Outer)) {
663 self.consume_token(&Token::Outer)?;
664 }
665 self.consume_token(&Token::Join)?;
666 let right = self.parse_table_reference()?;
667 self.consume_token(&Token::On)?;
668 let on = self.parse_or_condition()?;
669 from = TableReference::RightJoin {
670 left: Box::new(from),
671 right: Box::new(right),
672 on,
673 };
674 }
675 Token::Full => {
676 self.consume_token(&Token::Full)?;
677 if matches!(self.peek_token(), Ok(Token::Outer)) {
678 self.consume_token(&Token::Outer)?;
679 }
680 self.consume_token(&Token::Join)?;
681 let right = self.parse_table_reference()?;
682 self.consume_token(&Token::On)?;
683 let on = self.parse_or_condition()?;
684 from = TableReference::FullOuterJoin {
685 left: Box::new(from),
686 right: Box::new(right),
687 on,
688 };
689 }
690 _ => break,
691 }
692 }
693
694 Ok(from)
695 }
696
697 fn parse_where_clause(&mut self) -> Result<WhereClause> {
698 self.consume_token(&Token::Where)?;
699
700 let conditions = self.parse_conditions()?;
701
702 Ok(WhereClause { conditions })
703 }
704
705 fn parse_order_by_clause(&mut self) -> Result<Vec<OrderByItem>> {
706 self.consume_token(&Token::Order)?;
707 self.consume_token(&Token::By)?;
708
709 let mut items = Vec::new();
710 loop {
711 let column = self.parse_identifier_reference()?;
712 let direction = match self.peek_token() {
713 Ok(Token::Asc) => {
714 self.consume_token(&Token::Asc)?;
715 SortDirection::Asc
716 }
717 Ok(Token::Desc) => {
718 self.consume_token(&Token::Desc)?;
719 SortDirection::Desc
720 }
721 _ => SortDirection::Asc,
722 };
723
724 items.push(OrderByItem { column, direction });
725
726 if matches!(self.peek_token(), Ok(Token::Comma)) {
727 self.consume_token(&Token::Comma)?;
728 continue;
729 }
730
731 break;
732 }
733
734 Ok(items)
735 }
736
737 fn parse_group_by_clause(&mut self) -> Result<Vec<Expression>> {
738 self.consume_token(&Token::Group)?;
739 self.consume_token(&Token::By)?;
740
741 let mut items = Vec::new();
742 loop {
743 items.push(self.parse_expression()?);
744 if matches!(self.peek_token(), Ok(Token::Comma)) {
745 self.consume_token(&Token::Comma)?;
746 continue;
747 }
748 break;
749 }
750 Ok(items)
751 }
752
753 fn parse_having_clause(&mut self) -> Result<WhereClause> {
754 self.consume_token(&Token::Having)?;
755 let conditions = self.parse_conditions()?;
756 Ok(WhereClause { conditions })
757 }
758
    /// Parses `LIMIT n` or the MySQL-style two-argument `LIMIT offset, count`.
    ///
    /// Returns `(limit, offset)`. In the two-argument form the FIRST number
    /// is the offset and the SECOND is the row count, hence the swap into
    /// `(Some(second), Some(first))` below.
    fn parse_limit_clause(&mut self) -> Result<(Option<usize>, Option<usize>)> {
        self.consume_token(&Token::Limit)?;
        let first = self.parse_non_negative_integer_clause("LIMIT")?;
        if matches!(self.peek_token(), Ok(Token::Comma)) {
            self.consume_token(&Token::Comma)?;
            let second = self.parse_non_negative_integer_clause("LIMIT")?;
            // LIMIT offset, count -> (limit = count, offset = offset).
            Ok((Some(second), Some(first)))
        } else {
            Ok((Some(first), None))
        }
    }
770
    /// Parses `OFFSET n` and returns the non-negative row offset.
    fn parse_offset_clause(&mut self) -> Result<usize> {
        self.consume_token(&Token::Offset)?;
        self.parse_non_negative_integer_clause("OFFSET")
    }
775
776 fn parse_non_negative_integer_clause(&mut self, clause_name: &str) -> Result<usize> {
777 match self.peek_token()? {
778 Token::NumberLiteral(value) => {
779 let parsed = parse_non_negative_integer_literal(&value, clause_name)?;
780 self.consume_token(&Token::NumberLiteral(value))?;
781 Ok(parsed)
782 }
783 token => Err(HematiteError::ParseError(format!(
784 "Expected non-negative integer after {}, found: {:?}",
785 clause_name, token
786 ))),
787 }
788 }
789
790 fn parse_conditions(&mut self) -> Result<Vec<Condition>> {
791 Ok(vec![self.parse_or_condition()?])
792 }
793
794 fn parse_or_condition(&mut self) -> Result<Condition> {
795 let mut condition = self.parse_and_condition()?;
796
797 while matches!(self.peek_token(), Ok(Token::Or)) {
798 self.consume_token(&Token::Or)?;
799 let right = self.parse_and_condition()?;
800 condition = Condition::Logical {
801 left: Box::new(condition),
802 operator: LogicalOperator::Or,
803 right: Box::new(right),
804 };
805 }
806
807 Ok(condition)
808 }
809
810 fn parse_and_condition(&mut self) -> Result<Condition> {
811 let mut condition = self.parse_primary_condition()?;
812
813 while matches!(self.peek_token(), Ok(Token::And)) {
814 self.consume_token(&Token::And)?;
815 let right = self.parse_primary_condition()?;
816 condition = Condition::Logical {
817 left: Box::new(condition),
818 operator: LogicalOperator::And,
819 right: Box::new(right),
820 };
821 }
822
823 Ok(condition)
824 }
825
    /// Parses a primary condition: a NOT prefix (with special handling for
    /// NOT EXISTS), an EXISTS subquery, a parenthesized condition group, or
    /// a simple predicate.
    fn parse_primary_condition(&mut self) -> Result<Condition> {
        if self.peek_token()? == Token::Not {
            self.consume_token(&Token::Not)?;
            // NOT EXISTS is folded into the Exists node's `is_not` flag
            // rather than wrapped in a Not node.
            if self.peek_token()? == Token::Exists {
                self.consume_token(&Token::Exists)?;
                return self.parse_exists_condition(true);
            }
            return Ok(Condition::Not(Box::new(self.parse_primary_condition()?)));
        }

        if self.peek_token()? == Token::Exists {
            self.consume_token(&Token::Exists)?;
            return self.parse_exists_condition(false);
        }

        // Parenthesized group: recurse from the top of the condition
        // grammar so the parentheses override precedence.
        if self.peek_token()? == Token::LeftParen {
            self.consume_token(&Token::LeftParen)?;
            let condition = self.parse_or_condition()?;
            self.consume_token(&Token::RightParen)?;
            Ok(condition)
        } else {
            self.parse_condition()
        }
    }
850
    /// Parses a simple predicate: a left value expression followed by one of
    /// `NOT IN/LIKE/BETWEEN`, `IN`, `BETWEEN`, `LIKE`, `IS [NOT] NULL`, or a
    /// comparison operator with a right value expression.
    fn parse_condition(&mut self) -> Result<Condition> {
        let left = self.parse_value_expression()?;

        // A NOT here must introduce IN, LIKE, or BETWEEN; its negation is
        // recorded in the predicate's `is_not` flag.
        if matches!(self.peek_token(), Ok(Token::Not)) {
            self.consume_token(&Token::Not)?;
            if matches!(self.peek_token(), Ok(Token::In)) {
                self.consume_token(&Token::In)?;
                return self.parse_in_list_condition(left, true);
            }
            if matches!(self.peek_token(), Ok(Token::Like)) {
                self.consume_token(&Token::Like)?;
                return self.parse_like_condition(left, true);
            }
            if matches!(self.peek_token(), Ok(Token::Between)) {
                self.consume_token(&Token::Between)?;
                return self.parse_between_condition(left, true);
            }
            return Err(HematiteError::ParseError(
                "Expected IN, LIKE, or BETWEEN after NOT in predicate".to_string(),
            ));
        }

        if matches!(self.peek_token(), Ok(Token::In)) {
            self.consume_token(&Token::In)?;
            return self.parse_in_list_condition(left, false);
        }

        if matches!(self.peek_token(), Ok(Token::Between)) {
            self.consume_token(&Token::Between)?;
            return self.parse_between_condition(left, false);
        }

        if matches!(self.peek_token(), Ok(Token::Like)) {
            self.consume_token(&Token::Like)?;
            return self.parse_like_condition(left, false);
        }

        // IS [NOT] NULL.
        if matches!(self.peek_token(), Ok(Token::Is)) {
            self.consume_token(&Token::Is)?;
            let is_not = if matches!(self.peek_token(), Ok(Token::Not)) {
                self.consume_token(&Token::Not)?;
                true
            } else {
                false
            };
            self.consume_token(&Token::Null)?;
            return Ok(Condition::NullCheck { expr: left, is_not });
        }

        // Fallback: a binary comparison (e.g. =, <, >=, ...).
        let operator = self.parse_comparison_operator()?;

        let right = self.parse_value_expression()?;

        Ok(Condition::Comparison {
            left,
            operator,
            right,
        })
    }
910
911 fn parse_in_list_condition(&mut self, expr: Expression, is_not: bool) -> Result<Condition> {
912 self.consume_token(&Token::LeftParen)?;
913 if matches!(self.peek_token(), Ok(Token::Select | Token::With)) {
914 let subquery = self.parse_query_statement(false, false)?.0;
915 self.consume_token(&Token::RightParen)?;
916 return Ok(Condition::InSubquery {
917 expr,
918 subquery: Box::new(subquery),
919 is_not,
920 });
921 }
922
923 let mut values = Vec::new();
924
925 loop {
926 values.push(self.parse_expression()?);
927 if matches!(self.peek_token(), Ok(Token::Comma)) {
928 self.consume_token(&Token::Comma)?;
929 continue;
930 }
931 break;
932 }
933
934 if values.is_empty() {
935 return Err(HematiteError::ParseError(
936 "IN list must contain at least one expression".to_string(),
937 ));
938 }
939
940 self.consume_token(&Token::RightParen)?;
941 Ok(Condition::InList {
942 expr,
943 values,
944 is_not,
945 })
946 }
947
948 fn parse_exists_condition(&mut self, is_not: bool) -> Result<Condition> {
949 self.consume_token(&Token::LeftParen)?;
950 let subquery = self.parse_query_statement(false, false)?.0;
951 self.consume_token(&Token::RightParen)?;
952 Ok(Condition::Exists {
953 subquery: Box::new(subquery),
954 is_not,
955 })
956 }
957
958 fn parse_between_condition(&mut self, expr: Expression, is_not: bool) -> Result<Condition> {
959 let lower = self.parse_value_expression()?;
960 self.consume_token(&Token::And)?;
961 let upper = self.parse_value_expression()?;
962 Ok(Condition::Between {
963 expr,
964 lower,
965 upper,
966 is_not,
967 })
968 }
969
970 fn parse_like_condition(&mut self, expr: Expression, is_not: bool) -> Result<Condition> {
971 let pattern = self.parse_value_expression()?;
972 Ok(Condition::Like {
973 expr,
974 pattern,
975 is_not,
976 })
977 }
978
    /// Entry point of the expression grammar; the lowest-precedence level
    /// is OR.
    fn parse_expression(&mut self) -> Result<Expression> {
        self.parse_or_expression()
    }
982
983 fn parse_or_expression(&mut self) -> Result<Expression> {
984 let mut expr = self.parse_and_expression()?;
985
986 while matches!(self.peek_token(), Ok(Token::Or)) {
987 self.consume_token(&Token::Or)?;
988 let right = self.parse_and_expression()?;
989 expr = Expression::Logical {
990 left: Box::new(expr),
991 operator: LogicalOperator::Or,
992 right: Box::new(right),
993 };
994 }
995
996 Ok(expr)
997 }
998
999 fn parse_and_expression(&mut self) -> Result<Expression> {
1000 let mut expr = self.parse_not_expression()?;
1001
1002 while matches!(self.peek_token(), Ok(Token::And)) {
1003 self.consume_token(&Token::And)?;
1004 let right = self.parse_not_expression()?;
1005 expr = Expression::Logical {
1006 left: Box::new(expr),
1007 operator: LogicalOperator::And,
1008 right: Box::new(right),
1009 };
1010 }
1011
1012 Ok(expr)
1013 }
1014
1015 fn parse_not_expression(&mut self) -> Result<Expression> {
1016 if matches!(self.peek_token(), Ok(Token::Not)) {
1017 self.consume_token(&Token::Not)?;
1018 return Ok(Expression::UnaryNot(Box::new(self.parse_not_expression()?)));
1019 }
1020
1021 self.parse_predicate_expression()
1022 }
1023
    /// Parses a predicate in expression position: EXISTS, or a left value
    /// expression optionally followed by `NOT IN/LIKE/BETWEEN`, `IN`,
    /// `BETWEEN`, `LIKE`, `IS [NOT] NULL`, or a comparison. Mirrors
    /// `parse_condition` but produces `Expression` nodes, and falls through
    /// to the bare value expression when no predicate operator follows.
    fn parse_predicate_expression(&mut self) -> Result<Expression> {
        if matches!(self.peek_token(), Ok(Token::Exists)) {
            self.consume_token(&Token::Exists)?;
            return self.parse_exists_expression(false);
        }

        let left = self.parse_value_expression()?;

        // A NOT here must introduce IN, LIKE, or BETWEEN; the negation is
        // carried in the predicate node's `is_not` flag.
        if matches!(self.peek_token(), Ok(Token::Not)) {
            self.consume_token(&Token::Not)?;
            if matches!(self.peek_token(), Ok(Token::In)) {
                self.consume_token(&Token::In)?;
                return self.parse_in_list_expression(left, true);
            }
            if matches!(self.peek_token(), Ok(Token::Like)) {
                self.consume_token(&Token::Like)?;
                return self.parse_like_expression(left, true);
            }
            if matches!(self.peek_token(), Ok(Token::Between)) {
                self.consume_token(&Token::Between)?;
                return self.parse_between_expression(left, true);
            }
            return Err(HematiteError::ParseError(
                "Expected IN, LIKE, or BETWEEN after NOT in expression".to_string(),
            ));
        }

        if matches!(self.peek_token(), Ok(Token::In)) {
            self.consume_token(&Token::In)?;
            return self.parse_in_list_expression(left, false);
        }

        if matches!(self.peek_token(), Ok(Token::Between)) {
            self.consume_token(&Token::Between)?;
            return self.parse_between_expression(left, false);
        }

        if matches!(self.peek_token(), Ok(Token::Like)) {
            self.consume_token(&Token::Like)?;
            return self.parse_like_expression(left, false);
        }

        // IS [NOT] NULL.
        if matches!(self.peek_token(), Ok(Token::Is)) {
            self.consume_token(&Token::Is)?;
            let is_not = if matches!(self.peek_token(), Ok(Token::Not)) {
                self.consume_token(&Token::Not)?;
                true
            } else {
                false
            };
            self.consume_token(&Token::Null)?;
            return Ok(Expression::NullCheck {
                expr: Box::new(left),
                is_not,
            });
        }

        // Comparison only when the next token can begin one; otherwise the
        // bare value expression stands on its own.
        if self.peek_token_starts_comparison() {
            let operator = self.parse_comparison_operator()?;
            let right = self.parse_value_expression()?;
            return Ok(Expression::Comparison {
                left: Box::new(left),
                operator,
                right: Box::new(right),
            });
        }

        Ok(left)
    }
1093
    /// Parses an arithmetic value expression (no predicates); the
    /// lowest-precedence arithmetic level is additive (`+`/`-`).
    fn parse_value_expression(&mut self) -> Result<Expression> {
        self.parse_additive_expression()
    }
1097
1098 fn parse_in_list_expression(&mut self, expr: Expression, is_not: bool) -> Result<Expression> {
1099 self.consume_token(&Token::LeftParen)?;
1100 if matches!(self.peek_token(), Ok(Token::Select | Token::With)) {
1101 let subquery = self.parse_query_statement(false, false)?.0;
1102 self.consume_token(&Token::RightParen)?;
1103 return Ok(Expression::InSubquery {
1104 expr: Box::new(expr),
1105 subquery: Box::new(subquery),
1106 is_not,
1107 });
1108 }
1109
1110 let mut values = Vec::new();
1111
1112 loop {
1113 values.push(self.parse_expression()?);
1114 if matches!(self.peek_token(), Ok(Token::Comma)) {
1115 self.consume_token(&Token::Comma)?;
1116 continue;
1117 }
1118 break;
1119 }
1120
1121 if values.is_empty() {
1122 return Err(HematiteError::ParseError(
1123 "IN list must contain at least one expression".to_string(),
1124 ));
1125 }
1126
1127 self.consume_token(&Token::RightParen)?;
1128 Ok(Expression::InList {
1129 expr: Box::new(expr),
1130 values,
1131 is_not,
1132 })
1133 }
1134
1135 fn parse_exists_expression(&mut self, is_not: bool) -> Result<Expression> {
1136 self.consume_token(&Token::LeftParen)?;
1137 let subquery = self.parse_query_statement(false, false)?.0;
1138 self.consume_token(&Token::RightParen)?;
1139 Ok(Expression::Exists {
1140 subquery: Box::new(subquery),
1141 is_not,
1142 })
1143 }
1144
1145 fn parse_between_expression(&mut self, expr: Expression, is_not: bool) -> Result<Expression> {
1146 let lower = self.parse_value_expression()?;
1147 self.consume_token(&Token::And)?;
1148 let upper = self.parse_value_expression()?;
1149 Ok(Expression::Between {
1150 expr: Box::new(expr),
1151 lower: Box::new(lower),
1152 upper: Box::new(upper),
1153 is_not,
1154 })
1155 }
1156
1157 fn parse_like_expression(&mut self, expr: Expression, is_not: bool) -> Result<Expression> {
1158 let pattern = self.parse_value_expression()?;
1159 Ok(Expression::Like {
1160 expr: Box::new(expr),
1161 pattern: Box::new(pattern),
1162 is_not,
1163 })
1164 }
1165
1166 fn parse_additive_expression(&mut self) -> Result<Expression> {
1167 let mut expr = self.parse_multiplicative_expression()?;
1168
1169 loop {
1170 let operator = match self.peek_token() {
1171 Ok(Token::Plus) => ArithmeticOperator::Add,
1172 Ok(Token::Minus) => ArithmeticOperator::Subtract,
1173 _ => break,
1174 };
1175
1176 match operator {
1177 ArithmeticOperator::Add => self.consume_token(&Token::Plus)?,
1178 ArithmeticOperator::Subtract => self.consume_token(&Token::Minus)?,
1179 ArithmeticOperator::Multiply
1180 | ArithmeticOperator::Divide
1181 | ArithmeticOperator::Modulo => unreachable!(),
1182 }
1183
1184 let right = self.parse_multiplicative_expression()?;
1185 expr = Expression::Binary {
1186 left: Box::new(expr),
1187 operator,
1188 right: Box::new(right),
1189 };
1190 }
1191
1192 Ok(expr)
1193 }
1194
1195 fn parse_multiplicative_expression(&mut self) -> Result<Expression> {
1196 let mut expr = self.parse_unary_expression()?;
1197
1198 loop {
1199 let operator = match self.peek_token() {
1200 Ok(Token::Asterisk) => ArithmeticOperator::Multiply,
1201 Ok(Token::Slash) => ArithmeticOperator::Divide,
1202 Ok(Token::Percent) => ArithmeticOperator::Modulo,
1203 _ => break,
1204 };
1205
1206 match operator {
1207 ArithmeticOperator::Multiply => self.consume_token(&Token::Asterisk)?,
1208 ArithmeticOperator::Divide => self.consume_token(&Token::Slash)?,
1209 ArithmeticOperator::Modulo => self.consume_token(&Token::Percent)?,
1210 ArithmeticOperator::Add | ArithmeticOperator::Subtract => unreachable!(),
1211 }
1212
1213 let right = self.parse_unary_expression()?;
1214 expr = Expression::Binary {
1215 left: Box::new(expr),
1216 operator,
1217 right: Box::new(right),
1218 };
1219 }
1220
1221 Ok(expr)
1222 }
1223
1224 fn parse_unary_expression(&mut self) -> Result<Expression> {
1225 if matches!(self.peek_token(), Ok(Token::Minus)) {
1226 self.consume_token(&Token::Minus)?;
1227 return Ok(Expression::UnaryMinus(Box::new(
1228 self.parse_unary_expression()?,
1229 )));
1230 }
1231
1232 self.parse_primary_expression()
1233 }
1234
    /// Parses an atom of the expression grammar: keyword-introduced forms
    /// (CAST, CASE, INTERVAL), scalar and aggregate function calls, column
    /// references, literals, `?` placeholders, and parenthesized
    /// expressions or scalar subqueries.
    fn parse_primary_expression(&mut self) -> Result<Expression> {
        let token = self.peek_token()?;
        match token {
            Token::Cast => self.parse_cast_expression(),
            Token::Case => self.parse_case_expression(),
            Token::Interval => self.parse_interval_literal(),
            // DATE/TIME/DATETIME and LEFT/RIGHT are keywords that double as
            // scalar function names; they are calls only when '(' follows.
            Token::Date | Token::Time | Token::DateTime
                if self.next_token_is(&Token::LeftParen) =>
            {
                self.parse_scalar_function_expression()
            }
            Token::Left | Token::Right if self.next_token_is(&Token::LeftParen) => {
                self.parse_scalar_function_expression()
            }
            Token::Count | Token::Sum | Token::Avg | Token::Min | Token::Max => {
                self.parse_aggregate_expression()
            }
            // Identifier followed by '(' is a scalar call; a bare
            // identifier is a (possibly dotted) column reference.
            Token::Identifier(_) if self.next_token_is(&Token::LeftParen) => {
                self.parse_scalar_function_expression()
            }
            Token::Identifier(_) => Ok(Expression::Column(self.parse_identifier_reference()?)),
            Token::StringLiteral(value) => {
                self.consume_token(&Token::StringLiteral(value.clone()))?;
                Ok(Expression::Literal(LiteralValue::Text(value)))
            }
            Token::BlobLiteral(value) => {
                self.consume_token(&Token::BlobLiteral(value.clone()))?;
                Ok(Expression::Literal(LiteralValue::Blob(value)))
            }
            Token::NumberLiteral(value) => {
                self.consume_token(&Token::NumberLiteral(value.clone()))?;
                // A '.' selects the float path; everything else must parse
                // as i128. NOTE(review): exponent forms like `1e5` (no dot)
                // would be rejected as invalid integers here — confirm the
                // lexer never emits them as NumberLiteral.
                if value.contains('.') {
                    Ok(Expression::Literal(LiteralValue::Float(
                        normalize_float_literal(&value),
                    )))
                } else {
                    Ok(Expression::Literal(LiteralValue::Integer(
                        value.parse::<i128>().map_err(|_| {
                            HematiteError::ParseError(format!(
                                "Invalid integer literal '{}'",
                                value
                            ))
                        })?,
                    )))
                }
            }
            Token::BooleanLiteral(value) => {
                self.consume_token(&Token::BooleanLiteral(value.clone()))?;
                Ok(Expression::Literal(LiteralValue::Boolean(value)))
            }
            // Two token spellings of NULL collapse to the same literal.
            Token::NullLiteral | Token::Null => {
                if token == Token::NullLiteral {
                    self.consume_token(&Token::NullLiteral)?;
                } else {
                    self.consume_token(&Token::Null)?;
                }
                Ok(Expression::Literal(LiteralValue::Null))
            }
            // Each '?' placeholder is assigned the next 0-based index.
            Token::Placeholder => {
                self.consume_token(&Token::Placeholder)?;
                let index = self.parameter_count;
                self.parameter_count += 1;
                Ok(Expression::Parameter(index))
            }
            Token::LeftParen => {
                self.consume_token(&Token::LeftParen)?;
                // '(' SELECT/WITH ... ')' is a scalar subquery; otherwise a
                // plain parenthesized expression.
                if matches!(self.peek_token(), Ok(Token::Select | Token::With)) {
                    let subquery = self.parse_query_statement(false, false)?.0;
                    self.consume_token(&Token::RightParen)?;
                    return Ok(Expression::ScalarSubquery(Box::new(subquery)));
                }
                let expr = self.parse_expression()?;
                self.consume_token(&Token::RightParen)?;
                Ok(expr)
            }
            _ => Err(HematiteError::ParseError(format!(
                "Expected expression, found: {:?}",
                token
            ))),
        }
    }
1317
1318 fn parse_interval_literal(&mut self) -> Result<Expression> {
1319 self.consume_token(&Token::Interval)?;
1320 let value = match self.peek_token()? {
1321 Token::StringLiteral(value) => {
1322 self.consume_token(&Token::StringLiteral(value.clone()))?;
1323 value
1324 }
1325 token => {
1326 return Err(HematiteError::ParseError(format!(
1327 "Expected INTERVAL string literal, found: {:?}",
1328 token
1329 )))
1330 }
1331 };
1332 let leading = self.parse_identifier()?.to_ascii_uppercase();
1333 self.consume_token(&Token::To)?;
1334 let trailing = self.parse_identifier()?.to_ascii_uppercase();
1335
1336 let qualifier = match (leading.as_str(), trailing.as_str()) {
1337 ("YEAR", "MONTH") => IntervalQualifier::YearToMonth,
1338 ("DAY", "SECOND") => IntervalQualifier::DayToSecond,
1339 _ => {
1340 return Err(HematiteError::ParseError(format!(
1341 "Unsupported INTERVAL qualifier '{} TO {}'",
1342 leading, trailing
1343 )))
1344 }
1345 };
1346
1347 Ok(Expression::IntervalLiteral { value, qualifier })
1348 }
1349
1350 fn parse_case_expression(&mut self) -> Result<Expression> {
1351 self.consume_token(&Token::Case)?;
1352 let mut branches = Vec::new();
1353
1354 while matches!(self.peek_token(), Ok(Token::When)) {
1355 self.consume_token(&Token::When)?;
1356 let condition = self.parse_expression()?;
1357 self.consume_token(&Token::Then)?;
1358 let result = self.parse_expression()?;
1359 branches.push(CaseWhenClause { condition, result });
1360 }
1361
1362 if branches.is_empty() {
1363 return Err(HematiteError::ParseError(
1364 "CASE expression requires at least one WHEN ... THEN branch".to_string(),
1365 ));
1366 }
1367
1368 let else_expr = if matches!(self.peek_token(), Ok(Token::Else)) {
1369 self.consume_token(&Token::Else)?;
1370 Some(Box::new(self.parse_expression()?))
1371 } else {
1372 None
1373 };
1374
1375 self.consume_token(&Token::End)?;
1376 Ok(Expression::Case {
1377 branches,
1378 else_expr,
1379 })
1380 }
1381
    /// Parses `CAST ( <expr> AS <type> )`; the CAST keyword was only
    /// peeked by the caller and is consumed here.
    fn parse_cast_expression(&mut self) -> Result<Expression> {
        self.consume_token(&Token::Cast)?;
        self.consume_token(&Token::LeftParen)?;
        let expr = self.parse_expression()?;
        self.consume_token(&Token::As)?;
        let target_type = self.parse_data_type()?;
        self.consume_token(&Token::RightParen)?;
        Ok(Expression::Cast {
            expr: Box::new(expr),
            target_type,
        })
    }
1394
1395 fn parse_scalar_function_expression(&mut self) -> Result<Expression> {
1396 let function_name = self.parse_scalar_function_name()?;
1397 let function = ScalarFunction::from_identifier(&function_name).ok_or_else(|| {
1398 HematiteError::ParseError(format!("Unsupported scalar function '{}'", function_name))
1399 })?;
1400 self.consume_token(&Token::LeftParen)?;
1401
1402 let mut args = Vec::new();
1403 if !matches!(self.peek_token(), Ok(Token::RightParen)) {
1404 loop {
1405 args.push(self.parse_expression()?);
1406 if matches!(self.peek_token(), Ok(Token::Comma)) {
1407 self.consume_token(&Token::Comma)?;
1408 continue;
1409 }
1410 break;
1411 }
1412 }
1413
1414 self.consume_token(&Token::RightParen)?;
1415 Ok(Expression::ScalarFunctionCall { function, args })
1416 }
1417
1418 fn parse_scalar_function_name(&mut self) -> Result<String> {
1419 match self.peek_token()? {
1420 Token::Identifier(_) => self.parse_identifier(),
1421 Token::Date => {
1422 self.consume_token(&Token::Date)?;
1423 Ok("DATE".to_string())
1424 }
1425 Token::Time => {
1426 self.consume_token(&Token::Time)?;
1427 Ok("TIME".to_string())
1428 }
1429 Token::DateTime => {
1430 self.consume_token(&Token::DateTime)?;
1431 Ok("DATETIME".to_string())
1432 }
1433 Token::Left => {
1434 self.consume_token(&Token::Left)?;
1435 Ok("LEFT".to_string())
1436 }
1437 Token::Right => {
1438 self.consume_token(&Token::Right)?;
1439 Ok("RIGHT".to_string())
1440 }
1441 token => Err(HematiteError::ParseError(format!(
1442 "Expected scalar function name, found: {:?}",
1443 token
1444 ))),
1445 }
1446 }
1447
1448 fn parse_comparison_operator(&mut self) -> Result<ComparisonOperator> {
1449 let token = self.peek_token()?;
1450 let operator = match token {
1451 Token::Equal => ComparisonOperator::Equal,
1452 Token::NotEqual => ComparisonOperator::NotEqual,
1453 Token::LessThan => ComparisonOperator::LessThan,
1454 Token::LessThanOrEqual => ComparisonOperator::LessThanOrEqual,
1455 Token::GreaterThan => ComparisonOperator::GreaterThan,
1456 Token::GreaterThanOrEqual => ComparisonOperator::GreaterThanOrEqual,
1457 _ => {
1458 return Err(HematiteError::ParseError(format!(
1459 "Expected comparison operator, found: {:?}",
1460 token
1461 )))
1462 }
1463 };
1464
1465 self.consume_token(&token)?;
1466 Ok(operator)
1467 }
1468
    /// Parses an INSERT statement:
    ///   `INSERT INTO t (cols) VALUES (...), ...` or `... (cols) SELECT ...`
    ///   `INSERT INTO t SET col = expr, ...`
    /// each with an optional `ON DUPLICATE KEY UPDATE` clause and a
    /// mandatory trailing semicolon.
    fn parse_insert(&mut self) -> Result<Statement> {
        self.consume_token(&Token::Insert)?;
        self.consume_token(&Token::Into)?;

        let table = self.parse_identifier()?;

        let (columns, source) = match self.peek_token()? {
            // Column-list form: (a, b) followed by VALUES or a query.
            Token::LeftParen => {
                self.consume_token(&Token::LeftParen)?;
                let columns = self.parse_column_list()?;
                self.consume_token(&Token::RightParen)?;
                let source = match self.peek_token()? {
                    Token::Values => {
                        self.consume_token(&Token::Values)?;
                        InsertSource::Values(self.parse_value_lists()?)
                    }
                    Token::Select | Token::With => {
                        InsertSource::Select(Box::new(self.parse_query_statement(false, false)?.0))
                    }
                    token => {
                        return Err(HematiteError::ParseError(format!(
                            "Expected VALUES or SELECT after INSERT column list, found: {:?}",
                            token
                        )))
                    }
                };
                (columns, source)
            }
            // SET form: desugars assignments into one column list plus a
            // single-row VALUES list (same order as the assignments).
            Token::Set => {
                self.consume_token(&Token::Set)?;
                let assignments = self.parse_update_assignments()?;
                (
                    assignments
                        .iter()
                        .map(|assignment| assignment.column.clone())
                        .collect(),
                    InsertSource::Values(vec![assignments
                        .into_iter()
                        .map(|assignment| assignment.value)
                        .collect()]),
                )
            }
            token => {
                return Err(HematiteError::ParseError(format!(
                    "Expected column list or SET after INSERT INTO table, found: {:?}",
                    token
                )))
            }
        };

        // Optional upsert clause: ON DUPLICATE KEY UPDATE a = ..., ...
        let on_duplicate = if matches!(self.peek_token(), Ok(Token::On)) {
            self.consume_token(&Token::On)?;
            self.consume_token(&Token::Duplicate)?;
            self.consume_token(&Token::Key)?;
            self.consume_token(&Token::Update)?;
            Some(self.parse_update_assignments()?)
        } else {
            None
        };

        self.consume_token(&Token::Semicolon)?;

        Ok(Statement::Insert(InsertStatement {
            table,
            columns,
            source,
            on_duplicate,
        }))
    }
1538
    /// Parses an UPDATE statement, including multi-table forms where the
    /// target is aliased or joined (e.g. `UPDATE t JOIN u ON ... SET ...`).
    fn parse_update(&mut self) -> Result<Statement> {
        self.consume_token(&Token::Update)?;
        let table = self.parse_identifier()?;
        let alias = self.parse_optional_alias()?;
        let mut from = TableReference::Table(table.clone(), alias.clone());
        from = self.parse_join_chain(from)?;
        // The source is "explicit" when an alias or a join changed the
        // plain `TableReference::Table(table, None)` shape; a plain
        // single-table UPDATE leaves `source`/`target_binding` as None.
        let has_explicit_source =
            !matches!(&from, TableReference::Table(name, None) if name == &table);
        self.consume_token(&Token::Set)?;
        let assignments = self.parse_update_assignments()?;

        let where_clause = if matches!(self.peek_token(), Ok(Token::Where)) {
            Some(self.parse_where_clause()?)
        } else {
            None
        };

        self.consume_token(&Token::Semicolon)?;

        // In the multi-table case the target binding is the alias when one
        // was given, otherwise the table name itself.
        let target_binding =
            has_explicit_source.then(|| alias.clone().unwrap_or_else(|| table.clone()));
        let source = has_explicit_source.then_some(from);

        Ok(Statement::Update(UpdateStatement {
            table,
            target_binding,
            source,
            assignments,
            where_clause,
        }))
    }
1570
    /// Parses a DELETE statement. Two forms are accepted:
    ///   `DELETE FROM t [WHERE ...]`             — single-table delete
    ///   `DELETE alias FROM <from> [WHERE ...]`  — multi-table delete where
    ///   `alias` names the table whose rows are removed.
    fn parse_delete(&mut self) -> Result<Statement> {
        self.consume_token(&Token::Delete)?;
        let (table, target_binding, source) = match self.peek_token()? {
            Token::From => {
                self.consume_token(&Token::From)?;
                let table = self.parse_identifier()?;
                (table, None, None)
            }
            Token::Identifier(_) => {
                // Multi-table form: resolve the leading alias against the
                // FROM clause to find the concrete target table.
                let target_binding = self.parse_identifier()?;
                self.consume_token(&Token::From)?;
                let source = self.parse_from_clause()?;
                let table = self.resolve_delete_target_table(&target_binding, &source)?;
                (table, Some(target_binding), Some(source))
            }
            token => {
                return Err(HematiteError::ParseError(format!(
                    "Expected FROM or target table alias after DELETE, found: {:?}",
                    token
                )))
            }
        };

        let where_clause = if matches!(self.peek_token(), Ok(Token::Where)) {
            Some(self.parse_where_clause()?)
        } else {
            None
        };

        self.consume_token(&Token::Semicolon)?;

        Ok(Statement::Delete(DeleteStatement {
            table,
            target_binding,
            source,
            where_clause,
        }))
    }
1609
1610 fn resolve_delete_target_table(
1611 &self,
1612 target_binding: &str,
1613 source: &TableReference,
1614 ) -> Result<String> {
1615 let mut matches = Vec::new();
1616 for binding in SelectStatement::collect_table_bindings(source) {
1617 let binding_name = binding.alias.as_deref().unwrap_or(&binding.table_name);
1618 if binding_name.eq_ignore_ascii_case(target_binding)
1619 || binding.table_name.eq_ignore_ascii_case(target_binding)
1620 {
1621 matches.push(binding.table_name);
1622 }
1623 }
1624
1625 match matches.len() {
1626 1 => Ok(matches.remove(0)),
1627 0 => Err(HematiteError::ParseError(format!(
1628 "DELETE target '{}' does not match any table in the FROM clause",
1629 target_binding
1630 ))),
1631 _ => Err(HematiteError::ParseError(format!(
1632 "DELETE target '{}' is ambiguous in the FROM clause",
1633 target_binding
1634 ))),
1635 }
1636 }
1637
    /// Parses CREATE TABLE/VIEW/TRIGGER/INDEX/KEY. A leading UNIQUE is
    /// only legal for CREATE [UNIQUE] INDEX|KEY; for other object kinds it
    /// is rejected with an error naming the offending combination.
    fn parse_create(&mut self) -> Result<Statement> {
        self.consume_token(&Token::Create)?;
        let unique = if matches!(self.peek_token(), Ok(Token::Unique)) {
            self.consume_token(&Token::Unique)?;
            true
        } else {
            false
        };
        match self.peek_token()? {
            Token::Table => {
                if unique {
                    return Err(HematiteError::ParseError(
                        "CREATE UNIQUE TABLE is not supported".to_string(),
                    ));
                }
                self.consume_token(&Token::Table)?;
                let if_not_exists = self.parse_if_not_exists_clause()?;

                let table = self.parse_identifier()?;

                self.consume_token(&Token::LeftParen)?;

                let (columns, constraints) = self.parse_table_definition_items()?;

                self.consume_token(&Token::RightParen)?;
                // Trailing ENGINE/CHARSET/... options are parsed, validated
                // and then discarded.
                self.consume_ignored_create_table_options()?;
                self.consume_token(&Token::Semicolon)?;

                Ok(Statement::Create(CreateStatement {
                    table,
                    columns,
                    constraints,
                    if_not_exists,
                }))
            }
            Token::View => {
                if unique {
                    return Err(HematiteError::ParseError(
                        "CREATE UNIQUE VIEW is not supported".to_string(),
                    ));
                }
                self.consume_token(&Token::View)?;
                let if_not_exists = self.parse_if_not_exists_clause()?;
                let view = self.parse_identifier()?;
                self.consume_token(&Token::As)?;
                // No explicit semicolon is consumed here — presumably the
                // `true` flag tells parse_query_statement to consume the
                // statement terminator itself (it differs from the `false`
                // used at subquery call sites); TODO confirm.
                let query = self.parse_query_statement(true, false)?.0;
                Ok(Statement::CreateView(CreateViewStatement {
                    view,
                    if_not_exists,
                    query,
                }))
            }
            Token::Trigger => {
                if unique {
                    return Err(HematiteError::ParseError(
                        "CREATE UNIQUE TRIGGER is not supported".to_string(),
                    ));
                }
                self.consume_token(&Token::Trigger)?;
                let trigger = self.parse_identifier()?;
                // Only AFTER triggers are supported.
                self.consume_token(&Token::After)?;
                let event = match self.peek_token()? {
                    Token::Insert => {
                        self.consume_token(&Token::Insert)?;
                        TriggerEvent::Insert
                    }
                    Token::Update => {
                        self.consume_token(&Token::Update)?;
                        TriggerEvent::Update
                    }
                    Token::Delete => {
                        self.consume_token(&Token::Delete)?;
                        TriggerEvent::Delete
                    }
                    token => {
                        return Err(HematiteError::ParseError(format!(
                            "Expected INSERT, UPDATE, or DELETE after AFTER, found: {:?}",
                            token
                        )))
                    }
                };
                self.consume_token(&Token::On)?;
                let table = self.parse_identifier()?;
                self.consume_token(&Token::As)?;
                // The trigger body is one full statement parsed recursively
                // (including its own terminating semicolon).
                let body = Box::new(self.parse()?);
                Ok(Statement::CreateTrigger(CreateTriggerStatement {
                    trigger,
                    table,
                    event,
                    body,
                }))
            }
            Token::Index | Token::Key => {
                // INDEX and KEY are interchangeable here.
                if matches!(self.peek_token(), Ok(Token::Index)) {
                    self.consume_token(&Token::Index)?;
                } else {
                    self.consume_token(&Token::Key)?;
                }
                let if_not_exists = self.parse_if_not_exists_clause()?;
                let index_name = self.parse_identifier()?;
                // `USING BTREE|HASH` is accepted (and ignored) both before
                // ON and after the column list.
                self.consume_optional_index_type_clause()?;
                self.consume_token(&Token::On)?;
                let table = self.parse_identifier()?;
                self.consume_token(&Token::LeftParen)?;
                let columns = self.parse_column_list()?;
                self.consume_token(&Token::RightParen)?;
                self.consume_optional_index_type_clause()?;
                self.consume_token(&Token::Semicolon)?;

                Ok(Statement::CreateIndex(CreateIndexStatement {
                    index_name,
                    table,
                    columns,
                    unique,
                    if_not_exists,
                }))
            }
            token => Err(HematiteError::ParseError(format!(
                "Expected TABLE, VIEW, TRIGGER, INDEX, or KEY after CREATE, found: {:?}",
                token
            ))),
        }
    }
1761
    /// True when the lookahead token is one of the six comparison
    /// operators; peek errors (e.g. end of input) yield false.
    fn peek_token_starts_comparison(&self) -> bool {
        matches!(
            self.peek_token(),
            Ok(Token::Equal
                | Token::NotEqual
                | Token::LessThan
                | Token::LessThanOrEqual
                | Token::GreaterThan
                | Token::GreaterThanOrEqual)
        )
    }
1773
1774 fn parse_drop(&mut self) -> Result<Statement> {
1775 self.consume_token(&Token::Drop)?;
1776 match self.peek_token()? {
1777 Token::Table => {
1778 self.consume_token(&Token::Table)?;
1779 let if_exists = self.parse_if_exists_clause()?;
1780 let table = self.parse_identifier()?;
1781 self.consume_token(&Token::Semicolon)?;
1782 Ok(Statement::Drop(DropStatement { table, if_exists }))
1783 }
1784 Token::View => {
1785 self.consume_token(&Token::View)?;
1786 let if_exists = self.parse_if_exists_clause()?;
1787 let view = self.parse_identifier()?;
1788 self.consume_token(&Token::Semicolon)?;
1789 Ok(Statement::DropView(DropViewStatement { view, if_exists }))
1790 }
1791 Token::Trigger => {
1792 self.consume_token(&Token::Trigger)?;
1793 let if_exists = self.parse_if_exists_clause()?;
1794 let trigger = self.parse_identifier()?;
1795 self.consume_token(&Token::Semicolon)?;
1796 Ok(Statement::DropTrigger(DropTriggerStatement {
1797 trigger,
1798 if_exists,
1799 }))
1800 }
1801 Token::Index => {
1802 self.consume_token(&Token::Index)?;
1803 let if_exists = self.parse_if_exists_clause()?;
1804 let index_name = self.parse_identifier()?;
1805 self.consume_token(&Token::On)?;
1806 let table = self.parse_identifier()?;
1807 self.consume_token(&Token::Semicolon)?;
1808 Ok(Statement::DropIndex(DropIndexStatement {
1809 index_name,
1810 table,
1811 if_exists,
1812 }))
1813 }
1814 token => Err(HematiteError::ParseError(format!(
1815 "Expected TABLE, VIEW, TRIGGER, or INDEX after DROP, found: {:?}",
1816 token
1817 ))),
1818 }
1819 }
1820
    /// Parses ALTER TABLE statements: RENAME [COLUMN], ADD (column or
    /// constraint), DROP (column or constraint), and ALTER [COLUMN] with
    /// SET/DROP DEFAULT or NOT NULL. Every form ends with a semicolon.
    fn parse_alter(&mut self) -> Result<Statement> {
        self.consume_token(&Token::Alter)?;
        self.consume_token(&Token::Table)?;
        let table = self.parse_identifier()?;

        match self.peek_token()? {
            Token::Rename => {
                self.consume_token(&Token::Rename)?;
                // RENAME COLUMN old TO new, or RENAME TO new_table.
                let operation = if matches!(self.peek_token(), Ok(Token::Column)) {
                    self.consume_token(&Token::Column)?;
                    let old_name = self.parse_identifier()?;
                    self.consume_token(&Token::To)?;
                    let new_name = self.parse_identifier()?;
                    AlterOperation::RenameColumn { old_name, new_name }
                } else {
                    self.consume_token(&Token::To)?;
                    let new_name = self.parse_identifier()?;
                    AlterOperation::RenameTo(new_name)
                };
                self.consume_token(&Token::Semicolon)?;
                Ok(Statement::Alter(AlterStatement { table, operation }))
            }
            Token::Add => {
                self.consume_token(&Token::Add)?;
                let operation = match self.peek_token()? {
                    Token::Column => {
                        self.consume_token(&Token::Column)?;
                        AlterOperation::AddColumn(self.parse_column_definition()?)
                    }
                    // Constraint keywords route to the table-constraint
                    // parser (which consumes them itself).
                    Token::Constraint | Token::Check | Token::Unique | Token::Foreign => {
                        AlterOperation::AddConstraint(self.parse_table_constraint()?)
                    }
                    // Bare identifier: the COLUMN keyword is optional.
                    Token::Identifier(_) => {
                        AlterOperation::AddColumn(self.parse_column_definition()?)
                    }
                    token => {
                        return Err(HematiteError::ParseError(format!(
                            "Expected COLUMN or constraint after ADD, found: {:?}",
                            token
                        )))
                    }
                };
                self.consume_token(&Token::Semicolon)?;
                Ok(Statement::Alter(AlterStatement { table, operation }))
            }
            Token::Drop => {
                self.consume_token(&Token::Drop)?;
                let operation = match self.peek_token()? {
                    Token::Column => {
                        self.consume_token(&Token::Column)?;
                        AlterOperation::DropColumn(self.parse_identifier()?)
                    }
                    Token::Constraint => {
                        self.consume_token(&Token::Constraint)?;
                        AlterOperation::DropConstraint(self.parse_identifier()?)
                    }
                    // Bare identifier: the COLUMN keyword is optional.
                    Token::Identifier(_) => AlterOperation::DropColumn(self.parse_identifier()?),
                    token => {
                        return Err(HematiteError::ParseError(format!(
                            "Expected COLUMN or CONSTRAINT after DROP, found: {:?}",
                            token
                        )))
                    }
                };
                self.consume_token(&Token::Semicolon)?;
                Ok(Statement::Alter(AlterStatement { table, operation }))
            }
            Token::Alter => {
                self.consume_token(&Token::Alter)?;
                // The COLUMN keyword is optional: ALTER [COLUMN] name ...
                if matches!(self.peek_token(), Ok(Token::Column)) {
                    self.consume_token(&Token::Column)?;
                }
                let column_name = self.parse_identifier()?;
                let operation = match self.peek_token()? {
                    Token::Set => {
                        self.consume_token(&Token::Set)?;
                        match self.peek_token()? {
                            Token::Default => {
                                self.consume_token(&Token::Default)?;
                                let default_value = self.parse_default_value()?;
                                AlterOperation::AlterColumnSetDefault {
                                    column_name,
                                    default_value,
                                }
                            }
                            Token::Not => {
                                self.consume_token(&Token::Not)?;
                                self.consume_token(&Token::Null)?;
                                AlterOperation::AlterColumnSetNotNull { column_name }
                            }
                            token => {
                                return Err(HematiteError::ParseError(format!(
                                    "Expected DEFAULT or NOT NULL after SET, found: {:?}",
                                    token
                                )))
                            }
                        }
                    }
                    Token::Drop => {
                        self.consume_token(&Token::Drop)?;
                        match self.peek_token()? {
                            Token::Default => {
                                self.consume_token(&Token::Default)?;
                                AlterOperation::AlterColumnDropDefault { column_name }
                            }
                            Token::Not => {
                                self.consume_token(&Token::Not)?;
                                self.consume_token(&Token::Null)?;
                                AlterOperation::AlterColumnDropNotNull { column_name }
                            }
                            token => {
                                return Err(HematiteError::ParseError(format!(
                                    "Expected DEFAULT or NOT NULL after DROP, found: {:?}",
                                    token
                                )))
                            }
                        }
                    }
                    token => {
                        return Err(HematiteError::ParseError(format!(
                            "Expected SET or DROP after ALTER COLUMN, found: {:?}",
                            token
                        )))
                    }
                };
                self.consume_token(&Token::Semicolon)?;
                Ok(Statement::Alter(AlterStatement { table, operation }))
            }
            token => Err(HematiteError::ParseError(format!(
                "Expected supported ALTER TABLE operation, found: {:?}",
                token
            ))),
        }
    }
1955
1956 fn parse_identifier(&mut self) -> Result<String> {
1957 let token = self.peek_token()?;
1958 match token {
1959 Token::Identifier(name) => {
1960 self.consume_token(&Token::Identifier(name.clone()))?;
1961 Ok(name)
1962 }
1963 _ => Err(HematiteError::ParseError(format!(
1964 "Expected identifier, found: {:?}",
1965 token
1966 ))),
1967 }
1968 }
1969
1970 fn parse_identifier_reference(&mut self) -> Result<String> {
1971 let first = self.parse_identifier()?;
1972 if matches!(self.peek_token(), Ok(Token::Dot)) {
1973 self.consume_token(&Token::Dot)?;
1974 let second = self.parse_identifier()?;
1975 Ok(format!("{}.{}", first, second))
1976 } else {
1977 Ok(first)
1978 }
1979 }
1980
1981 fn parse_optional_alias(&mut self) -> Result<Option<String>> {
1982 match self.peek_token() {
1983 Ok(Token::As) => {
1984 self.consume_token(&Token::As)?;
1985 Ok(Some(self.parse_identifier()?))
1986 }
1987 Ok(Token::Identifier(_)) => Ok(Some(self.parse_identifier()?)),
1988 _ => Ok(None),
1989 }
1990 }
1991
1992 fn parse_if_not_exists_clause(&mut self) -> Result<bool> {
1993 if matches!(self.peek_token(), Ok(Token::If)) {
1994 self.consume_token(&Token::If)?;
1995 self.consume_token(&Token::Not)?;
1996 self.consume_token(&Token::Exists)?;
1997 return Ok(true);
1998 }
1999 Ok(false)
2000 }
2001
2002 fn parse_if_exists_clause(&mut self) -> Result<bool> {
2003 if matches!(self.peek_token(), Ok(Token::If)) {
2004 self.consume_token(&Token::If)?;
2005 self.consume_token(&Token::Exists)?;
2006 return Ok(true);
2007 }
2008 Ok(false)
2009 }
2010
    /// Parses an alias and errors when none is present; `subject` names
    /// the construct (e.g. a derived table) in the error message.
    fn parse_required_alias(&mut self, subject: &str) -> Result<String> {
        self.parse_optional_alias()?
            .ok_or_else(|| HematiteError::ParseError(format!("{} must have an alias", subject)))
    }
2015
2016 fn peek_identifier_keyword(&self, keyword: &str) -> bool {
2017 matches!(
2018 self.peek_token(),
2019 Ok(Token::Identifier(name)) if name == keyword
2020 )
2021 }
2022
    /// Consumes the identifier `keyword` (exact, case-sensitive match).
    /// A case-insensitive match produces a dedicated capitalization error;
    /// the exact-match arm must stay first so correct input never hits it.
    fn consume_identifier_keyword(&mut self, keyword: &str) -> Result<()> {
        match self.peek_token()? {
            Token::Identifier(name) if name == keyword => {
                self.consume_token(&Token::Identifier(name.clone()))
            }
            Token::Identifier(name) if name.eq_ignore_ascii_case(keyword) => {
                Err(HematiteError::ParseError(format!(
                    "Keyword '{}' must be capitalized as '{}'",
                    name, keyword
                )))
            }
            token => Err(HematiteError::ParseError(format!(
                "Expected {}, found: {:?}",
                keyword, token
            ))),
        }
    }
2040
2041 fn consume_optional_equals(&mut self) -> Result<()> {
2042 if matches!(self.peek_token(), Ok(Token::Equal)) {
2043 self.consume_token(&Token::Equal)?;
2044 }
2045 Ok(())
2046 }
2047
    /// Consumes an optional `USING BTREE` / `USING HASH` index-type
    /// clause. The clause is validated but otherwise discarded.
    fn consume_optional_index_type_clause(&mut self) -> Result<()> {
        if !self.peek_identifier_keyword("USING") {
            return Ok(());
        }
        self.consume_identifier_keyword("USING")?;
        match self.peek_token()? {
            Token::Identifier(name) if name == "BTREE" || name == "HASH" => {
                self.consume_token(&Token::Identifier(name.clone()))?;
                Ok(())
            }
            token => Err(HematiteError::ParseError(format!(
                "Expected BTREE or HASH after USING, found: {:?}",
                token
            ))),
        }
    }
2064
    /// Consumes and discards MySQL-style table options after the closing
    /// paren of CREATE TABLE: ENGINE, AUTO_INCREMENT, [DEFAULT] CHARSET,
    /// [DEFAULT] CHARACTER SET, and COLLATE, each with an optional `=`.
    /// Loops until no recognized option remains.
    fn consume_ignored_create_table_options(&mut self) -> Result<()> {
        loop {
            if self.peek_identifier_keyword("ENGINE") {
                self.consume_identifier_keyword("ENGINE")?;
                self.consume_optional_equals()?;
                self.parse_identifier()?;
                continue;
            }

            if matches!(self.peek_token(), Ok(Token::AutoIncrement)) {
                self.consume_token(&Token::AutoIncrement)?;
                self.consume_optional_equals()?;
                self.consume_positive_integer_literal("AUTO_INCREMENT")?;
                continue;
            }

            if matches!(self.peek_token(), Ok(Token::Default)) {
                self.consume_token(&Token::Default)?;
                if self.peek_identifier_keyword("CHARSET") {
                    self.consume_identifier_keyword("CHARSET")?;
                    self.consume_optional_equals()?;
                    self.parse_identifier()?;
                    continue;
                }
                if self.peek_identifier_keyword("CHARACTER") {
                    self.consume_identifier_keyword("CHARACTER")?;
                    self.consume_token(&Token::Set)?;
                    self.consume_optional_equals()?;
                    self.parse_identifier()?;
                    continue;
                }
                // DEFAULT was consumed but nothing supported follows:
                // prefer a capitalization hint (e.g. lowercase `charset`)
                // when one applies, otherwise a generic error.
                if let Some(message) = capitalization_hint_for_token(&self.peek_token()?) {
                    return Err(HematiteError::ParseError(message));
                }
                return Err(HematiteError::ParseError(
                    "Unsupported DEFAULT table option".to_string(),
                ));
            }

            if self.peek_identifier_keyword("CHARACTER") {
                self.consume_identifier_keyword("CHARACTER")?;
                self.consume_token(&Token::Set)?;
                self.consume_optional_equals()?;
                self.parse_identifier()?;
                continue;
            }

            if self.peek_identifier_keyword("CHARSET") {
                self.consume_identifier_keyword("CHARSET")?;
                self.consume_optional_equals()?;
                self.parse_identifier()?;
                continue;
            }

            if self.peek_identifier_keyword("COLLATE") {
                self.consume_identifier_keyword("COLLATE")?;
                self.consume_optional_equals()?;
                self.parse_identifier()?;
                continue;
            }

            break;
        }

        Ok(())
    }
2131
2132 fn parse_column_list(&mut self) -> Result<Vec<String>> {
2133 let mut columns = Vec::new();
2134
2135 loop {
2136 let token = self.peek_token()?;
2137 match token {
2138 Token::Identifier(name) => {
2139 self.consume_token(&Token::Identifier(name.clone()))?;
2140 columns.push(name);
2141 }
2142 _ => {
2143 return Err(HematiteError::ParseError(format!(
2144 "Expected column name, found: {:?}",
2145 token
2146 )))
2147 }
2148 }
2149
2150 if self.peek_token()? == Token::Comma {
2151 self.consume_token(&Token::Comma)?;
2152 continue;
2153 } else {
2154 break;
2155 }
2156 }
2157
2158 Ok(columns)
2159 }
2160
2161 fn parse_update_assignments(&mut self) -> Result<Vec<UpdateAssignment>> {
2162 let mut assignments = Vec::new();
2163
2164 loop {
2165 let column = self.parse_identifier()?;
2166 self.consume_token(&Token::Equal)?;
2167 let value = self.parse_expression()?;
2168 assignments.push(UpdateAssignment { column, value });
2169
2170 if matches!(self.peek_token(), Ok(Token::Comma)) {
2171 self.consume_token(&Token::Comma)?;
2172 continue;
2173 }
2174
2175 break;
2176 }
2177
2178 Ok(assignments)
2179 }
2180
2181 fn parse_value_lists(&mut self) -> Result<Vec<Vec<Expression>>> {
2182 let mut value_lists = Vec::new();
2183
2184 loop {
2185 self.consume_token(&Token::LeftParen)?;
2186 let mut values = Vec::new();
2187
2188 loop {
2189 values.push(self.parse_expression()?);
2190
2191 if self.peek_token()? == Token::Comma {
2192 self.consume_token(&Token::Comma)?;
2193 continue;
2194 } else {
2195 break;
2196 }
2197 }
2198
2199 self.consume_token(&Token::RightParen)?;
2200 value_lists.push(values);
2201
2202 if self.peek_token()? == Token::Comma {
2203 self.consume_token(&Token::Comma)?;
2204 continue;
2205 } else {
2206 break;
2207 }
2208 }
2209
2210 Ok(value_lists)
2211 }
2212
2213 fn parse_table_definition_items(
2214 &mut self,
2215 ) -> Result<(Vec<ColumnDefinition>, Vec<TableConstraint>)> {
2216 let mut columns = Vec::new();
2217 let mut constraints = Vec::new();
2218
2219 loop {
2220 match self.peek_token()? {
2221 Token::Constraint | Token::Check | Token::Foreign => {
2222 constraints.push(self.parse_table_constraint()?);
2223 }
2224 Token::Identifier(_) => columns.push(self.parse_column_definition()?),
2225 token => {
2226 return Err(HematiteError::ParseError(format!(
2227 "Expected column definition or table constraint, found: {:?}",
2228 token
2229 )))
2230 }
2231 }
2232
2233 if self.peek_token()? == Token::Comma {
2234 self.consume_token(&Token::Comma)?;
2235 continue;
2236 } else {
2237 break;
2238 }
2239 }
2240
2241 Ok((columns, constraints))
2242 }
2243
    /// Parses a single column definition: `name TYPE [modifiers...]`.
    ///
    /// Modifiers may appear in any order after the type, and a repeated
    /// modifier simply overwrites the earlier occurrence. Recognized here:
    /// CHARSET / CHARACTER SET, COLLATE (all accepting an optional `=`),
    /// NOT NULL, PRIMARY KEY (also implies NOT NULL), UNIQUE,
    /// AUTO_INCREMENT, DEFAULT <literal>, [CONSTRAINT name] CHECK (...),
    /// and an inline REFERENCES foreign key. The loop stops at the first
    /// token that matches none of these (e.g. `,` or `)`).
    fn parse_column_definition(&mut self) -> Result<ColumnDefinition> {
        let name = self.parse_identifier()?;

        let data_type = self.parse_data_type()?;

        // Defaults before any modifier is seen: nullable, no flags, no extras.
        let mut nullable = true;
        let mut primary_key = false;
        let mut auto_increment = false;
        let mut unique = false;
        let mut default_value = None;
        let mut character_set = None;
        let mut collation = None;
        let mut check_constraint = None;
        let mut references = None;

        // Peek errors (end of input) terminate the modifier loop rather than
        // propagating; the caller's subsequent consume will surface them.
        while let Ok(token) = self.peek_token() {
            match token {
                // CHARSET and CHARACTER SET are lexed as plain identifiers,
                // hence the keyword-string guards instead of token variants.
                Token::Identifier(_) if self.peek_identifier_keyword("CHARSET") => {
                    self.consume_identifier_keyword("CHARSET")?;
                    self.consume_optional_equals()?;
                    character_set = Some(self.parse_identifier()?);
                }
                Token::Identifier(_) if self.peek_identifier_keyword("CHARACTER") => {
                    self.consume_identifier_keyword("CHARACTER")?;
                    self.consume_token(&Token::Set)?;
                    self.consume_optional_equals()?;
                    character_set = Some(self.parse_identifier()?);
                }
                Token::Identifier(_) if self.peek_identifier_keyword("COLLATE") => {
                    self.consume_identifier_keyword("COLLATE")?;
                    self.consume_optional_equals()?;
                    collation = Some(self.parse_identifier()?);
                }
                Token::Not => {
                    self.consume_token(&Token::Not)?;
                    // NOTE(review): a NOT that is *not* followed by NULL is
                    // consumed and silently ignored here — confirm whether a
                    // bare NOT should instead be a parse error.
                    if self.peek_token()? == Token::Null {
                        self.consume_token(&Token::Null)?;
                        nullable = false;
                    }
                }
                Token::Primary => {
                    self.consume_token(&Token::Primary)?;
                    self.consume_token(&Token::Key)?;
                    primary_key = true;
                    // PRIMARY KEY columns are implicitly NOT NULL.
                    nullable = false;
                }
                Token::Unique => {
                    self.consume_token(&Token::Unique)?;
                    unique = true;
                }
                Token::AutoIncrement => {
                    self.consume_token(&Token::AutoIncrement)?;
                    auto_increment = true;
                }
                Token::Default => {
                    self.consume_token(&Token::Default)?;
                    default_value = Some(self.parse_default_value()?);
                }
                Token::Constraint | Token::Check => {
                    // NOTE(review): CONSTRAINT here is always followed by
                    // CHECK; `CONSTRAINT name REFERENCES ...` would fail in
                    // parse_check_constraint_definition — verify intended.
                    let constraint_name = self.parse_optional_constraint_name()?;
                    check_constraint =
                        Some(self.parse_check_constraint_definition(constraint_name)?);
                }
                Token::References => {
                    // Inline (column-level) foreign key; the column list is
                    // implicitly just this column.
                    references = Some(self.parse_column_foreign_key(None, &name)?);
                }
                _ => break,
            }
        }

        Ok(ColumnDefinition {
            name,
            data_type,
            character_set,
            collation,
            nullable,
            primary_key,
            auto_increment,
            unique,
            default_value,
            check_constraint,
            references,
        })
    }
2328
2329 fn parse_table_constraint(&mut self) -> Result<TableConstraint> {
2330 let constraint_name = self.parse_optional_constraint_name()?;
2331 match self.peek_token()? {
2332 Token::Check => Ok(TableConstraint::Check(
2333 self.parse_check_constraint_definition(constraint_name)?,
2334 )),
2335 Token::Unique => Ok(TableConstraint::Unique(
2336 self.parse_unique_constraint_definition(constraint_name)?,
2337 )),
2338 Token::Foreign => Ok(TableConstraint::ForeignKey(
2339 self.parse_table_foreign_key(constraint_name)?,
2340 )),
2341 token => Err(HematiteError::ParseError(format!(
2342 "Expected CHECK, UNIQUE, or FOREIGN KEY constraint, found: {:?}",
2343 token
2344 ))),
2345 }
2346 }
2347
2348 fn parse_optional_constraint_name(&mut self) -> Result<Option<String>> {
2349 if matches!(self.peek_token(), Ok(Token::Constraint)) {
2350 self.consume_token(&Token::Constraint)?;
2351 return Ok(Some(self.parse_identifier()?));
2352 }
2353 Ok(None)
2354 }
2355
2356 fn parse_parenthesized_condition(&mut self) -> Result<Condition> {
2357 self.consume_token(&Token::LeftParen)?;
2358 let condition = self.parse_or_condition()?;
2359 self.consume_token(&Token::RightParen)?;
2360 Ok(condition)
2361 }
2362
2363 fn parse_check_constraint_definition(
2364 &mut self,
2365 name: Option<String>,
2366 ) -> Result<CheckConstraintDefinition> {
2367 self.consume_token(&Token::Check)?;
2368 let condition = self.parse_parenthesized_condition()?;
2369 Ok(CheckConstraintDefinition {
2370 name,
2371 expression_sql: condition.to_sql(),
2372 })
2373 }
2374
2375 fn parse_unique_constraint_definition(
2376 &mut self,
2377 name: Option<String>,
2378 ) -> Result<crate::parser::ast::UniqueConstraintDefinition> {
2379 self.consume_token(&Token::Unique)?;
2380 let columns = self.parse_column_reference_list()?;
2381 Ok(crate::parser::ast::UniqueConstraintDefinition { name, columns })
2382 }
2383
2384 fn parse_table_foreign_key(&mut self, name: Option<String>) -> Result<ForeignKeyDefinition> {
2385 self.consume_token(&Token::Foreign)?;
2386 self.consume_token(&Token::Key)?;
2387 let columns = self.parse_column_reference_list()?;
2388 self.parse_foreign_key_reference(name, columns)
2389 }
2390
2391 fn parse_column_foreign_key(
2392 &mut self,
2393 name: Option<String>,
2394 column: &str,
2395 ) -> Result<ForeignKeyDefinition> {
2396 self.parse_foreign_key_reference(name, vec![column.to_string()])
2397 }
2398
2399 fn parse_column_reference_list(&mut self) -> Result<Vec<String>> {
2400 self.consume_token(&Token::LeftParen)?;
2401 let mut columns = Vec::new();
2402 loop {
2403 columns.push(self.parse_identifier()?);
2404 if matches!(self.peek_token(), Ok(Token::Comma)) {
2405 self.consume_token(&Token::Comma)?;
2406 continue;
2407 }
2408 break;
2409 }
2410 self.consume_token(&Token::RightParen)?;
2411 Ok(columns)
2412 }
2413
2414 fn parse_foreign_key_reference(
2415 &mut self,
2416 name: Option<String>,
2417 columns: Vec<String>,
2418 ) -> Result<ForeignKeyDefinition> {
2419 self.consume_token(&Token::References)?;
2420 let referenced_table = self.parse_identifier()?;
2421 let referenced_columns = self.parse_column_reference_list()?;
2422 let mut on_delete = crate::parser::ast::ForeignKeyAction::Restrict;
2423 let mut on_update = crate::parser::ast::ForeignKeyAction::Restrict;
2424 while matches!(self.peek_token(), Ok(Token::On)) {
2425 self.consume_token(&Token::On)?;
2426 let target_is_delete = match self.peek_token()? {
2427 Token::Delete => {
2428 self.consume_token(&Token::Delete)?;
2429 true
2430 }
2431 Token::Update => {
2432 self.consume_token(&Token::Update)?;
2433 false
2434 }
2435 token => {
2436 return Err(HematiteError::ParseError(format!(
2437 "Expected DELETE or UPDATE after ON, found: {:?}",
2438 token
2439 )))
2440 }
2441 };
2442 let action = self.parse_foreign_key_action()?;
2443 if target_is_delete {
2444 on_delete = action;
2445 } else {
2446 on_update = action;
2447 }
2448 }
2449 Ok(ForeignKeyDefinition {
2450 name,
2451 columns,
2452 referenced_table,
2453 referenced_columns,
2454 on_delete,
2455 on_update,
2456 })
2457 }
2458
2459 fn parse_foreign_key_action(&mut self) -> Result<crate::parser::ast::ForeignKeyAction> {
2460 match self.peek_token()? {
2461 Token::Restrict => {
2462 self.consume_token(&Token::Restrict)?;
2463 Ok(crate::parser::ast::ForeignKeyAction::Restrict)
2464 }
2465 Token::Cascade => {
2466 self.consume_token(&Token::Cascade)?;
2467 Ok(crate::parser::ast::ForeignKeyAction::Cascade)
2468 }
2469 Token::Set => {
2470 self.consume_token(&Token::Set)?;
2471 self.consume_token(&Token::Null)?;
2472 Ok(crate::parser::ast::ForeignKeyAction::SetNull)
2473 }
2474 token => Err(HematiteError::ParseError(format!(
2475 "Expected foreign key action, found: {:?}",
2476 token
2477 ))),
2478 }
2479 }
2480
    /// Parses a SQL data type name, including parameterized forms:
    /// DECIMAL[(p[,s])], INTERVAL <q> TO <q>, TIME [WITH TIME ZONE],
    /// VARCHAR/CHAR/BINARY/VARBINARY(length), and ENUM('a', ...).
    ///
    /// Parameterized and integer arms `return` early after consuming their
    /// own tokens; simple arms fall through to the shared `consume_token`
    /// at the bottom.
    fn parse_data_type(&mut self) -> Result<SqlTypeName> {
        let token = self.peek_token()?;
        let data_type = match token {
            Token::Int8 => return self.parse_integer_type(Token::Int8, SqlTypeName::Int8),
            Token::Int16 => return self.parse_integer_type(Token::Int16, SqlTypeName::Int16),
            // INT and INT32 are aliases; both map to SqlTypeName::Int.
            Token::Int32 | Token::Int => {
                return self.parse_integer_type(token.clone(), SqlTypeName::Int)
            }
            Token::Int64 => return self.parse_integer_type(Token::Int64, SqlTypeName::Int64),
            Token::Int128 => return self.parse_integer_type(Token::Int128, SqlTypeName::Int128),
            Token::UInt8 => SqlTypeName::UInt8,
            Token::UInt16 => SqlTypeName::UInt16,
            Token::UInt32 | Token::UInt => SqlTypeName::UInt,
            Token::UInt64 => SqlTypeName::UInt64,
            Token::UInt128 => SqlTypeName::UInt128,
            Token::Text => SqlTypeName::Text,
            Token::Boolean | Token::Bool => SqlTypeName::Boolean,
            Token::Float32 => SqlTypeName::Float32,
            Token::Float | Token::Float64 => SqlTypeName::Float,
            Token::Decimal => {
                self.consume_token(&token)?;
                // Precision/scale are optional: DECIMAL, DECIMAL(p), DECIMAL(p,s).
                let (precision, scale) = self.parse_optional_numeric_precision()?;
                return Ok(SqlTypeName::Decimal { precision, scale });
            }
            Token::Blob => SqlTypeName::Blob,
            Token::Date => SqlTypeName::Date,
            Token::Interval => {
                self.consume_token(&token)?;
                // The qualifiers (YEAR/MONTH/DAY/SECOND) are lexed as plain
                // identifiers, so they are matched by string below.
                let start = match self.peek_token()? {
                    Token::Identifier(name) => name,
                    other => {
                        return Err(HematiteError::ParseError(format!(
                            "Expected INTERVAL qualifier after INTERVAL, found: {:?}",
                            other
                        )))
                    }
                };
                self.consume_token(&Token::Identifier(start.clone()))?;
                self.consume_token(&Token::To)?;
                let end = match self.peek_token()? {
                    Token::Identifier(name) => name,
                    other => {
                        return Err(HematiteError::ParseError(format!(
                            "Expected INTERVAL qualifier after TO, found: {:?}",
                            other
                        )))
                    }
                };
                self.consume_token(&Token::Identifier(end.clone()))?;
                // Only the two canonical, case-sensitive pairs are supported.
                return match (start.as_str(), end.as_str()) {
                    ("YEAR", "MONTH") => Ok(SqlTypeName::IntervalYearMonth),
                    ("DAY", "SECOND") => Ok(SqlTypeName::IntervalDaySecond),
                    _ => Err(HematiteError::ParseError(format!(
                        "Unsupported INTERVAL data type qualifier '{} TO {}'",
                        start, end
                    ))),
                };
            }
            Token::Time => {
                self.consume_token(&token)?;
                // Optional "WITH TIME ZONE" suffix.
                if matches!(self.peek_token(), Ok(Token::With)) {
                    self.consume_token(&Token::With)?;
                    self.consume_token(&Token::Time)?;
                    self.consume_token(&Token::Zone)?;
                    return Ok(SqlTypeName::TimeWithTimeZone);
                }
                return Ok(SqlTypeName::Time);
            }
            Token::DateTime => SqlTypeName::DateTime,
            // The four length-parameterized types share one parse path.
            Token::Varchar | Token::Char | Token::BinaryType | Token::VarBinary => {
                self.consume_token(&token)?;
                let length = self.parse_type_length()?;
                return Ok(match token {
                    Token::Varchar => SqlTypeName::VarChar(length),
                    Token::Char => SqlTypeName::Char(length),
                    Token::BinaryType => SqlTypeName::Binary(length),
                    Token::VarBinary => SqlTypeName::VarBinary(length),
                    _ => unreachable!(),
                });
            }
            Token::Enum => {
                self.consume_token(&token)?;
                return Ok(SqlTypeName::Enum(self.parse_enum_variants()?));
            }
            Token::Identifier(name) => {
                // An identifier here is never a valid type, but if it is a
                // lowercase spelling of one, emit a capitalization hint.
                if let Some(keyword) = uppercase_keyword_match(&name, &DATA_TYPE_KEYWORDS) {
                    return Err(HematiteError::ParseError(format!(
                        "Keyword '{}' must be capitalized as '{}'",
                        name, keyword
                    )));
                }
                return Err(HematiteError::ParseError(format!(
                    "Expected data type, found: {:?}",
                    Token::Identifier(name)
                )));
            }
            _ => {
                return Err(HematiteError::ParseError(format!(
                    "Expected data type, found: {:?}",
                    token
                )))
            }
        };

        // Simple (non-parameterized) types: consume the single type token.
        self.consume_token(&token)?;
        Ok(data_type)
    }
2588
2589 fn parse_integer_type(
2590 &mut self,
2591 token: Token,
2592 signed_type: SqlTypeName,
2593 ) -> Result<SqlTypeName> {
2594 self.consume_token(&token)?;
2595 Ok(signed_type)
2596 }
2597
2598 fn parse_enum_variants(&mut self) -> Result<Vec<String>> {
2599 self.consume_token(&Token::LeftParen)?;
2600 let mut variants = Vec::new();
2601 loop {
2602 match self.peek_token()? {
2603 Token::StringLiteral(value) => {
2604 self.consume_token(&Token::StringLiteral(value.clone()))?;
2605 variants.push(value);
2606 }
2607 token => {
2608 return Err(HematiteError::ParseError(format!(
2609 "Expected ENUM string literal, found: {:?}",
2610 token
2611 )))
2612 }
2613 }
2614
2615 match self.peek_token()? {
2616 Token::Comma => {
2617 self.consume_token(&Token::Comma)?;
2618 }
2619 Token::RightParen => {
2620 self.consume_token(&Token::RightParen)?;
2621 break;
2622 }
2623 token => {
2624 return Err(HematiteError::ParseError(format!(
2625 "Expected ',' or ')' in ENUM type, found: {:?}",
2626 token
2627 )))
2628 }
2629 }
2630 }
2631
2632 if variants.is_empty() {
2633 return Err(HematiteError::ParseError(
2634 "ENUM type requires at least one variant".to_string(),
2635 ));
2636 }
2637
2638 Ok(variants)
2639 }
2640
2641 fn parse_type_length(&mut self) -> Result<u32> {
2642 self.consume_token(&Token::LeftParen)?;
2643 let length = match self.peek_token()? {
2644 Token::NumberLiteral(length) => {
2645 let parsed = parse_positive_u32_literal(&length, "length")?;
2646 self.consume_token(&Token::NumberLiteral(length))?;
2647 parsed
2648 }
2649 token => {
2650 return Err(HematiteError::ParseError(format!(
2651 "Expected positive integer length, found: {:?}",
2652 token
2653 )))
2654 }
2655 };
2656 self.consume_token(&Token::RightParen)?;
2657 Ok(length)
2658 }
2659
2660 fn parse_optional_numeric_precision(&mut self) -> Result<(Option<u32>, Option<u32>)> {
2661 if !matches!(self.peek_token(), Ok(Token::LeftParen)) {
2662 return Ok((None, None));
2663 }
2664
2665 self.consume_token(&Token::LeftParen)?;
2666 let precision = self.consume_positive_integer_literal("precision")?;
2667 let mut scale = None;
2668 if matches!(self.peek_token(), Ok(Token::Comma)) {
2669 self.consume_token(&Token::Comma)?;
2670 scale = Some(self.consume_positive_integer_literal("scale")?);
2671 }
2672 self.consume_token(&Token::RightParen)?;
2673 Ok((Some(precision), scale))
2674 }
2675
    /// Consumes the next NumberLiteral token and returns it as a u32.
    ///
    /// NOTE(review): despite the "positive" in the name, this delegates to
    /// `parse_non_negative_u32_literal`, so 0 is accepted (which e.g.
    /// DECIMAL(10, 0) relies on), and the error text says "non-negative".
    /// Consider renaming to match — verify callers first.
    fn consume_positive_integer_literal(&mut self, label: &str) -> Result<u32> {
        match self.peek_token()? {
            Token::NumberLiteral(value) => {
                // Validate before consuming so the token stream is untouched
                // on a bad literal.
                let parsed = parse_non_negative_u32_literal(&value, label)?;
                self.consume_token(&Token::NumberLiteral(value))?;
                Ok(parsed)
            }
            token => Err(HematiteError::ParseError(format!(
                "Expected non-negative integer {} value, found: {:?}",
                label, token
            ))),
        }
    }
2689
2690 fn parse_default_value(&mut self) -> Result<LiteralValue> {
2691 let token = self.peek_token()?;
2692 match token {
2693 Token::StringLiteral(value) => {
2694 self.consume_token(&Token::StringLiteral(value.clone()))?;
2695 Ok(LiteralValue::Text(value))
2696 }
2697 Token::BlobLiteral(value) => {
2698 self.consume_token(&Token::BlobLiteral(value.clone()))?;
2699 Ok(LiteralValue::Blob(value))
2700 }
2701 Token::NumberLiteral(value) => {
2702 self.consume_token(&Token::NumberLiteral(value.clone()))?;
2703 if value.contains('.') {
2704 Ok(LiteralValue::Float(normalize_float_literal(&value)))
2705 } else {
2706 Ok(LiteralValue::Integer(value.parse::<i128>().map_err(
2707 |_| {
2708 HematiteError::ParseError(format!(
2709 "Invalid integer literal '{}'",
2710 value
2711 ))
2712 },
2713 )?))
2714 }
2715 }
2716 Token::BooleanLiteral(value) => {
2717 self.consume_token(&Token::BooleanLiteral(value.clone()))?;
2718 Ok(LiteralValue::Boolean(value))
2719 }
2720 Token::NullLiteral | Token::Null => {
2721 if token == Token::NullLiteral {
2722 self.consume_token(&Token::NullLiteral)?;
2723 } else {
2724 self.consume_token(&Token::Null)?;
2725 }
2726 Ok(LiteralValue::Null)
2727 }
2728 _ => Err(HematiteError::ParseError(format!(
2729 "Expected DEFAULT literal (NULL, number, string, boolean), found: {:?}",
2730 token
2731 ))),
2732 }
2733 }
2734
2735 fn peek_token(&self) -> Result<Token> {
2736 if self.position < self.tokens.len() {
2737 Ok(self.tokens[self.position].clone())
2738 } else {
2739 Err(HematiteError::ParseError(
2740 "Unexpected end of input".to_string(),
2741 ))
2742 }
2743 }
2744
2745 fn next_token_is(&self, expected: &Token) -> bool {
2746 self.tokens
2747 .get(self.position + 1)
2748 .is_some_and(|token| token == expected)
2749 }
2750
2751 fn consume_token(&mut self, expected: &Token) -> Result<()> {
2752 let token = self.peek_token()?;
2753 if token == *expected {
2754 self.position += 1;
2755 Ok(())
2756 } else if let Token::Identifier(name) = &token {
2757 if let Some(keyword) = uppercase_keyword_match(name, ALL_UPPERCASE_KEYWORDS) {
2758 Err(HematiteError::ParseError(format!(
2759 "Keyword '{}' must be capitalized as '{}'",
2760 name, keyword
2761 )))
2762 } else if let Some(keyword) = token_keyword_name(expected) {
2763 if name.eq_ignore_ascii_case(keyword) {
2764 Err(HematiteError::ParseError(format!(
2765 "Keyword '{}' must be capitalized as '{}'",
2766 name, keyword
2767 )))
2768 } else {
2769 Err(HematiteError::ParseError(format!(
2770 "Expected {:?}, found: {:?}",
2771 expected, token
2772 )))
2773 }
2774 } else {
2775 Err(HematiteError::ParseError(format!(
2776 "Expected {:?}, found: {:?}",
2777 expected, token
2778 )))
2779 }
2780 } else {
2781 Err(HematiteError::ParseError(format!(
2782 "Expected {:?}, found: {:?}",
2783 expected, token
2784 )))
2785 }
2786 }
2787}
2788
2789pub fn parse_condition_fragment(sql: &str) -> Result<Condition> {
2790 let mut lexer = Lexer::new(sql.to_string());
2791 lexer.tokenize()?;
2792 let mut parser = Parser::new(lexer.get_tokens().to_vec());
2793 let condition = parser.parse_or_condition()?;
2794 if parser.position != parser.tokens.len() {
2795 return Err(HematiteError::ParseError(
2796 "Unexpected trailing tokens in CHECK constraint".to_string(),
2797 ));
2798 }
2799 Ok(condition)
2800}
2801
2802fn parse_non_negative_integer_literal(value: &str, label: &str) -> Result<usize> {
2803 value.parse::<usize>().map_err(|_| {
2804 HematiteError::ParseError(format!(
2805 "Expected non-negative integer after {}, found: {}",
2806 label, value
2807 ))
2808 })
2809}
2810
2811fn parse_non_negative_u32_literal(value: &str, label: &str) -> Result<u32> {
2812 value.parse::<u32>().map_err(|_| {
2813 HematiteError::ParseError(format!(
2814 "Expected non-negative integer {} value, found: {}",
2815 label, value
2816 ))
2817 })
2818}
2819
2820fn parse_positive_u32_literal(value: &str, label: &str) -> Result<u32> {
2821 let parsed = value.parse::<u32>().map_err(|_| {
2822 HematiteError::ParseError(format!(
2823 "Expected positive integer {}, found: {}",
2824 label, value
2825 ))
2826 })?;
2827 if parsed == 0 {
2828 return Err(HematiteError::ParseError(format!(
2829 "Expected positive integer {}, found: {}",
2830 label, value
2831 )));
2832 }
2833 Ok(parsed)
2834}
2835
/// Canonical spellings of statement-starting keywords. `parse` checks a
/// leading identifier against this list (case-insensitively) so a lowercase
/// spelling like `select` yields a capitalization hint rather than a generic
/// parse error.
const TOP_LEVEL_KEYWORDS: &[&str] = &[
    "BEGIN",
    "COMMIT",
    "ROLLBACK",
    "SAVEPOINT",
    "RELEASE",
    "EXPLAIN",
    "DESCRIBE",
    "SHOW",
    "SELECT",
    "UPDATE",
    "INSERT",
    "DELETE",
    "CREATE",
    "ALTER",
    "DROP",
    "WITH",
];
2854
/// Canonical spellings of every keyword the parser recognizes, used by
/// `consume_token` (and `capitalization_hint_for_token`) to turn a
/// mismatching identifier that is a miscapitalized keyword into a targeted
/// "must be capitalized" error.
const ALL_UPPERCASE_KEYWORDS: &[&str] = &[
    "BEGIN",
    "COMMIT",
    "ROLLBACK",
    "SAVEPOINT",
    "RELEASE",
    "SELECT",
    "UPDATE",
    "FROM",
    "INSERT",
    "DELETE",
    "DROP",
    "EXPLAIN",
    "DESCRIBE",
    "SHOW",
    "TABLES",
    "VIEWS",
    "INDEXES",
    "TRIGGERS",
    "ALTER",
    "ADD",
    "IF",
    "INTO",
    "SET",
    "VALUES",
    "CREATE",
    "VIEW",
    "TRIGGER",
    "INDEX",
    "EXISTS",
    "UNION",
    "INTERSECT",
    "EXCEPT",
    "ALL",
    "WITH",
    "RECURSIVE",
    "LEFT",
    "RIGHT",
    "FULL",
    "OUTER",
    "INNER",
    "JOIN",
    "ON",
    "AS",
    "DISTINCT",
    "CAST",
    "TABLE",
    "COLUMN",
    "WHERE",
    "GROUP",
    "HAVING",
    "ORDER",
    "BY",
    "ASC",
    "DESC",
    "OVER",
    "PARTITION",
    "INTERVAL",
    "LIMIT",
    "OFFSET",
    "COUNT",
    "SUM",
    "AVG",
    "MIN",
    "MAX",
    "INT8",
    "INT16",
    "INT",
    "INT32",
    "INT64",
    "INT128",
    "UINT8",
    "UINT16",
    "UINT",
    "UINT32",
    "UINT64",
    "UINT128",
    "TEXT",
    "BOOLEAN",
    "FLOAT",
    "FLOAT32",
    "FLOAT64",
    "BOOL",
    "DECIMAL",
    "BLOB",
    "DATE",
    "TIME",
    "DATETIME",
    "ZONE",
    "CHAR",
    "VARCHAR",
    "BINARY",
    "VARBINARY",
    "ENUM",
    "AUTO_INCREMENT",
    "UNIQUE",
    "PRIMARY",
    "KEY",
    "DUPLICATE",
    "CONSTRAINT",
    "CHECK",
    "FOREIGN",
    "REFERENCES",
    "CASCADE",
    "RESTRICT",
    "RENAME",
    "TO",
    "AFTER",
    "NOT",
    "IS",
    "NULL",
    "DEFAULT",
    "IN",
    "BETWEEN",
    "LIKE",
    "CASE",
    "WHEN",
    "THEN",
    "ELSE",
    "END",
    "AND",
    "OR",
    "USING",
    "ENGINE",
    "CHARSET",
    "CHARACTER",
    "COLLATE",
    "BTREE",
    "HASH",
];
2985
/// Canonical spellings of data type names; `parse_data_type` checks an
/// unexpected identifier against this list so `varchar` etc. produce a
/// capitalization hint instead of a generic "expected data type" error.
const DATA_TYPE_KEYWORDS: &[&str] = &[
    "INT8",
    "INT16",
    "INT",
    "INT32",
    "INT64",
    "INT128",
    "UINT8",
    "UINT16",
    "UINT",
    "UINT32",
    "UINT64",
    "UINT128",
    "TEXT",
    "BOOLEAN",
    "BOOL",
    "FLOAT",
    "FLOAT32",
    "FLOAT64",
    "DECIMAL",
    "BLOB",
    "DATE",
    "TIME",
    "DATETIME",
    "CHAR",
    "VARCHAR",
    "BINARY",
    "VARBINARY",
    "ENUM",
];
3016
/// Returns the canonical keyword when `name` matches one of `keywords`
/// case-insensitively but is not already spelled exactly that way
/// (i.e. `name` is a miscapitalized keyword). Returns None otherwise.
fn uppercase_keyword_match<'a>(name: &str, keywords: &'a [&'a str]) -> Option<&'a str> {
    for &keyword in keywords {
        if name != keyword && name.eq_ignore_ascii_case(keyword) {
            return Some(keyword);
        }
    }
    None
}
3023
3024fn capitalization_hint_for_token(token: &Token) -> Option<String> {
3025 match token {
3026 Token::Identifier(name) => uppercase_keyword_match(name, ALL_UPPERCASE_KEYWORDS)
3027 .map(|keyword| format!("Keyword '{}' must be capitalized as '{}'", name, keyword)),
3028 _ => None,
3029 }
3030}
3031
/// Maps a keyword token to its canonical uppercase spelling, mirroring
/// `ALL_UPPERCASE_KEYWORDS`. Used by `consume_token` to suggest the correct
/// capitalization when an identifier matches the expected keyword
/// case-insensitively. Returns None for non-keyword tokens (identifiers,
/// literals, punctuation).
fn token_keyword_name(token: &Token) -> Option<&'static str> {
    match token {
        Token::Begin => Some("BEGIN"),
        Token::Commit => Some("COMMIT"),
        Token::Rollback => Some("ROLLBACK"),
        Token::Savepoint => Some("SAVEPOINT"),
        Token::Release => Some("RELEASE"),
        Token::Select => Some("SELECT"),
        Token::Update => Some("UPDATE"),
        Token::From => Some("FROM"),
        Token::Insert => Some("INSERT"),
        Token::Delete => Some("DELETE"),
        Token::Drop => Some("DROP"),
        Token::Explain => Some("EXPLAIN"),
        Token::Describe => Some("DESCRIBE"),
        Token::Show => Some("SHOW"),
        Token::Tables => Some("TABLES"),
        Token::Views => Some("VIEWS"),
        Token::Indexes => Some("INDEXES"),
        Token::Triggers => Some("TRIGGERS"),
        Token::Alter => Some("ALTER"),
        Token::Add => Some("ADD"),
        Token::If => Some("IF"),
        Token::Into => Some("INTO"),
        Token::Set => Some("SET"),
        Token::Values => Some("VALUES"),
        Token::Create => Some("CREATE"),
        Token::View => Some("VIEW"),
        Token::Trigger => Some("TRIGGER"),
        Token::Index => Some("INDEX"),
        Token::Exists => Some("EXISTS"),
        Token::Union => Some("UNION"),
        Token::Intersect => Some("INTERSECT"),
        Token::Except => Some("EXCEPT"),
        Token::All => Some("ALL"),
        Token::With => Some("WITH"),
        Token::Recursive => Some("RECURSIVE"),
        Token::Left => Some("LEFT"),
        Token::Right => Some("RIGHT"),
        Token::Full => Some("FULL"),
        Token::Outer => Some("OUTER"),
        Token::Inner => Some("INNER"),
        Token::Join => Some("JOIN"),
        Token::On => Some("ON"),
        Token::As => Some("AS"),
        Token::Distinct => Some("DISTINCT"),
        Token::Cast => Some("CAST"),
        Token::Table => Some("TABLE"),
        Token::Column => Some("COLUMN"),
        Token::Where => Some("WHERE"),
        Token::Group => Some("GROUP"),
        Token::Having => Some("HAVING"),
        Token::Order => Some("ORDER"),
        Token::By => Some("BY"),
        Token::Asc => Some("ASC"),
        Token::Desc => Some("DESC"),
        Token::Over => Some("OVER"),
        Token::Partition => Some("PARTITION"),
        Token::Interval => Some("INTERVAL"),
        Token::Limit => Some("LIMIT"),
        Token::Offset => Some("OFFSET"),
        Token::Count => Some("COUNT"),
        Token::Sum => Some("SUM"),
        Token::Avg => Some("AVG"),
        Token::Min => Some("MIN"),
        Token::Max => Some("MAX"),
        Token::Int32 => Some("INT32"),
        Token::Text => Some("TEXT"),
        Token::Boolean => Some("BOOLEAN"),
        Token::Float => Some("FLOAT"),
        Token::Float32 => Some("FLOAT32"),
        Token::Float64 => Some("FLOAT64"),
        Token::Int8 => Some("INT8"),
        Token::Int16 => Some("INT16"),
        Token::Int64 => Some("INT64"),
        Token::Int128 => Some("INT128"),
        Token::Int => Some("INT"),
        Token::UInt8 => Some("UINT8"),
        Token::UInt16 => Some("UINT16"),
        Token::UInt64 => Some("UINT64"),
        Token::UInt128 => Some("UINT128"),
        Token::UInt32 => Some("UINT32"),
        Token::UInt => Some("UINT"),
        Token::Bool => Some("BOOL"),
        Token::Decimal => Some("DECIMAL"),
        Token::Blob => Some("BLOB"),
        Token::Date => Some("DATE"),
        Token::Time => Some("TIME"),
        Token::DateTime => Some("DATETIME"),
        Token::Zone => Some("ZONE"),
        Token::Char => Some("CHAR"),
        Token::Varchar => Some("VARCHAR"),
        Token::BinaryType => Some("BINARY"),
        Token::VarBinary => Some("VARBINARY"),
        Token::Enum => Some("ENUM"),
        Token::AutoIncrement => Some("AUTO_INCREMENT"),
        Token::Unique => Some("UNIQUE"),
        Token::Primary => Some("PRIMARY"),
        Token::Key => Some("KEY"),
        Token::Duplicate => Some("DUPLICATE"),
        Token::Constraint => Some("CONSTRAINT"),
        Token::Check => Some("CHECK"),
        Token::Foreign => Some("FOREIGN"),
        Token::References => Some("REFERENCES"),
        Token::Cascade => Some("CASCADE"),
        Token::Restrict => Some("RESTRICT"),
        Token::Rename => Some("RENAME"),
        Token::To => Some("TO"),
        Token::After => Some("AFTER"),
        Token::Not => Some("NOT"),
        Token::Is => Some("IS"),
        Token::Null => Some("NULL"),
        Token::Default => Some("DEFAULT"),
        Token::In => Some("IN"),
        Token::Between => Some("BETWEEN"),
        Token::Like => Some("LIKE"),
        Token::Case => Some("CASE"),
        Token::When => Some("WHEN"),
        Token::Then => Some("THEN"),
        Token::Else => Some("ELSE"),
        Token::End => Some("END"),
        Token::And => Some("AND"),
        Token::Or => Some("OR"),
        _ => None,
    }
}
3158
/// Returns true when `name` (compared case-insensitively) is a function
/// that only makes sense as a window function: ROW_NUMBER, RANK, DENSE_RANK.
///
/// Uses `eq_ignore_ascii_case` instead of the original
/// `to_ascii_uppercase()` so no temporary String is allocated per call;
/// the keywords are all-ASCII so the comparison is equivalent.
fn is_window_only_function_name(name: &str) -> bool {
    ["ROW_NUMBER", "RANK", "DENSE_RANK"]
        .iter()
        .any(|keyword| name.eq_ignore_ascii_case(keyword))
}