1use datafusion_common::DataFusionError;
24use datafusion_common::config::SqlParserOptions;
25use datafusion_common::{Diagnostic, Span, sql_err};
26use sqlparser::ast::{ExprWithAlias, Ident, OrderByOptions};
27use sqlparser::tokenizer::TokenWithSpan;
28use sqlparser::{
29 ast::{
30 ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query,
31 Statement as SQLStatement, TableConstraint, Value,
32 },
33 dialect::{Dialect, GenericDialect, keywords::Keyword},
34 parser::{Parser, ParserError},
35 tokenizer::{Token, Tokenizer, Word},
36};
37use std::collections::VecDeque;
38use std::fmt;
39
/// Produces an `Err` holding a [`DataFusionError`] built from a
/// [`ParserError::ParserError`] carrying the given message.
///
/// An optional `; diagnostic = <expr>` suffix attaches that diagnostic to the
/// error through `with_diagnostic` before it is wrapped in `Err`.
macro_rules! parser_err {
    ($msg:expr $(; diagnostic = $diag:expr)?) => {{
        let error = DataFusionError::from(ParserError::ParserError($msg.to_string()));
        $(
            // Only expanded when the caller supplied a diagnostic.
            let error = error.with_diagnostic($diag);
        )?
        Err(error)
    }};
}
51
52fn parse_file_type(s: &str) -> Result<String, DataFusionError> {
53 Ok(s.to_uppercase())
54}
55
56#[derive(Debug, Clone, PartialEq, Eq)]
63pub struct ExplainStatement {
64 pub analyze: bool,
66 pub verbose: bool,
68 pub format: Option<String>,
70 pub statement: Box<Statement>,
74}
75
76impl fmt::Display for ExplainStatement {
77 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78 let Self {
79 analyze,
80 verbose,
81 format,
82 statement,
83 } = self;
84
85 write!(f, "EXPLAIN ")?;
86 if *analyze {
87 write!(f, "ANALYZE ")?;
88 }
89 if *verbose {
90 write!(f, "VERBOSE ")?;
91 }
92 if let Some(format) = format.as_ref() {
93 write!(f, "FORMAT {format} ")?;
94 }
95
96 write!(f, "{statement}")
97 }
98}
99
100#[derive(Debug, Clone, PartialEq, Eq)]
124pub struct CopyToStatement {
125 pub source: CopyToSource,
127 pub target: String,
129 pub partitioned_by: Vec<String>,
131 pub stored_as: Option<String>,
133 pub options: Vec<(String, Value)>,
135}
136
137impl fmt::Display for CopyToStatement {
138 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139 let Self {
140 source,
141 target,
142 partitioned_by,
143 stored_as,
144 options,
145 ..
146 } = self;
147
148 write!(f, "COPY {source} TO {target}")?;
149 if let Some(file_type) = stored_as {
150 write!(f, " STORED AS {file_type}")?;
151 }
152 if !partitioned_by.is_empty() {
153 write!(f, " PARTITIONED BY ({})", partitioned_by.join(", "))?;
154 }
155
156 if !options.is_empty() {
157 let opts: Vec<_> =
158 options.iter().map(|(k, v)| format!("'{k}' {v}")).collect();
159 write!(f, " OPTIONS ({})", opts.join(", "))?;
160 }
161
162 Ok(())
163 }
164}
165
166#[derive(Debug, Clone, PartialEq, Eq)]
167pub enum CopyToSource {
168 Relation(ObjectName),
170 Query(Box<Query>),
172}
173
174impl fmt::Display for CopyToSource {
175 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
176 match self {
177 CopyToSource::Relation(r) => write!(f, "{r}"),
178 CopyToSource::Query(q) => write!(f, "({q})"),
179 }
180 }
181}
182
/// One ordering: the list of `ORDER BY` expressions parsed from a single
/// `WITH ORDER (...)` clause.
pub(crate) type LexOrdering = Vec<OrderByExpr>;
185
186#[derive(Debug, Clone, PartialEq, Eq)]
211pub struct CreateExternalTable {
212 pub name: ObjectName,
214 pub columns: Vec<ColumnDef>,
216 pub file_type: String,
218 pub location: String,
220 pub table_partition_cols: Vec<String>,
222 pub order_exprs: Vec<LexOrdering>,
224 pub if_not_exists: bool,
226 pub or_replace: bool,
228 pub temporary: bool,
230 pub unbounded: bool,
232 pub options: Vec<(String, Value)>,
234 pub constraints: Vec<TableConstraint>,
236}
237
238impl fmt::Display for CreateExternalTable {
239 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
240 write!(f, "CREATE EXTERNAL TABLE ")?;
241 if self.if_not_exists {
242 write!(f, "IF NOT EXISTS ")?;
243 }
244 write!(f, "{} ", self.name)?;
245 write!(f, "STORED AS {} ", self.file_type)?;
246 if !self.order_exprs.is_empty() {
247 write!(f, "WITH ORDER (")?;
248 let mut first = true;
249 for expr in self.order_exprs.iter().flatten() {
250 if !first {
251 write!(f, ", ")?;
252 }
253 write!(f, "{expr}")?;
254 first = false;
255 }
256 write!(f, ") ")?;
257 }
258 write!(f, "LOCATION {}", self.location)
259 }
260}
261
262#[derive(Debug, Clone, PartialEq, Eq)]
264pub enum ResetStatement {
265 Variable(ObjectName),
267}
268
269impl fmt::Display for ResetStatement {
270 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
271 match self {
272 ResetStatement::Variable(name) => write!(f, "RESET {name}"),
273 }
274 }
275}
276
277#[derive(Debug, Clone, PartialEq, Eq)]
285pub enum Statement {
286 Statement(Box<SQLStatement>),
288 CreateExternalTable(CreateExternalTable),
290 CopyTo(CopyToStatement),
292 Explain(ExplainStatement),
294 Reset(ResetStatement),
296}
297
298impl fmt::Display for Statement {
299 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
300 match self {
301 Statement::Statement(stmt) => write!(f, "{stmt}"),
302 Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"),
303 Statement::CopyTo(stmt) => write!(f, "{stmt}"),
304 Statement::Explain(stmt) => write!(f, "{stmt}"),
305 Statement::Reset(stmt) => write!(f, "{stmt}"),
306 }
307 }
308}
309
310fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), DataFusionError> {
311 if field.is_some() {
312 parser_err!(format!("{name} specified more than once",))?
313 }
314 Ok(())
315}
316
/// SQL parser that wraps a `sqlparser` [`Parser`] and adds the statements
/// defined in this module (`CREATE EXTERNAL TABLE`, `COPY ... TO`, `EXPLAIN`,
/// `RESET`).
pub struct DFParser<'a> {
    /// The underlying `sqlparser` parser holding the token stream.
    pub parser: Parser<'a>,
    /// Parser options; the recursion limit is read from here when reporting
    /// a recursion-limit error.
    options: SqlParserOptions,
}
329
/// Recursion limit used by [`DFParserBuilder::new`] unless overridden.
const DEFAULT_RECURSION_LIMIT: usize = 50;
/// Dialect used by [`DFParserBuilder::new`] unless overridden.
const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
333
/// Builder for [`DFParser`].
///
/// `'a` is the lifetime of the SQL input and `'b` the lifetime of the
/// dialect reference.
pub struct DFParserBuilder<'a, 'b> {
    /// The input to parse: SQL text or an already tokenized stream.
    input: ParserInput<'a>,
    /// The dialect used for tokenizing and parsing.
    dialect: &'b dyn Dialect,
    /// The recursion limit handed to the underlying parser.
    recursion_limit: usize,
}
374
375pub enum ParserInput<'a> {
377 Sql(&'a str),
380 Tokens(Vec<TokenWithSpan>),
382}
383
384impl<'a> From<&'a str> for ParserInput<'a> {
385 fn from(sql: &'a str) -> Self {
386 Self::Sql(sql)
387 }
388}
389
390impl From<Vec<TokenWithSpan>> for ParserInput<'static> {
391 fn from(tokens: Vec<TokenWithSpan>) -> Self {
392 Self::Tokens(tokens)
393 }
394}
395
396impl<'a, 'b> DFParserBuilder<'a, 'b> {
397 pub fn new(input: impl Into<ParserInput<'a>>) -> Self {
400 Self {
401 input: input.into(),
402 dialect: &DEFAULT_DIALECT,
403 recursion_limit: DEFAULT_RECURSION_LIMIT,
404 }
405 }
406
407 pub fn with_dialect(mut self, dialect: &'b dyn Dialect) -> Self {
409 self.dialect = dialect;
410 self
411 }
412
413 pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
415 self.recursion_limit = recursion_limit;
416 self
417 }
418
419 pub fn build(self) -> Result<DFParser<'b>, DataFusionError> {
421 let tokens = match self.input {
422 ParserInput::Tokens(tokens) => tokens,
423 ParserInput::Sql(sql) => {
424 let mut tokenizer = Tokenizer::new(self.dialect, sql);
425 tokenizer
427 .tokenize_with_location()
428 .map_err(ParserError::from)?
429 }
430 };
431
432 Ok(DFParser {
433 parser: Parser::new(self.dialect)
434 .with_tokens_with_locations(tokens)
435 .with_recursion_limit(self.recursion_limit),
436 options: SqlParserOptions {
437 recursion_limit: self.recursion_limit,
438 ..Default::default()
439 },
440 })
441 }
442}
443
444impl<'a> DFParser<'a> {
445 #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
446 pub fn new(sql: &'a str) -> Result<Self, DataFusionError> {
447 DFParserBuilder::new(sql).build()
448 }
449
450 #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
451 pub fn new_with_dialect(
452 sql: &'a str,
453 dialect: &'a dyn Dialect,
454 ) -> Result<Self, DataFusionError> {
455 DFParserBuilder::new(sql).with_dialect(dialect).build()
456 }
457
458 pub fn parse_sql(sql: &'a str) -> Result<VecDeque<Statement>, DataFusionError> {
461 let mut parser = DFParserBuilder::new(sql).build()?;
462
463 parser.parse_statements()
464 }
465
466 pub fn parse_sql_with_dialect(
469 sql: &str,
470 dialect: &dyn Dialect,
471 ) -> Result<VecDeque<Statement>, DataFusionError> {
472 let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
473 parser.parse_statements()
474 }
475
476 pub fn parse_sql_into_expr(sql: &str) -> Result<ExprWithAlias, DataFusionError> {
477 DFParserBuilder::new(sql).build()?.parse_into_expr()
478 }
479
480 pub fn parse_sql_into_expr_with_dialect(
481 sql: &str,
482 dialect: &dyn Dialect,
483 ) -> Result<ExprWithAlias, DataFusionError> {
484 DFParserBuilder::new(sql)
485 .with_dialect(dialect)
486 .build()?
487 .parse_into_expr()
488 }
489
490 pub fn parse_statements(&mut self) -> Result<VecDeque<Statement>, DataFusionError> {
492 let mut stmts = VecDeque::new();
493 let mut expecting_statement_delimiter = false;
494 loop {
495 while self.parser.consume_token(&Token::SemiColon) {
497 expecting_statement_delimiter = false;
498 }
499
500 if self.parser.peek_token() == Token::EOF {
501 break;
502 }
503 if expecting_statement_delimiter {
504 return self.expected("end of statement", &self.parser.peek_token());
505 }
506
507 let statement = self.parse_statement()?;
508 stmts.push_back(statement);
509 expecting_statement_delimiter = true;
510 }
511 Ok(stmts)
512 }
513
514 fn expected<T>(
516 &self,
517 expected: &str,
518 found: &TokenWithSpan,
519 ) -> Result<T, DataFusionError> {
520 let sql_parser_span = found.span;
521 let span = Span::try_from_sqlparser_span(sql_parser_span);
522 let diagnostic = Diagnostic::new_error(
523 format!("Expected: {expected}, found: {found}{}", found.span.start),
524 span,
525 );
526 parser_err!(
527 format!("Expected: {expected}, found: {found}{}", found.span.start);
528 diagnostic=
529 diagnostic
530 )
531 }
532
533 fn expect_token(
534 &mut self,
535 expected: &str,
536 token: &Token,
537 ) -> Result<(), DataFusionError> {
538 let next_token = self.parser.peek_token_ref();
539 if next_token.token != *token {
540 self.expected(expected, next_token)
541 } else {
542 Ok(())
543 }
544 }
545
546 pub fn parse_statement(&mut self) -> Result<Statement, DataFusionError> {
548 match self.parser.peek_token().token {
549 Token::Word(w) => {
550 match w.keyword {
551 Keyword::CREATE => {
552 self.parser.next_token(); self.parse_create()
554 }
555 Keyword::COPY => {
556 if let Token::Word(w) = self.parser.peek_nth_token(1).token {
557 if w.keyword == Keyword::INTO {
559 return self.parse_and_handle_statement();
560 }
561 }
562 self.parser.next_token(); self.parse_copy()
564 }
565 Keyword::EXPLAIN => {
566 self.parser.next_token(); self.parse_explain()
568 }
569 Keyword::RESET => {
570 self.parser.next_token(); self.parse_reset()
572 }
573 _ => {
574 self.parse_and_handle_statement()
576 }
577 }
578 }
579 _ => {
580 self.parse_and_handle_statement()
582 }
583 }
584 }
585
586 pub fn parse_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
587 if let Token::Word(w) = self.parser.peek_token().token {
588 match w.keyword {
589 Keyword::CREATE | Keyword::COPY | Keyword::EXPLAIN => {
590 return parser_err!("Unsupported command in expression")?;
591 }
592 _ => {}
593 }
594 }
595
596 Ok(self.parser.parse_expr_with_alias()?)
597 }
598
599 pub fn parse_into_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
604 let expr = self.parse_expr()?;
605 self.expect_token("end of expression", &Token::EOF)?;
606 Ok(expr)
607 }
608
609 fn parse_and_handle_statement(&mut self) -> Result<Statement, DataFusionError> {
611 self.parser
612 .parse_statement()
613 .map(|stmt| Statement::Statement(Box::from(stmt)))
614 .map_err(|e| match e {
615 ParserError::RecursionLimitExceeded => DataFusionError::SQL(
616 Box::new(ParserError::RecursionLimitExceeded),
617 Some(format!(
618 " (current limit: {})",
619 self.options.recursion_limit
620 )),
621 ),
622 other => DataFusionError::SQL(Box::new(other), None),
623 })
624 }
625
626 pub fn parse_copy(&mut self) -> Result<Statement, DataFusionError> {
628 let source = if self.parser.consume_token(&Token::LParen) {
630 let query = self.parser.parse_query()?;
631 self.parser.expect_token(&Token::RParen)?;
632 CopyToSource::Query(query)
633 } else {
634 let table_name = self.parser.parse_object_name(true)?;
636 CopyToSource::Relation(table_name)
637 };
638
639 #[derive(Default)]
640 struct Builder {
641 stored_as: Option<String>,
642 target: Option<String>,
643 partitioned_by: Option<Vec<String>>,
644 options: Option<Vec<(String, Value)>>,
645 }
646
647 let mut builder = Builder::default();
648
649 loop {
650 if let Some(keyword) = self.parser.parse_one_of_keywords(&[
651 Keyword::STORED,
652 Keyword::TO,
653 Keyword::PARTITIONED,
654 Keyword::OPTIONS,
655 Keyword::WITH,
656 ]) {
657 match keyword {
658 Keyword::STORED => {
659 self.parser.expect_keyword(Keyword::AS)?;
660 ensure_not_set(&builder.stored_as, "STORED AS")?;
661 builder.stored_as = Some(self.parse_file_format()?);
662 }
663 Keyword::TO => {
664 ensure_not_set(&builder.target, "TO")?;
665 builder.target = Some(self.parser.parse_literal_string()?);
666 }
667 Keyword::WITH => {
668 self.parser.expect_keyword(Keyword::HEADER)?;
669 self.parser.expect_keyword(Keyword::ROW)?;
670 return parser_err!(
671 "WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')"
672 )?;
673 }
674 Keyword::PARTITIONED => {
675 self.parser.expect_keyword(Keyword::BY)?;
676 ensure_not_set(&builder.partitioned_by, "PARTITIONED BY")?;
677 builder.partitioned_by = Some(self.parse_partitions()?);
678 }
679 Keyword::OPTIONS => {
680 ensure_not_set(&builder.options, "OPTIONS")?;
681 builder.options = Some(self.parse_value_options()?);
682 }
683 _ => {
684 unreachable!()
685 }
686 }
687 } else {
688 let token = self.parser.peek_token();
689 if token == Token::EOF || token == Token::SemiColon {
690 break;
691 } else {
692 return self.expected("end of statement or ;", &token)?;
693 }
694 }
695 }
696
697 let Some(target) = builder.target else {
698 return parser_err!("Missing TO clause in COPY statement")?;
699 };
700
701 Ok(Statement::CopyTo(CopyToStatement {
702 source,
703 target,
704 partitioned_by: builder.partitioned_by.unwrap_or(vec![]),
705 stored_as: builder.stored_as,
706 options: builder.options.unwrap_or(vec![]),
707 }))
708 }
709
710 pub fn parse_option_key(&mut self) -> Result<String, DataFusionError> {
717 let next_token = self.parser.next_token();
718 match next_token.token {
719 Token::Word(Word { value, .. }) => {
720 let mut parts = vec![value];
721 while self.parser.consume_token(&Token::Period) {
722 let next_token = self.parser.next_token();
723 if let Token::Word(Word { value, .. }) = next_token.token {
724 parts.push(value);
725 } else {
726 return self.expected("key name", &next_token);
730 }
731 }
732 Ok(parts.join("."))
733 }
734 Token::SingleQuotedString(s) => Ok(s),
735 Token::DoubleQuotedString(s) => Ok(s),
736 Token::EscapedStringLiteral(s) => Ok(s),
737 _ => self.expected("key name", &next_token),
738 }
739 }
740
741 pub fn parse_option_value(&mut self) -> Result<Value, DataFusionError> {
748 let next_token = self.parser.next_token();
749 match next_token.token {
750 Token::Word(word) => Ok(Value::SingleQuotedString(word.value)),
752 Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)),
753 Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)),
754 Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)),
755 Token::Number(n, l) => Ok(Value::Number(n, l)),
756 _ => self.expected("string or numeric value", &next_token),
757 }
758 }
759
760 pub fn parse_explain(&mut self) -> Result<Statement, DataFusionError> {
762 let analyze = self.parser.parse_keyword(Keyword::ANALYZE);
763 let verbose = self.parser.parse_keyword(Keyword::VERBOSE);
764 let format = self.parse_explain_format()?;
765
766 let statement = self.parse_statement()?;
767
768 Ok(Statement::Explain(ExplainStatement {
769 statement: Box::new(statement),
770 analyze,
771 verbose,
772 format,
773 }))
774 }
775
776 pub fn parse_reset(&mut self) -> Result<Statement, DataFusionError> {
778 let mut parts: Vec<String> = Vec::new();
779 let mut expecting_segment = true;
780
781 loop {
782 let next_token = self.parser.peek_token();
783 match &next_token.token {
784 Token::Word(word) => {
785 self.parser.next_token();
786 parts.push(word.value.clone());
787 expecting_segment = false;
788 }
789 Token::SingleQuotedString(s)
790 | Token::DoubleQuotedString(s)
791 | Token::EscapedStringLiteral(s) => {
792 self.parser.next_token();
793 parts.push(s.clone());
794 expecting_segment = false;
795 }
796 Token::Period => {
797 self.parser.next_token();
798 if expecting_segment || parts.is_empty() {
799 return self.expected("configuration parameter", &next_token);
800 }
801 expecting_segment = true;
802 }
803 Token::EOF | Token::SemiColon => break,
804 _ => return self.expected("configuration parameter", &next_token),
805 }
806 }
807
808 if parts.is_empty() || expecting_segment {
809 return self.expected("configuration parameter", &self.parser.peek_token());
810 }
811
812 let idents: Vec<Ident> = parts.into_iter().map(Ident::new).collect();
813 let variable = ObjectName::from(idents);
814 Ok(Statement::Reset(ResetStatement::Variable(variable)))
815 }
816
817 pub fn parse_explain_format(&mut self) -> Result<Option<String>, DataFusionError> {
818 if !self.parser.parse_keyword(Keyword::FORMAT) {
819 return Ok(None);
820 }
821
822 let next_token = self.parser.next_token();
823 let format = match next_token.token {
824 Token::Word(w) => Ok(w.value),
825 Token::SingleQuotedString(w) => Ok(w),
826 Token::DoubleQuotedString(w) => Ok(w),
827 _ => self.expected("an explain format such as TREE", &next_token),
828 }?;
829 Ok(Some(format))
830 }
831
832 pub fn parse_create(&mut self) -> Result<Statement, DataFusionError> {
834 if self
836 .parser
837 .parse_keywords(&[Keyword::OR, Keyword::REPLACE, Keyword::EXTERNAL])
838 {
839 self.parse_create_external_table(false, true)
840 } else if self.parser.parse_keywords(&[
841 Keyword::OR,
842 Keyword::REPLACE,
843 Keyword::UNBOUNDED,
844 Keyword::EXTERNAL,
845 ]) {
846 self.parse_create_external_table(true, true)
847 } else if self.parser.parse_keyword(Keyword::EXTERNAL) {
848 self.parse_create_external_table(false, false)
849 } else if self
850 .parser
851 .parse_keywords(&[Keyword::UNBOUNDED, Keyword::EXTERNAL])
852 {
853 self.parse_create_external_table(true, false)
854 } else {
855 Ok(Statement::Statement(Box::from(self.parser.parse_create()?)))
856 }
857 }
858
859 fn parse_partitions(&mut self) -> Result<Vec<String>, DataFusionError> {
860 let mut partitions: Vec<String> = vec![];
861 if !self.parser.consume_token(&Token::LParen)
862 || self.parser.consume_token(&Token::RParen)
863 {
864 return Ok(partitions);
865 }
866
867 loop {
868 if let Token::Word(_) = self.parser.peek_token().token {
869 let identifier = self.parser.parse_identifier()?;
870 partitions.push(identifier.to_string());
871 } else {
872 return self.expected("partition name", &self.parser.peek_token());
873 }
874 let comma = self.parser.consume_token(&Token::Comma);
875 if self.parser.consume_token(&Token::RParen) {
876 break;
878 } else if !comma {
879 return self.expected(
880 "',' or ')' after partition definition",
881 &self.parser.peek_token(),
882 );
883 }
884 }
885 Ok(partitions)
886 }
887
888 pub fn parse_order_by_exprs(&mut self) -> Result<Vec<OrderByExpr>, DataFusionError> {
890 let mut values = vec![];
891 self.parser.expect_token(&Token::LParen)?;
892 loop {
893 values.push(self.parse_order_by_expr()?);
894 if !self.parser.consume_token(&Token::Comma) {
895 self.parser.expect_token(&Token::RParen)?;
896 return Ok(values);
897 }
898 }
899 }
900
901 pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, DataFusionError> {
903 let expr = self.parser.parse_expr()?;
904
905 let asc = if self.parser.parse_keyword(Keyword::ASC) {
906 Some(true)
907 } else if self.parser.parse_keyword(Keyword::DESC) {
908 Some(false)
909 } else {
910 None
911 };
912
913 let nulls_first = if self
914 .parser
915 .parse_keywords(&[Keyword::NULLS, Keyword::FIRST])
916 {
917 Some(true)
918 } else if self.parser.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) {
919 Some(false)
920 } else {
921 None
922 };
923
924 Ok(OrderByExpr {
925 expr,
926 options: OrderByOptions { asc, nulls_first },
927 with_fill: None,
928 })
929 }
930
931 fn parse_columns(
933 &mut self,
934 ) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), DataFusionError> {
935 let mut columns = vec![];
936 let mut constraints = vec![];
937 if !self.parser.consume_token(&Token::LParen)
938 || self.parser.consume_token(&Token::RParen)
939 {
940 return Ok((columns, constraints));
941 }
942
943 loop {
944 if let Some(constraint) = self.parser.parse_optional_table_constraint()? {
945 constraints.push(constraint);
946 } else if let Token::Word(_) = self.parser.peek_token().token {
947 let column_def = self.parse_column_def()?;
948 columns.push(column_def);
949 } else {
950 return self.expected(
951 "column name or constraint definition",
952 &self.parser.peek_token(),
953 );
954 }
955 let comma = self.parser.consume_token(&Token::Comma);
956 if self.parser.consume_token(&Token::RParen) {
957 break;
959 } else if !comma {
960 return self.expected(
961 "',' or ')' after column definition",
962 &self.parser.peek_token(),
963 );
964 }
965 }
966
967 Ok((columns, constraints))
968 }
969
970 fn parse_column_def(&mut self) -> Result<ColumnDef, DataFusionError> {
971 let name = self.parser.parse_identifier()?;
972 let data_type = self.parser.parse_data_type()?;
973 let mut options = vec![];
974 loop {
975 if self.parser.parse_keyword(Keyword::CONSTRAINT) {
976 let name = Some(self.parser.parse_identifier()?);
977 if let Some(option) = self.parser.parse_optional_column_option()? {
978 options.push(ColumnOptionDef { name, option });
979 } else {
980 return self.expected(
981 "constraint details after CONSTRAINT <name>",
982 &self.parser.peek_token(),
983 );
984 }
985 } else if let Some(option) = self.parser.parse_optional_column_option()? {
986 options.push(ColumnOptionDef { name: None, option });
987 } else {
988 break;
989 };
990 }
991 Ok(ColumnDef {
992 name,
993 data_type,
994 options,
995 })
996 }
997
998 fn parse_create_external_table(
999 &mut self,
1000 unbounded: bool,
1001 or_replace: bool,
1002 ) -> Result<Statement, DataFusionError> {
1003 let temporary = self
1004 .parser
1005 .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY])
1006 .is_some();
1007
1008 self.parser.expect_keyword(Keyword::TABLE)?;
1009 let if_not_exists =
1010 self.parser
1011 .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
1012
1013 if if_not_exists && or_replace {
1014 return parser_err!("'IF NOT EXISTS' cannot coexist with 'REPLACE'");
1015 }
1016
1017 let table_name = self.parser.parse_object_name(true)?;
1018 let (mut columns, constraints) = self.parse_columns()?;
1019
1020 #[derive(Default)]
1021 struct Builder {
1022 file_type: Option<String>,
1023 location: Option<String>,
1024 table_partition_cols: Option<Vec<String>>,
1025 order_exprs: Vec<LexOrdering>,
1026 options: Option<Vec<(String, Value)>>,
1027 }
1028 let mut builder = Builder::default();
1029
1030 loop {
1031 if let Some(keyword) = self.parser.parse_one_of_keywords(&[
1032 Keyword::STORED,
1033 Keyword::LOCATION,
1034 Keyword::WITH,
1035 Keyword::DELIMITER,
1036 Keyword::COMPRESSION,
1037 Keyword::PARTITIONED,
1038 Keyword::OPTIONS,
1039 ]) {
1040 match keyword {
1041 Keyword::STORED => {
1042 self.parser.expect_keyword(Keyword::AS)?;
1043 ensure_not_set(&builder.file_type, "STORED AS")?;
1044 builder.file_type = Some(self.parse_file_format()?);
1045 }
1046 Keyword::LOCATION => {
1047 ensure_not_set(&builder.location, "LOCATION")?;
1048 builder.location = Some(self.parser.parse_literal_string()?);
1049 }
1050 Keyword::WITH => {
1051 if self.parser.parse_keyword(Keyword::ORDER) {
1052 builder.order_exprs.push(self.parse_order_by_exprs()?);
1053 } else {
1054 self.parser.expect_keyword(Keyword::HEADER)?;
1055 self.parser.expect_keyword(Keyword::ROW)?;
1056 return parser_err!(
1057 "WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)"
1058 )?;
1059 }
1060 }
1061 Keyword::DELIMITER => {
1062 return parser_err!(
1063 "DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')"
1064 )?;
1065 }
1066 Keyword::COMPRESSION => {
1067 self.parser.expect_keyword(Keyword::TYPE)?;
1068 return parser_err!(
1069 "COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)"
1070 )?;
1071 }
1072 Keyword::PARTITIONED => {
1073 self.parser.expect_keyword(Keyword::BY)?;
1074 ensure_not_set(&builder.table_partition_cols, "PARTITIONED BY")?;
1075 let peeked = self.parser.peek_nth_token(2);
1080 if peeked == Token::Comma || peeked == Token::RParen {
1081 builder.table_partition_cols = Some(self.parse_partitions()?)
1083 } else {
1084 let (cols, cons) = self.parse_columns()?;
1086 builder.table_partition_cols = Some(
1087 cols.iter().map(|col| col.name.to_string()).collect(),
1088 );
1089
1090 columns.extend(cols);
1091
1092 if !cons.is_empty() {
1093 return sql_err!(ParserError::ParserError(
1094 "Constraints on Partition Columns are not supported"
1095 .to_string(),
1096 ));
1097 }
1098 }
1099 }
1100 Keyword::OPTIONS => {
1101 ensure_not_set(&builder.options, "OPTIONS")?;
1102 builder.options = Some(self.parse_value_options()?);
1103 }
1104 _ => {
1105 unreachable!()
1106 }
1107 }
1108 } else {
1109 let token = self.parser.peek_token();
1110 if token == Token::EOF || token == Token::SemiColon {
1111 break;
1112 } else {
1113 return self.expected("end of statement or ;", &token)?;
1114 }
1115 }
1116 }
1117
1118 if builder.file_type.is_none() {
1120 return sql_err!(ParserError::ParserError(
1121 "Missing STORED AS clause in CREATE EXTERNAL TABLE statement".into(),
1122 ));
1123 }
1124 if builder.location.is_none() {
1125 return sql_err!(ParserError::ParserError(
1126 "Missing LOCATION clause in CREATE EXTERNAL TABLE statement".into(),
1127 ));
1128 }
1129
1130 let create = CreateExternalTable {
1131 name: table_name,
1132 columns,
1133 file_type: builder.file_type.unwrap(),
1134 location: builder.location.unwrap(),
1135 table_partition_cols: builder.table_partition_cols.unwrap_or(vec![]),
1136 order_exprs: builder.order_exprs,
1137 if_not_exists,
1138 or_replace,
1139 temporary,
1140 unbounded,
1141 options: builder.options.unwrap_or(Vec::new()),
1142 constraints,
1143 };
1144 Ok(Statement::CreateExternalTable(create))
1145 }
1146
1147 fn parse_file_format(&mut self) -> Result<String, DataFusionError> {
1149 let token = self.parser.next_token();
1150 match &token.token {
1151 Token::Word(w) => parse_file_type(&w.value),
1152 _ => self.expected("one of ARROW, PARQUET, NDJSON, or CSV", &token),
1153 }
1154 }
1155
1156 fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>, DataFusionError> {
1161 let mut options = vec![];
1162 self.parser.expect_token(&Token::LParen)?;
1163
1164 loop {
1165 let key = self.parse_option_key()?;
1166 let value = self.parse_option_value()?;
1167 options.push((key, value));
1168 let comma = self.parser.consume_token(&Token::Comma);
1169 if self.parser.consume_token(&Token::RParen) {
1170 break;
1172 } else if !comma {
1173 return self.expected(
1174 "',' or ')' after option definition",
1175 &self.parser.peek_token(),
1176 );
1177 }
1178 }
1179 Ok(options)
1180 }
1181}
1182
1183#[cfg(test)]
1184mod tests {
1185 use super::*;
1186 use datafusion_common::assert_contains;
1187 use sqlparser::ast::Expr::Identifier;
1188 use sqlparser::ast::{
1189 BinaryOperator, DataType, ExactNumberInfo, Expr, Ident, ValueWithSpan,
1190 };
1191 use sqlparser::dialect::SnowflakeDialect;
1192 use sqlparser::tokenizer::{Location, Span, Whitespace};
1193
1194 fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), DataFusionError> {
1195 let statements = DFParser::parse_sql(sql)?;
1196 assert_eq!(
1197 statements.len(),
1198 1,
1199 "Expected to parse exactly one statement"
1200 );
1201 assert_eq!(statements[0], expected, "actual:\n{:#?}", statements[0]);
1202 Ok(())
1203 }
1204
1205 fn expect_parse_error(sql: &str, expected_error: &str) {
1207 match DFParser::parse_sql(sql) {
1208 Ok(statements) => {
1209 panic!(
1210 "Expected parse error for '{sql}', but was successful: {statements:?}"
1211 );
1212 }
1213 Err(e) => {
1214 let error_message = e.to_string();
1215 assert!(
1216 error_message.contains(expected_error),
1217 "Expected error '{expected_error}' not found in actual error '{error_message}'"
1218 );
1219 }
1220 }
1221 }
1222
1223 fn make_column_def(name: impl Into<String>, data_type: DataType) -> ColumnDef {
1224 ColumnDef {
1225 name: Ident {
1226 value: name.into(),
1227 quote_style: None,
1228 span: Span::empty(),
1229 },
1230 data_type,
1231 options: vec![],
1232 }
1233 }
1234
1235 #[test]
1236 fn create_external_table() -> Result<(), DataFusionError> {
1237 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
1239 let display = None;
1240 let name = ObjectName::from(vec![Ident::from("t")]);
1241 let expected = Statement::CreateExternalTable(CreateExternalTable {
1242 name: name.clone(),
1243 columns: vec![make_column_def("c1", DataType::Int(display))],
1244 file_type: "CSV".to_string(),
1245 location: "foo.csv".into(),
1246 table_partition_cols: vec![],
1247 order_exprs: vec![],
1248 if_not_exists: false,
1249 or_replace: false,
1250 temporary: false,
1251 unbounded: false,
1252 options: vec![],
1253 constraints: vec![],
1254 });
1255 expect_parse_ok(sql, expected)?;
1256
1257 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ";
1259 let expected = Statement::CreateExternalTable(CreateExternalTable {
1260 name: name.clone(),
1261 columns: vec![make_column_def("c1", DataType::Int(None))],
1262 file_type: "CSV".to_string(),
1263 location: "foo.csv".into(),
1264 table_partition_cols: vec![],
1265 order_exprs: vec![],
1266 if_not_exists: false,
1267 or_replace: false,
1268 temporary: false,
1269 unbounded: false,
1270 options: vec![],
1271 constraints: vec![],
1272 });
1273 expect_parse_ok(sql, expected)?;
1274
1275 let sql =
1277 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ;";
1278 let expected = Statement::CreateExternalTable(CreateExternalTable {
1279 name: name.clone(),
1280 columns: vec![make_column_def("c1", DataType::Int(None))],
1281 file_type: "CSV".to_string(),
1282 location: "foo.csv".into(),
1283 table_partition_cols: vec![],
1284 order_exprs: vec![],
1285 if_not_exists: false,
1286 or_replace: false,
1287 temporary: false,
1288 unbounded: false,
1289 options: vec![],
1290 constraints: vec![],
1291 });
1292 expect_parse_ok(sql, expected)?;
1293
1294 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
1296 let display = None;
1297 let expected = Statement::CreateExternalTable(CreateExternalTable {
1298 name: name.clone(),
1299 columns: vec![make_column_def("c1", DataType::Int(display))],
1300 file_type: "CSV".to_string(),
1301 location: "foo.csv".into(),
1302 table_partition_cols: vec![],
1303 order_exprs: vec![],
1304 if_not_exists: false,
1305 or_replace: false,
1306 temporary: false,
1307 unbounded: false,
1308 options: vec![(
1309 "format.delimiter".into(),
1310 Value::SingleQuotedString("|".into()),
1311 )],
1312 constraints: vec![],
1313 });
1314 expect_parse_ok(sql, expected)?;
1315
1316 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'";
1318 let display = None;
1319 let expected = Statement::CreateExternalTable(CreateExternalTable {
1320 name: name.clone(),
1321 columns: vec![make_column_def("c1", DataType::Int(display))],
1322 file_type: "CSV".to_string(),
1323 location: "foo.csv".into(),
1324 table_partition_cols: vec!["p1".to_string(), "p2".to_string()],
1325 order_exprs: vec![],
1326 if_not_exists: false,
1327 or_replace: false,
1328 temporary: false,
1329 unbounded: false,
1330 options: vec![],
1331 constraints: vec![],
1332 });
1333 expect_parse_ok(sql, expected)?;
1334
1335 let sqls =
1337 vec![
1338 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1339 ('format.compression' 'GZIP')", "GZIP"),
1340 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1341 ('format.compression' 'BZIP2')", "BZIP2"),
1342 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1343 ('format.compression' 'XZ')", "XZ"),
1344 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1345 ('format.compression' 'ZSTD')", "ZSTD"),
1346 ];
1347 for (sql, compression) in sqls {
1348 let expected = Statement::CreateExternalTable(CreateExternalTable {
1349 name: name.clone(),
1350 columns: vec![make_column_def("c1", DataType::Int(display))],
1351 file_type: "CSV".to_string(),
1352 location: "foo.csv".into(),
1353 table_partition_cols: vec![],
1354 order_exprs: vec![],
1355 if_not_exists: false,
1356 or_replace: false,
1357 temporary: false,
1358 unbounded: false,
1359 options: vec![(
1360 "format.compression".into(),
1361 Value::SingleQuotedString(compression.into()),
1362 )],
1363 constraints: vec![],
1364 });
1365 expect_parse_ok(sql, expected)?;
1366 }
1367
1368 let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1370 let expected = Statement::CreateExternalTable(CreateExternalTable {
1371 name: name.clone(),
1372 columns: vec![],
1373 file_type: "PARQUET".to_string(),
1374 location: "foo.parquet".into(),
1375 table_partition_cols: vec![],
1376 order_exprs: vec![],
1377 if_not_exists: false,
1378 or_replace: false,
1379 temporary: false,
1380 unbounded: false,
1381 options: vec![],
1382 constraints: vec![],
1383 });
1384 expect_parse_ok(sql, expected)?;
1385
1386 let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'";
1388 let expected = Statement::CreateExternalTable(CreateExternalTable {
1389 name: name.clone(),
1390 columns: vec![],
1391 file_type: "PARQUET".to_string(),
1392 location: "foo.parquet".into(),
1393 table_partition_cols: vec![],
1394 order_exprs: vec![],
1395 if_not_exists: false,
1396 or_replace: false,
1397 temporary: false,
1398 unbounded: false,
1399 options: vec![],
1400 constraints: vec![],
1401 });
1402 expect_parse_ok(sql, expected)?;
1403
1404 let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
1406 let expected = Statement::CreateExternalTable(CreateExternalTable {
1407 name: name.clone(),
1408 columns: vec![],
1409 file_type: "AVRO".to_string(),
1410 location: "foo.avro".into(),
1411 table_partition_cols: vec![],
1412 order_exprs: vec![],
1413 if_not_exists: false,
1414 or_replace: false,
1415 temporary: false,
1416 unbounded: false,
1417 options: vec![],
1418 constraints: vec![],
1419 });
1420 expect_parse_ok(sql, expected)?;
1421
1422 let sql = "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'";
1424 let expected = Statement::CreateExternalTable(CreateExternalTable {
1425 name: name.clone(),
1426 columns: vec![],
1427 file_type: "PARQUET".to_string(),
1428 location: "foo.parquet".into(),
1429 table_partition_cols: vec![],
1430 order_exprs: vec![],
1431 if_not_exists: true,
1432 or_replace: false,
1433 temporary: false,
1434 unbounded: false,
1435 options: vec![],
1436 constraints: vec![],
1437 });
1438 expect_parse_ok(sql, expected)?;
1439
1440 let sql =
1442 "CREATE OR REPLACE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1443 let expected = Statement::CreateExternalTable(CreateExternalTable {
1444 name: name.clone(),
1445 columns: vec![],
1446 file_type: "PARQUET".to_string(),
1447 location: "foo.parquet".into(),
1448 table_partition_cols: vec![],
1449 order_exprs: vec![],
1450 if_not_exists: false,
1451 or_replace: true,
1452 temporary: false,
1453 unbounded: false,
1454 options: vec![],
1455 constraints: vec![],
1456 });
1457 expect_parse_ok(sql, expected)?;
1458
1459 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'";
1461 let expected = Statement::CreateExternalTable(CreateExternalTable {
1462 name: name.clone(),
1463 columns: vec![
1464 make_column_def("c1", DataType::Int(None)),
1465 make_column_def("p1", DataType::Int(None)),
1466 ],
1467 file_type: "CSV".to_string(),
1468 location: "foo.csv".into(),
1469 table_partition_cols: vec!["p1".to_string()],
1470 order_exprs: vec![],
1471 if_not_exists: false,
1472 or_replace: false,
1473 temporary: false,
1474 unbounded: false,
1475 options: vec![],
1476 constraints: vec![],
1477 });
1478 expect_parse_ok(sql, expected)?;
1479
1480 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'";
1482 expect_parse_error(
1483 sql,
1484 "SQL error: ParserError(\"Expected: a data type name, found: ) at Line: 1, Column: 73\")",
1485 );
1486
1487 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'";
1489 expect_parse_error(
1490 sql,
1491 "SQL error: ParserError(\"Expected: ',' or ')' after partition definition, found: int at Line: 1, Column: 70\")",
1492 );
1493
1494 let sql =
1496 "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'";
1497 let expected = Statement::CreateExternalTable(CreateExternalTable {
1498 name: name.clone(),
1499 columns: vec![],
1500 file_type: "X".to_string(),
1501 location: "blahblah".into(),
1502 table_partition_cols: vec![],
1503 order_exprs: vec![],
1504 if_not_exists: false,
1505 or_replace: false,
1506 temporary: false,
1507 unbounded: false,
1508 options: vec![("k1".into(), Value::SingleQuotedString("v1".into()))],
1509 constraints: vec![],
1510 });
1511 expect_parse_ok(sql, expected)?;
1512
1513 let sql = "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'";
1515 let expected = Statement::CreateExternalTable(CreateExternalTable {
1516 name: name.clone(),
1517 columns: vec![],
1518 file_type: "X".to_string(),
1519 location: "blahblah".into(),
1520 table_partition_cols: vec![],
1521 order_exprs: vec![],
1522 if_not_exists: false,
1523 or_replace: false,
1524 temporary: false,
1525 unbounded: false,
1526 options: vec![
1527 ("k1".into(), Value::SingleQuotedString("v1".into())),
1528 ("k2".into(), Value::SingleQuotedString("v2".into())),
1529 ],
1530 constraints: vec![],
1531 });
1532 expect_parse_ok(sql, expected)?;
1533
1534 let sqls = [
1536 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'",
1537 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'",
1538 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS LAST) LOCATION 'foo.csv'",
1539 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC) LOCATION 'foo.csv'",
1540 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC) LOCATION 'foo.csv'",
1541 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS FIRST) LOCATION 'foo.csv'",
1542 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS LAST) LOCATION 'foo.csv'",
1543 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS FIRST) LOCATION 'foo.csv'",
1544 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS LAST) LOCATION 'foo.csv'",
1545 ];
1546 let expected = vec![
1547 (None, None),
1548 (None, Some(true)),
1549 (None, Some(false)),
1550 (Some(true), None),
1551 (Some(false), None),
1552 (Some(false), Some(true)),
1553 (Some(false), Some(false)),
1554 (Some(true), Some(true)),
1555 (Some(true), Some(false)),
1556 ];
1557 for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) {
1558 let expected = Statement::CreateExternalTable(CreateExternalTable {
1559 name: name.clone(),
1560 columns: vec![make_column_def("c1", DataType::Int(None))],
1561 file_type: "CSV".to_string(),
1562 location: "foo.csv".into(),
1563 table_partition_cols: vec![],
1564 order_exprs: vec![vec![OrderByExpr {
1565 expr: Identifier(Ident {
1566 value: "c1".to_owned(),
1567 quote_style: None,
1568 span: Span::empty(),
1569 }),
1570 options: OrderByOptions { asc, nulls_first },
1571 with_fill: None,
1572 }]],
1573 if_not_exists: false,
1574 or_replace: false,
1575 temporary: false,
1576 unbounded: false,
1577 options: vec![],
1578 constraints: vec![],
1579 });
1580 expect_parse_ok(sql, expected)?;
1581 }
1582
1583 let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
1585 let display = None;
1586 let expected = Statement::CreateExternalTable(CreateExternalTable {
1587 name: name.clone(),
1588 columns: vec![
1589 make_column_def("c1", DataType::Int(display)),
1590 make_column_def("c2", DataType::Int(display)),
1591 ],
1592 file_type: "CSV".to_string(),
1593 location: "foo.csv".into(),
1594 table_partition_cols: vec![],
1595 order_exprs: vec![vec![
1596 OrderByExpr {
1597 expr: Identifier(Ident {
1598 value: "c1".to_owned(),
1599 quote_style: None,
1600 span: Span::empty(),
1601 }),
1602 options: OrderByOptions {
1603 asc: Some(true),
1604 nulls_first: None,
1605 },
1606 with_fill: None,
1607 },
1608 OrderByExpr {
1609 expr: Identifier(Ident {
1610 value: "c2".to_owned(),
1611 quote_style: None,
1612 span: Span::empty(),
1613 }),
1614 options: OrderByOptions {
1615 asc: Some(false),
1616 nulls_first: Some(true),
1617 },
1618 with_fill: None,
1619 },
1620 ]],
1621 if_not_exists: false,
1622 or_replace: false,
1623 temporary: false,
1624 unbounded: false,
1625 options: vec![],
1626 constraints: vec![],
1627 });
1628 expect_parse_ok(sql, expected)?;
1629
1630 let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
1632 let display = None;
1633 let expected = Statement::CreateExternalTable(CreateExternalTable {
1634 name: name.clone(),
1635 columns: vec![
1636 make_column_def("c1", DataType::Int(display)),
1637 make_column_def("c2", DataType::Int(display)),
1638 ],
1639 file_type: "CSV".to_string(),
1640 location: "foo.csv".into(),
1641 table_partition_cols: vec![],
1642 order_exprs: vec![vec![OrderByExpr {
1643 expr: Expr::BinaryOp {
1644 left: Box::new(Identifier(Ident {
1645 value: "c1".to_owned(),
1646 quote_style: None,
1647 span: Span::empty(),
1648 })),
1649 op: BinaryOperator::Minus,
1650 right: Box::new(Identifier(Ident {
1651 value: "c2".to_owned(),
1652 quote_style: None,
1653 span: Span::empty(),
1654 })),
1655 },
1656 options: OrderByOptions {
1657 asc: Some(true),
1658 nulls_first: None,
1659 },
1660 with_fill: None,
1661 }]],
1662 if_not_exists: false,
1663 or_replace: false,
1664 temporary: false,
1665 unbounded: false,
1666 options: vec![],
1667 constraints: vec![],
1668 });
1669 expect_parse_ok(sql, expected)?;
1670
1671 let sql = "
1673 CREATE UNBOUNDED EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float)
1674 STORED AS PARQUET
1675 WITH ORDER (c1 - c2 ASC)
1676 PARTITIONED BY (c1)
1677 LOCATION 'foo.parquet'
1678 OPTIONS ('format.compression' 'zstd',
1679 'format.delimiter' '*',
1680 'ROW_GROUP_SIZE' '1024',
1681 'TRUNCATE' 'NO',
1682 'format.has_header' 'true')";
1683 let expected = Statement::CreateExternalTable(CreateExternalTable {
1684 name: name.clone(),
1685 columns: vec![
1686 make_column_def("c1", DataType::Int(None)),
1687 make_column_def("c2", DataType::Float(ExactNumberInfo::None)),
1688 ],
1689 file_type: "PARQUET".to_string(),
1690 location: "foo.parquet".into(),
1691 table_partition_cols: vec!["c1".into()],
1692 order_exprs: vec![vec![OrderByExpr {
1693 expr: Expr::BinaryOp {
1694 left: Box::new(Identifier(Ident {
1695 value: "c1".to_owned(),
1696 quote_style: None,
1697 span: Span::empty(),
1698 })),
1699 op: BinaryOperator::Minus,
1700 right: Box::new(Identifier(Ident {
1701 value: "c2".to_owned(),
1702 quote_style: None,
1703 span: Span::empty(),
1704 })),
1705 },
1706 options: OrderByOptions {
1707 asc: Some(true),
1708 nulls_first: None,
1709 },
1710 with_fill: None,
1711 }]],
1712 if_not_exists: true,
1713 or_replace: false,
1714 temporary: false,
1715 unbounded: true,
1716 options: vec![
1717 (
1718 "format.compression".into(),
1719 Value::SingleQuotedString("zstd".into()),
1720 ),
1721 (
1722 "format.delimiter".into(),
1723 Value::SingleQuotedString("*".into()),
1724 ),
1725 (
1726 "ROW_GROUP_SIZE".into(),
1727 Value::SingleQuotedString("1024".into()),
1728 ),
1729 ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1730 (
1731 "format.has_header".into(),
1732 Value::SingleQuotedString("true".into()),
1733 ),
1734 ],
1735 constraints: vec![],
1736 });
1737 expect_parse_ok(sql, expected)?;
1738
1739 let sql = "
1741 CREATE OR REPLACE UNBOUNDED EXTERNAL TABLE t (c1 int, c2 float)
1742 STORED AS PARQUET
1743 WITH ORDER (c1 - c2 ASC)
1744 PARTITIONED BY (c1)
1745 LOCATION 'foo.parquet'
1746 OPTIONS ('format.compression' 'zstd',
1747 'format.delimiter' '*',
1748 'ROW_GROUP_SIZE' '1024',
1749 'TRUNCATE' 'NO',
1750 'format.has_header' 'true')";
1751 let expected = Statement::CreateExternalTable(CreateExternalTable {
1752 name: name.clone(),
1753 columns: vec![
1754 make_column_def("c1", DataType::Int(None)),
1755 make_column_def("c2", DataType::Float(ExactNumberInfo::None)),
1756 ],
1757 file_type: "PARQUET".to_string(),
1758 location: "foo.parquet".into(),
1759 table_partition_cols: vec!["c1".into()],
1760 order_exprs: vec![vec![OrderByExpr {
1761 expr: Expr::BinaryOp {
1762 left: Box::new(Identifier(Ident {
1763 value: "c1".to_owned(),
1764 quote_style: None,
1765 span: Span::empty(),
1766 })),
1767 op: BinaryOperator::Minus,
1768 right: Box::new(Identifier(Ident {
1769 value: "c2".to_owned(),
1770 quote_style: None,
1771 span: Span::empty(),
1772 })),
1773 },
1774 options: OrderByOptions {
1775 asc: Some(true),
1776 nulls_first: None,
1777 },
1778 with_fill: None,
1779 }]],
1780 if_not_exists: false,
1781 or_replace: true,
1782 temporary: false,
1783 unbounded: true,
1784 options: vec![
1785 (
1786 "format.compression".into(),
1787 Value::SingleQuotedString("zstd".into()),
1788 ),
1789 (
1790 "format.delimiter".into(),
1791 Value::SingleQuotedString("*".into()),
1792 ),
1793 (
1794 "ROW_GROUP_SIZE".into(),
1795 Value::SingleQuotedString("1024".into()),
1796 ),
1797 ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1798 (
1799 "format.has_header".into(),
1800 Value::SingleQuotedString("true".into()),
1801 ),
1802 ],
1803 constraints: vec![],
1804 });
1805 expect_parse_ok(sql, expected)?;
1806
1807 Ok(())
1810 }
1811
/// A plain `COPY <table> TO <target> STORED AS <format>` parses into a
/// `CopyToStatement` with no partition columns and no options.
#[test]
fn copy_to_table_to_table() -> Result<(), DataFusionError> {
    let copy = CopyToStatement {
        source: object_name("foo"),
        target: String::from("bar"),
        partitioned_by: Vec::new(),
        stored_as: Some(String::from("CSV")),
        options: Vec::new(),
    };
    let expected = Statement::CopyTo(copy);

    // `verified_stmt` also checks that the statement displays back as the input SQL.
    let parsed = verified_stmt("COPY foo TO bar STORED AS CSV");
    assert_eq!(parsed, expected);
    Ok(())
}
1827
/// With the Snowflake dialect, a `COPY INTO ...` statement must parse as exactly
/// one statement, and that statement must not be DataFusion's `Statement::CopyTo`.
#[test]
fn skip_copy_into_snowflake() -> Result<(), DataFusionError> {
    let sql = "COPY INTO foo FROM @~/staged FILE_FORMAT = (FORMAT_NAME = 'mycsv');";
    let dialect = SnowflakeDialect;
    let statements = DFParser::parse_sql_with_dialect(sql, &dialect)?;

    assert_eq!(
        statements.len(),
        1,
        "Expected to parse exactly one statement"
    );
    assert!(
        !matches!(&statements[0], Statement::CopyTo(_)),
        "Expected non COPY TO statement, but was successful: {statements:?}"
    );
    Ok(())
}
1844
/// `EXPLAIN [ANALYZE] [VERBOSE] COPY ...` wraps the parsed `COPY` statement in an
/// `ExplainStatement` whose flags mirror the keywords present in the SQL.
#[test]
fn explain_copy_to_table_to_table() -> Result<(), DataFusionError> {
    // Each case: (sql, expected `analyze` flag, expected `verbose` flag).
    let cases = [
        ("EXPLAIN COPY foo TO bar STORED AS PARQUET", false, false),
        (
            "EXPLAIN ANALYZE COPY foo TO bar STORED AS PARQUET",
            true,
            false,
        ),
        (
            "EXPLAIN VERBOSE COPY foo TO bar STORED AS PARQUET",
            false,
            true,
        ),
        (
            "EXPLAIN ANALYZE VERBOSE COPY foo TO bar STORED AS PARQUET",
            true,
            true,
        ),
    ];

    for (sql, analyze, verbose) in cases {
        println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}");

        // The statement being explained is the same for every case.
        let inner = Statement::CopyTo(CopyToStatement {
            source: object_name("foo"),
            target: String::from("bar"),
            partitioned_by: Vec::new(),
            stored_as: Some(String::from("PARQUET")),
            options: Vec::new(),
        });
        let explain = ExplainStatement {
            analyze,
            verbose,
            format: None,
            statement: Box::new(inner),
        };

        assert_eq!(verified_stmt(sql), Statement::Explain(explain));
    }
    Ok(())
}
1885
/// `COPY (<query>) TO ...` keeps the parsed query as the copy source and
/// records the quoted option given in `OPTIONS (...)`.
#[test]
fn copy_to_query_to_table() -> Result<(), DataFusionError> {
    // Build the expected query AST by parsing `SELECT 1` and peeling off the
    // wrapper layers around it.
    let select = match verified_stmt("SELECT 1") {
        Statement::Statement(inner) => *inner,
        statement => panic!("Expected statement, got {statement:?}"),
    };
    let query = match select {
        SQLStatement::Query(query) => query,
        statement => panic!("Expected query, got {statement:?}"),
    };

    let has_header = (
        String::from("format.has_header"),
        Value::SingleQuotedString(String::from("true")),
    );
    let expected = Statement::CopyTo(CopyToStatement {
        source: CopyToSource::Query(query),
        target: String::from("bar"),
        partitioned_by: Vec::new(),
        stored_as: Some(String::from("CSV")),
        options: vec![has_header],
    });

    let sql =
        "COPY (SELECT 1) TO bar STORED AS CSV OPTIONS ('format.has_header' 'true')";
    assert_eq!(verified_stmt(sql), expected);
    Ok(())
}
1918
/// A single quoted key/value pair in `OPTIONS (...)` is kept verbatim as a
/// `(String, Value::SingleQuotedString)` entry.
#[test]
fn copy_to_options() -> Result<(), DataFusionError> {
    let row_group_size = (
        String::from("row_group_size"),
        Value::SingleQuotedString(String::from("55")),
    );
    let copy = CopyToStatement {
        source: object_name("foo"),
        target: String::from("bar"),
        partitioned_by: Vec::new(),
        stored_as: Some(String::from("CSV")),
        options: vec![row_group_size],
    };

    let parsed =
        verified_stmt("COPY foo TO bar STORED AS CSV OPTIONS ('row_group_size' '55')");
    assert_eq!(parsed, Statement::CopyTo(copy));
    Ok(())
}
1935
/// `PARTITIONED BY (a)` on a `COPY` statement populates `partitioned_by`
/// alongside any `OPTIONS (...)` entries.
#[test]
fn copy_to_partitioned_by() -> Result<(), DataFusionError> {
    let row_group_size = (
        String::from("row_group_size"),
        Value::SingleQuotedString(String::from("55")),
    );
    let copy = CopyToStatement {
        source: object_name("foo"),
        target: String::from("bar"),
        partitioned_by: vec![String::from("a")],
        stored_as: Some(String::from("CSV")),
        options: vec![row_group_size],
    };

    let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')";
    assert_eq!(verified_stmt(sql), Statement::CopyTo(copy));
    Ok(())
}
1952
/// Multiple `OPTIONS (...)` entries with unquoted values are all captured.
///
/// The expectations below pin how each unquoted value is represented: the
/// number `55` stays a `Value::Number`, while the bare word `snappy` and the
/// keyword `true` are both stored as `Value::SingleQuotedString`.
///
/// This test does not go through `verified_stmt`, so it only checks the parsed
/// options and not how the statement displays.
#[test]
fn copy_to_multi_options() -> Result<(), DataFusionError> {
    let sql = "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy, 'execution.keep_partition_by_columns' true)";

    let expected_options = vec![
        (
            "format.row_group_size".to_string(),
            Value::Number("55".to_string(), false),
        ),
        (
            "format.compression".to_string(),
            Value::SingleQuotedString("snappy".to_string()),
        ),
        (
            "execution.keep_partition_by_columns".to_string(),
            Value::SingleQuotedString("true".to_string()),
        ),
    ];

    // The test already returns `Result`, so surface parse failures through it
    // instead of panicking with `unwrap`.
    let mut statements = DFParser::parse_sql(sql)?;
    assert_eq!(statements.len(), 1);

    // Exactly one statement was parsed (asserted above); it must be a COPY.
    let options = match statements.pop_front() {
        Some(Statement::CopyTo(copy_to)) => copy_to.options,
        _ => panic!("Expected copy"),
    };

    assert_eq!(options, expected_options);

    Ok(())
}
1987
1988 fn object_name(name: &str) -> CopyToSource {
1991 CopyToSource::Relation(ObjectName::from(vec![Ident::new(name)]))
1992 }
1993
1994 fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement {
2008 let mut statements = DFParser::parse_sql(sql).unwrap();
2009 assert_eq!(statements.len(), 1);
2010
2011 if sql != canonical {
2012 assert_eq!(DFParser::parse_sql(canonical).unwrap(), statements);
2013 }
2014
2015 let only_statement = statements.pop_front().unwrap();
2016 assert_eq!(
2017 canonical.to_uppercase(),
2018 only_statement.to_string().to_uppercase()
2019 );
2020 only_statement
2021 }
2022
2023 fn verified_stmt(sql: &str) -> Statement {
2027 one_statement_parses_to(sql, sql)
2028 }
2029
/// The parser's recursion limit is enforced: `SELECT 1 OR 2` parses with the
/// builder's default settings but fails once the limit is lowered to 1.
#[test]
fn test_recursion_limit() {
    let sql = "SELECT 1 OR 2";

    // Default builder settings: parsing must succeed.
    let unlimited = DFParserBuilder::new(sql)
        .build()
        .unwrap()
        .parse_statements();
    unlimited.unwrap();

    // Recursion limit of 1: parsing must fail with the recursion error.
    let limited = DFParserBuilder::new(sql)
        .with_recursion_limit(1)
        .build()
        .unwrap()
        .parse_statements();
    let message = limited.unwrap_err().to_string();

    assert_contains!(
        message,
        "SQL error: RecursionLimitExceeded (current limit: 1)"
    );
}
2055
/// Several `;`-separated statements in one string are parsed in order: here a
/// `COPY`, a `CREATE EXTERNAL TABLE` and a `RESET`.
#[test]
fn test_multistatement() {
    let sql = "COPY foo TO bar STORED AS CSV; \
               CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'; \
               RESET var;";
    let statements = DFParser::parse_sql(sql).unwrap();

    let copy = Statement::CopyTo(CopyToStatement {
        source: object_name("foo"),
        target: String::from("bar"),
        partitioned_by: Vec::new(),
        stored_as: Some(String::from("CSV")),
        options: Vec::new(),
    });

    let create = Statement::CreateExternalTable(CreateExternalTable {
        name: ObjectName::from(vec![Ident::from("t")]),
        columns: vec![make_column_def("c1", DataType::Int(None))],
        file_type: String::from("CSV"),
        location: "foo.csv".into(),
        table_partition_cols: Vec::new(),
        order_exprs: Vec::new(),
        if_not_exists: false,
        or_replace: false,
        temporary: false,
        unbounded: false,
        options: Vec::new(),
        constraints: Vec::new(),
    });

    let variable = ObjectName::from(vec![Ident::from("var")]);
    let reset = Statement::Reset(ResetStatement::Variable(variable));

    assert_eq!(statements, vec![copy, create, reset]);
}
2097
/// A parser can be built from an already tokenized input instead of a SQL
/// string: the tokens `SELECT`, a space and the placeholder `1` parse into a
/// single statement.
#[test]
fn test_custom_tokens() {
    // Every token carries the same all-zero span.
    let span = Span {
        start: Location { line: 0, column: 0 },
        end: Location { line: 0, column: 0 },
    };

    let tokens: Vec<TokenWithSpan> = [
        Token::make_keyword("SELECT"),
        Token::Whitespace(Whitespace::Space),
        Token::Placeholder("1".to_string()),
    ]
    .into_iter()
    .map(|token| TokenWithSpan { token, span })
    .collect();

    let parsed = DFParserBuilder::new(tokens)
        .build()
        .unwrap()
        .parse_statements();
    let statements = parsed.unwrap();
    assert_eq!(statements.len(), 1);
}
2127
2128 fn expect_parse_expr_ok(sql: &str, expected: ExprWithAlias) {
2129 let expr = DFParser::parse_sql_into_expr(sql).unwrap();
2130 assert_eq!(expr, expected, "actual:\n{expr:#?}");
2131 }
2132
2133 fn expect_parse_expr_error(sql: &str, expected_error: &str) {
2135 match DFParser::parse_sql_into_expr(sql) {
2136 Ok(expr) => {
2137 panic!("Expected parse error for '{sql}', but was successful: {expr:#?}");
2138 }
2139 Err(e) => {
2140 let error_message = e.to_string();
2141 assert!(
2142 error_message.contains(expected_error),
2143 "Expected error '{expected_error}' not found in actual error '{error_message}'"
2144 );
2145 }
2146 }
2147 }
2148
/// A bare numeric literal parses into a number value with no alias.
#[test]
fn literal() {
    let number = Value::Number("1234".to_string(), false);
    let expected = ExprWithAlias {
        expr: Expr::Value(ValueWithSpan::from(number)),
        alias: None,
    };
    expect_parse_expr_ok("1234", expected)
}
2162
/// `<literal> as <name>` parses into the literal plus the alias identifier.
#[test]
fn literal_with_alias() {
    let number = Value::Number("1234".to_string(), false);
    let expected = ExprWithAlias {
        expr: Expr::Value(ValueWithSpan::from(number)),
        alias: Some(Ident::from("foo")),
    };
    expect_parse_expr_ok("1234 as foo", expected)
}
2176
/// Tokens left over after the alias (here `.bar`) are rejected rather than
/// silently ignored.
#[test]
fn literal_with_alias_and_trailing_tokens() {
    let sql = "1234 as foo.bar";
    let expected_error = "Expected: end of expression, found: .";
    expect_parse_expr_error(sql, expected_error)
}
2184
/// Trailing whitespace after the alias is accepted and does not change the
/// parsed expression.
#[test]
fn literal_with_alias_and_trailing_whitespace() {
    let number = Value::Number("1234".to_string(), false);
    let expected = ExprWithAlias {
        expr: Expr::Value(ValueWithSpan::from(number)),
        alias: Some(Ident::from("foo")),
    };
    expect_parse_expr_ok("1234 as foo ", expected)
}
2198
/// A further word after the alias, separated by whitespace, is reported as an
/// error naming the unexpected token.
#[test]
fn literal_with_alias_and_trailing_whitespace_and_token() {
    let sql = "1234 as foo bar";
    let expected_error = "Expected: end of expression, found: bar";
    expect_parse_expr_error(sql, expected_error)
}
2206}