1use datafusion_common::config::SqlParserOptions;
24use datafusion_common::DataFusionError;
25use datafusion_common::{sql_err, Diagnostic, Span};
26use sqlparser::ast::{ExprWithAlias, OrderByOptions};
27use sqlparser::tokenizer::TokenWithSpan;
28use sqlparser::{
29 ast::{
30 ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query,
31 Statement as SQLStatement, TableConstraint, Value,
32 },
33 dialect::{keywords::Keyword, Dialect, GenericDialect},
34 parser::{Parser, ParserError},
35 tokenizer::{Token, Tokenizer, Word},
36};
37use std::collections::VecDeque;
38use std::fmt;
39
/// Produces an `Err` holding a [`DataFusionError`] converted from a
/// `ParserError::ParserError` whose text is `$MSG.to_string()`.
///
/// An optional `; diagnostic = <expr>` suffix attaches that value to the
/// error via `with_diagnostic` before it is wrapped in `Err`.
macro_rules! parser_err {
    ($MSG:expr $(; diagnostic = $DIAG:expr)?) => {{
        let parse_failure = ParserError::ParserError($MSG.to_string());
        let err = DataFusionError::from(parse_failure);
        $(
            let err = err.with_diagnostic($DIAG);
        )?
        Err(err)
    }};
}
51
52fn parse_file_type(s: &str) -> Result<String, DataFusionError> {
53 Ok(s.to_uppercase())
54}
55
56#[derive(Debug, Clone, PartialEq, Eq)]
63pub struct ExplainStatement {
64 pub analyze: bool,
66 pub verbose: bool,
68 pub format: Option<String>,
70 pub statement: Box<Statement>,
74}
75
76impl fmt::Display for ExplainStatement {
77 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78 let Self {
79 analyze,
80 verbose,
81 format,
82 statement,
83 } = self;
84
85 write!(f, "EXPLAIN ")?;
86 if *analyze {
87 write!(f, "ANALYZE ")?;
88 }
89 if *verbose {
90 write!(f, "VERBOSE ")?;
91 }
92 if let Some(format) = format.as_ref() {
93 write!(f, "FORMAT {format} ")?;
94 }
95
96 write!(f, "{statement}")
97 }
98}
99
100#[derive(Debug, Clone, PartialEq, Eq)]
124pub struct CopyToStatement {
125 pub source: CopyToSource,
127 pub target: String,
129 pub partitioned_by: Vec<String>,
131 pub stored_as: Option<String>,
133 pub options: Vec<(String, Value)>,
135}
136
137impl fmt::Display for CopyToStatement {
138 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139 let Self {
140 source,
141 target,
142 partitioned_by,
143 stored_as,
144 options,
145 ..
146 } = self;
147
148 write!(f, "COPY {source} TO {target}")?;
149 if let Some(file_type) = stored_as {
150 write!(f, " STORED AS {file_type}")?;
151 }
152 if !partitioned_by.is_empty() {
153 write!(f, " PARTITIONED BY ({})", partitioned_by.join(", "))?;
154 }
155
156 if !options.is_empty() {
157 let opts: Vec<_> =
158 options.iter().map(|(k, v)| format!("'{k}' {v}")).collect();
159 write!(f, " OPTIONS ({})", opts.join(", "))?;
160 }
161
162 Ok(())
163 }
164}
165
166#[derive(Debug, Clone, PartialEq, Eq)]
167pub enum CopyToSource {
168 Relation(ObjectName),
170 Query(Box<Query>),
172}
173
174impl fmt::Display for CopyToSource {
175 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
176 match self {
177 CopyToSource::Relation(r) => write!(f, "{r}"),
178 CopyToSource::Query(q) => write!(f, "({q})"),
179 }
180 }
181}
182
183pub(crate) type LexOrdering = Vec<OrderByExpr>;
185
186#[derive(Debug, Clone, PartialEq, Eq)]
211pub struct CreateExternalTable {
212 pub name: ObjectName,
214 pub columns: Vec<ColumnDef>,
216 pub file_type: String,
218 pub location: String,
220 pub table_partition_cols: Vec<String>,
222 pub order_exprs: Vec<LexOrdering>,
224 pub if_not_exists: bool,
226 pub or_replace: bool,
228 pub temporary: bool,
230 pub unbounded: bool,
232 pub options: Vec<(String, Value)>,
234 pub constraints: Vec<TableConstraint>,
236}
237
238impl fmt::Display for CreateExternalTable {
239 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
240 write!(f, "CREATE EXTERNAL TABLE ")?;
241 if self.if_not_exists {
242 write!(f, "IF NOT EXISTS ")?;
243 }
244 write!(f, "{} ", self.name)?;
245 write!(f, "STORED AS {} ", self.file_type)?;
246 if !self.order_exprs.is_empty() {
247 write!(f, "WITH ORDER (")?;
248 let mut first = true;
249 for expr in self.order_exprs.iter().flatten() {
250 if !first {
251 write!(f, ", ")?;
252 }
253 write!(f, "{expr}")?;
254 first = false;
255 }
256 write!(f, ") ")?;
257 }
258 write!(f, "LOCATION {}", self.location)
259 }
260}
261
262#[derive(Debug, Clone, PartialEq, Eq)]
270pub enum Statement {
271 Statement(Box<SQLStatement>),
273 CreateExternalTable(CreateExternalTable),
275 CopyTo(CopyToStatement),
277 Explain(ExplainStatement),
279}
280
281impl fmt::Display for Statement {
282 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
283 match self {
284 Statement::Statement(stmt) => write!(f, "{stmt}"),
285 Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"),
286 Statement::CopyTo(stmt) => write!(f, "{stmt}"),
287 Statement::Explain(stmt) => write!(f, "{stmt}"),
288 }
289 }
290}
291
292fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), DataFusionError> {
293 if field.is_some() {
294 parser_err!(format!("{name} specified more than once",))?
295 }
296 Ok(())
297}
298
299pub struct DFParser<'a> {
308 pub parser: Parser<'a>,
309 options: SqlParserOptions,
310}
311
312const DEFAULT_RECURSION_LIMIT: usize = 50;
314const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
315
316pub struct DFParserBuilder<'a> {
349 sql: &'a str,
351 dialect: &'a dyn Dialect,
353 recursion_limit: usize,
355}
356
357impl<'a> DFParserBuilder<'a> {
358 pub fn new(sql: &'a str) -> Self {
361 Self {
362 sql,
363 dialect: &DEFAULT_DIALECT,
364 recursion_limit: DEFAULT_RECURSION_LIMIT,
365 }
366 }
367
368 pub fn with_dialect(mut self, dialect: &'a dyn Dialect) -> Self {
370 self.dialect = dialect;
371 self
372 }
373
374 pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
376 self.recursion_limit = recursion_limit;
377 self
378 }
379
380 pub fn build(self) -> Result<DFParser<'a>, DataFusionError> {
381 let mut tokenizer = Tokenizer::new(self.dialect, self.sql);
382 let tokens = tokenizer
384 .tokenize_with_location()
385 .map_err(ParserError::from)?;
386
387 Ok(DFParser {
388 parser: Parser::new(self.dialect)
389 .with_tokens_with_locations(tokens)
390 .with_recursion_limit(self.recursion_limit),
391 options: SqlParserOptions {
392 recursion_limit: self.recursion_limit,
393 ..Default::default()
394 },
395 })
396 }
397}
398
399impl<'a> DFParser<'a> {
400 #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
401 pub fn new(sql: &'a str) -> Result<Self, DataFusionError> {
402 DFParserBuilder::new(sql).build()
403 }
404
405 #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
406 pub fn new_with_dialect(
407 sql: &'a str,
408 dialect: &'a dyn Dialect,
409 ) -> Result<Self, DataFusionError> {
410 DFParserBuilder::new(sql).with_dialect(dialect).build()
411 }
412
413 pub fn parse_sql(sql: &'a str) -> Result<VecDeque<Statement>, DataFusionError> {
416 let mut parser = DFParserBuilder::new(sql).build()?;
417
418 parser.parse_statements()
419 }
420
421 pub fn parse_sql_with_dialect(
424 sql: &str,
425 dialect: &dyn Dialect,
426 ) -> Result<VecDeque<Statement>, DataFusionError> {
427 let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
428 parser.parse_statements()
429 }
430
431 pub fn parse_sql_into_expr(sql: &str) -> Result<ExprWithAlias, DataFusionError> {
432 DFParserBuilder::new(sql).build()?.parse_into_expr()
433 }
434
435 pub fn parse_sql_into_expr_with_dialect(
436 sql: &str,
437 dialect: &dyn Dialect,
438 ) -> Result<ExprWithAlias, DataFusionError> {
439 DFParserBuilder::new(sql)
440 .with_dialect(dialect)
441 .build()?
442 .parse_into_expr()
443 }
444
445 pub fn parse_statements(&mut self) -> Result<VecDeque<Statement>, DataFusionError> {
447 let mut stmts = VecDeque::new();
448 let mut expecting_statement_delimiter = false;
449 loop {
450 while self.parser.consume_token(&Token::SemiColon) {
452 expecting_statement_delimiter = false;
453 }
454
455 if self.parser.peek_token() == Token::EOF {
456 break;
457 }
458 if expecting_statement_delimiter {
459 return self.expected("end of statement", self.parser.peek_token());
460 }
461
462 let statement = self.parse_statement()?;
463 stmts.push_back(statement);
464 expecting_statement_delimiter = true;
465 }
466 Ok(stmts)
467 }
468
469 fn expected<T>(
471 &self,
472 expected: &str,
473 found: TokenWithSpan,
474 ) -> Result<T, DataFusionError> {
475 let sql_parser_span = found.span;
476 let span = Span::try_from_sqlparser_span(sql_parser_span);
477 let diagnostic = Diagnostic::new_error(
478 format!("Expected: {expected}, found: {found}{}", found.span.start),
479 span,
480 );
481 parser_err!(
482 format!("Expected: {expected}, found: {found}{}", found.span.start);
483 diagnostic=
484 diagnostic
485 )
486 }
487
488 fn expect_token(
489 &mut self,
490 expected: &str,
491 token: Token,
492 ) -> Result<(), DataFusionError> {
493 let next_token = self.parser.peek_token_ref();
494 if next_token.token != token {
495 self.expected(expected, next_token.clone())
496 } else {
497 Ok(())
498 }
499 }
500
501 pub fn parse_statement(&mut self) -> Result<Statement, DataFusionError> {
503 match self.parser.peek_token().token {
504 Token::Word(w) => {
505 match w.keyword {
506 Keyword::CREATE => {
507 self.parser.next_token(); self.parse_create()
509 }
510 Keyword::COPY => {
511 if let Token::Word(w) = self.parser.peek_nth_token(1).token {
512 if w.keyword == Keyword::INTO {
514 return self.parse_and_handle_statement();
515 }
516 }
517 self.parser.next_token(); self.parse_copy()
519 }
520 Keyword::EXPLAIN => {
521 self.parser.next_token(); self.parse_explain()
523 }
524 _ => {
525 self.parse_and_handle_statement()
527 }
528 }
529 }
530 _ => {
531 self.parse_and_handle_statement()
533 }
534 }
535 }
536
537 pub fn parse_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
538 if let Token::Word(w) = self.parser.peek_token().token {
539 match w.keyword {
540 Keyword::CREATE | Keyword::COPY | Keyword::EXPLAIN => {
541 return parser_err!("Unsupported command in expression")?;
542 }
543 _ => {}
544 }
545 }
546
547 Ok(self.parser.parse_expr_with_alias()?)
548 }
549
550 pub fn parse_into_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
555 let expr = self.parse_expr()?;
556 self.expect_token("end of expression", Token::EOF)?;
557 Ok(expr)
558 }
559
560 fn parse_and_handle_statement(&mut self) -> Result<Statement, DataFusionError> {
562 self.parser
563 .parse_statement()
564 .map(|stmt| Statement::Statement(Box::from(stmt)))
565 .map_err(|e| match e {
566 ParserError::RecursionLimitExceeded => DataFusionError::SQL(
567 Box::new(ParserError::RecursionLimitExceeded),
568 Some(format!(
569 " (current limit: {})",
570 self.options.recursion_limit
571 )),
572 ),
573 other => DataFusionError::SQL(Box::new(other), None),
574 })
575 }
576
577 pub fn parse_copy(&mut self) -> Result<Statement, DataFusionError> {
579 let source = if self.parser.consume_token(&Token::LParen) {
581 let query = self.parser.parse_query()?;
582 self.parser.expect_token(&Token::RParen)?;
583 CopyToSource::Query(query)
584 } else {
585 let table_name = self.parser.parse_object_name(true)?;
587 CopyToSource::Relation(table_name)
588 };
589
590 #[derive(Default)]
591 struct Builder {
592 stored_as: Option<String>,
593 target: Option<String>,
594 partitioned_by: Option<Vec<String>>,
595 options: Option<Vec<(String, Value)>>,
596 }
597
598 let mut builder = Builder::default();
599
600 loop {
601 if let Some(keyword) = self.parser.parse_one_of_keywords(&[
602 Keyword::STORED,
603 Keyword::TO,
604 Keyword::PARTITIONED,
605 Keyword::OPTIONS,
606 Keyword::WITH,
607 ]) {
608 match keyword {
609 Keyword::STORED => {
610 self.parser.expect_keyword(Keyword::AS)?;
611 ensure_not_set(&builder.stored_as, "STORED AS")?;
612 builder.stored_as = Some(self.parse_file_format()?);
613 }
614 Keyword::TO => {
615 ensure_not_set(&builder.target, "TO")?;
616 builder.target = Some(self.parser.parse_literal_string()?);
617 }
618 Keyword::WITH => {
619 self.parser.expect_keyword(Keyword::HEADER)?;
620 self.parser.expect_keyword(Keyword::ROW)?;
621 return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')")?;
622 }
623 Keyword::PARTITIONED => {
624 self.parser.expect_keyword(Keyword::BY)?;
625 ensure_not_set(&builder.partitioned_by, "PARTITIONED BY")?;
626 builder.partitioned_by = Some(self.parse_partitions()?);
627 }
628 Keyword::OPTIONS => {
629 ensure_not_set(&builder.options, "OPTIONS")?;
630 builder.options = Some(self.parse_value_options()?);
631 }
632 _ => {
633 unreachable!()
634 }
635 }
636 } else {
637 let token = self.parser.next_token();
638 if token == Token::EOF || token == Token::SemiColon {
639 break;
640 } else {
641 return self.expected("end of statement or ;", token)?;
642 }
643 }
644 }
645
646 let Some(target) = builder.target else {
647 return parser_err!("Missing TO clause in COPY statement")?;
648 };
649
650 Ok(Statement::CopyTo(CopyToStatement {
651 source,
652 target,
653 partitioned_by: builder.partitioned_by.unwrap_or(vec![]),
654 stored_as: builder.stored_as,
655 options: builder.options.unwrap_or(vec![]),
656 }))
657 }
658
659 pub fn parse_option_key(&mut self) -> Result<String, DataFusionError> {
666 let next_token = self.parser.next_token();
667 match next_token.token {
668 Token::Word(Word { value, .. }) => {
669 let mut parts = vec![value];
670 while self.parser.consume_token(&Token::Period) {
671 let next_token = self.parser.next_token();
672 if let Token::Word(Word { value, .. }) = next_token.token {
673 parts.push(value);
674 } else {
675 return self.expected("key name", next_token);
679 }
680 }
681 Ok(parts.join("."))
682 }
683 Token::SingleQuotedString(s) => Ok(s),
684 Token::DoubleQuotedString(s) => Ok(s),
685 Token::EscapedStringLiteral(s) => Ok(s),
686 _ => self.expected("key name", next_token),
687 }
688 }
689
690 pub fn parse_option_value(&mut self) -> Result<Value, DataFusionError> {
697 let next_token = self.parser.next_token();
698 match next_token.token {
699 Token::Word(word) => Ok(Value::SingleQuotedString(word.value)),
701 Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)),
702 Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)),
703 Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)),
704 Token::Number(n, l) => Ok(Value::Number(n, l)),
705 _ => self.expected("string or numeric value", next_token),
706 }
707 }
708
709 pub fn parse_explain(&mut self) -> Result<Statement, DataFusionError> {
711 let analyze = self.parser.parse_keyword(Keyword::ANALYZE);
712 let verbose = self.parser.parse_keyword(Keyword::VERBOSE);
713 let format = self.parse_explain_format()?;
714
715 let statement = self.parse_statement()?;
716
717 Ok(Statement::Explain(ExplainStatement {
718 statement: Box::new(statement),
719 analyze,
720 verbose,
721 format,
722 }))
723 }
724
725 pub fn parse_explain_format(&mut self) -> Result<Option<String>, DataFusionError> {
726 if !self.parser.parse_keyword(Keyword::FORMAT) {
727 return Ok(None);
728 }
729
730 let next_token = self.parser.next_token();
731 let format = match next_token.token {
732 Token::Word(w) => Ok(w.value),
733 Token::SingleQuotedString(w) => Ok(w),
734 Token::DoubleQuotedString(w) => Ok(w),
735 _ => self.expected("an explain format such as TREE", next_token),
736 }?;
737 Ok(Some(format))
738 }
739
740 pub fn parse_create(&mut self) -> Result<Statement, DataFusionError> {
742 if self
744 .parser
745 .parse_keywords(&[Keyword::OR, Keyword::REPLACE, Keyword::EXTERNAL])
746 {
747 self.parse_create_external_table(false, true)
748 } else if self.parser.parse_keywords(&[
749 Keyword::OR,
750 Keyword::REPLACE,
751 Keyword::UNBOUNDED,
752 Keyword::EXTERNAL,
753 ]) {
754 self.parse_create_external_table(true, true)
755 } else if self.parser.parse_keyword(Keyword::EXTERNAL) {
756 self.parse_create_external_table(false, false)
757 } else if self
758 .parser
759 .parse_keywords(&[Keyword::UNBOUNDED, Keyword::EXTERNAL])
760 {
761 self.parse_create_external_table(true, false)
762 } else {
763 Ok(Statement::Statement(Box::from(self.parser.parse_create()?)))
764 }
765 }
766
767 fn parse_partitions(&mut self) -> Result<Vec<String>, DataFusionError> {
768 let mut partitions: Vec<String> = vec![];
769 if !self.parser.consume_token(&Token::LParen)
770 || self.parser.consume_token(&Token::RParen)
771 {
772 return Ok(partitions);
773 }
774
775 loop {
776 if let Token::Word(_) = self.parser.peek_token().token {
777 let identifier = self.parser.parse_identifier()?;
778 partitions.push(identifier.to_string());
779 } else {
780 return self.expected("partition name", self.parser.peek_token());
781 }
782 let comma = self.parser.consume_token(&Token::Comma);
783 if self.parser.consume_token(&Token::RParen) {
784 break;
786 } else if !comma {
787 return self.expected(
788 "',' or ')' after partition definition",
789 self.parser.peek_token(),
790 );
791 }
792 }
793 Ok(partitions)
794 }
795
796 pub fn parse_order_by_exprs(&mut self) -> Result<Vec<OrderByExpr>, DataFusionError> {
798 let mut values = vec![];
799 self.parser.expect_token(&Token::LParen)?;
800 loop {
801 values.push(self.parse_order_by_expr()?);
802 if !self.parser.consume_token(&Token::Comma) {
803 self.parser.expect_token(&Token::RParen)?;
804 return Ok(values);
805 }
806 }
807 }
808
809 pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, DataFusionError> {
811 let expr = self.parser.parse_expr()?;
812
813 let asc = if self.parser.parse_keyword(Keyword::ASC) {
814 Some(true)
815 } else if self.parser.parse_keyword(Keyword::DESC) {
816 Some(false)
817 } else {
818 None
819 };
820
821 let nulls_first = if self
822 .parser
823 .parse_keywords(&[Keyword::NULLS, Keyword::FIRST])
824 {
825 Some(true)
826 } else if self.parser.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) {
827 Some(false)
828 } else {
829 None
830 };
831
832 Ok(OrderByExpr {
833 expr,
834 options: OrderByOptions { asc, nulls_first },
835 with_fill: None,
836 })
837 }
838
839 fn parse_columns(
841 &mut self,
842 ) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), DataFusionError> {
843 let mut columns = vec![];
844 let mut constraints = vec![];
845 if !self.parser.consume_token(&Token::LParen)
846 || self.parser.consume_token(&Token::RParen)
847 {
848 return Ok((columns, constraints));
849 }
850
851 loop {
852 if let Some(constraint) = self.parser.parse_optional_table_constraint()? {
853 constraints.push(constraint);
854 } else if let Token::Word(_) = self.parser.peek_token().token {
855 let column_def = self.parse_column_def()?;
856 columns.push(column_def);
857 } else {
858 return self.expected(
859 "column name or constraint definition",
860 self.parser.peek_token(),
861 );
862 }
863 let comma = self.parser.consume_token(&Token::Comma);
864 if self.parser.consume_token(&Token::RParen) {
865 break;
867 } else if !comma {
868 return self.expected(
869 "',' or ')' after column definition",
870 self.parser.peek_token(),
871 );
872 }
873 }
874
875 Ok((columns, constraints))
876 }
877
878 fn parse_column_def(&mut self) -> Result<ColumnDef, DataFusionError> {
879 let name = self.parser.parse_identifier()?;
880 let data_type = self.parser.parse_data_type()?;
881 let mut options = vec![];
882 loop {
883 if self.parser.parse_keyword(Keyword::CONSTRAINT) {
884 let name = Some(self.parser.parse_identifier()?);
885 if let Some(option) = self.parser.parse_optional_column_option()? {
886 options.push(ColumnOptionDef { name, option });
887 } else {
888 return self.expected(
889 "constraint details after CONSTRAINT <name>",
890 self.parser.peek_token(),
891 );
892 }
893 } else if let Some(option) = self.parser.parse_optional_column_option()? {
894 options.push(ColumnOptionDef { name: None, option });
895 } else {
896 break;
897 };
898 }
899 Ok(ColumnDef {
900 name,
901 data_type,
902 options,
903 })
904 }
905
906 fn parse_create_external_table(
907 &mut self,
908 unbounded: bool,
909 or_replace: bool,
910 ) -> Result<Statement, DataFusionError> {
911 let temporary = self
912 .parser
913 .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY])
914 .is_some();
915
916 self.parser.expect_keyword(Keyword::TABLE)?;
917 let if_not_exists =
918 self.parser
919 .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
920
921 if if_not_exists && or_replace {
922 return parser_err!("'IF NOT EXISTS' cannot coexist with 'REPLACE'");
923 }
924
925 let table_name = self.parser.parse_object_name(true)?;
926 let (mut columns, constraints) = self.parse_columns()?;
927
928 #[derive(Default)]
929 struct Builder {
930 file_type: Option<String>,
931 location: Option<String>,
932 table_partition_cols: Option<Vec<String>>,
933 order_exprs: Vec<LexOrdering>,
934 options: Option<Vec<(String, Value)>>,
935 }
936 let mut builder = Builder::default();
937
938 loop {
939 if let Some(keyword) = self.parser.parse_one_of_keywords(&[
940 Keyword::STORED,
941 Keyword::LOCATION,
942 Keyword::WITH,
943 Keyword::DELIMITER,
944 Keyword::COMPRESSION,
945 Keyword::PARTITIONED,
946 Keyword::OPTIONS,
947 ]) {
948 match keyword {
949 Keyword::STORED => {
950 self.parser.expect_keyword(Keyword::AS)?;
951 ensure_not_set(&builder.file_type, "STORED AS")?;
952 builder.file_type = Some(self.parse_file_format()?);
953 }
954 Keyword::LOCATION => {
955 ensure_not_set(&builder.location, "LOCATION")?;
956 builder.location = Some(self.parser.parse_literal_string()?);
957 }
958 Keyword::WITH => {
959 if self.parser.parse_keyword(Keyword::ORDER) {
960 builder.order_exprs.push(self.parse_order_by_exprs()?);
961 } else {
962 self.parser.expect_keyword(Keyword::HEADER)?;
963 self.parser.expect_keyword(Keyword::ROW)?;
964 return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)")?;
965 }
966 }
967 Keyword::DELIMITER => {
968 return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')")?;
969 }
970 Keyword::COMPRESSION => {
971 self.parser.expect_keyword(Keyword::TYPE)?;
972 return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)")?;
973 }
974 Keyword::PARTITIONED => {
975 self.parser.expect_keyword(Keyword::BY)?;
976 ensure_not_set(&builder.table_partition_cols, "PARTITIONED BY")?;
977 let peeked = self.parser.peek_nth_token(2);
982 if peeked == Token::Comma || peeked == Token::RParen {
983 builder.table_partition_cols = Some(self.parse_partitions()?)
985 } else {
986 let (cols, cons) = self.parse_columns()?;
988 builder.table_partition_cols = Some(
989 cols.iter().map(|col| col.name.to_string()).collect(),
990 );
991
992 columns.extend(cols);
993
994 if !cons.is_empty() {
995 return sql_err!(ParserError::ParserError(
996 "Constraints on Partition Columns are not supported"
997 .to_string(),
998 ));
999 }
1000 }
1001 }
1002 Keyword::OPTIONS => {
1003 ensure_not_set(&builder.options, "OPTIONS")?;
1004 builder.options = Some(self.parse_value_options()?);
1005 }
1006 _ => {
1007 unreachable!()
1008 }
1009 }
1010 } else {
1011 let token = self.parser.next_token();
1012 if token == Token::EOF || token == Token::SemiColon {
1013 break;
1014 } else {
1015 return self.expected("end of statement or ;", token)?;
1016 }
1017 }
1018 }
1019
1020 if builder.file_type.is_none() {
1022 return sql_err!(ParserError::ParserError(
1023 "Missing STORED AS clause in CREATE EXTERNAL TABLE statement".into(),
1024 ));
1025 }
1026 if builder.location.is_none() {
1027 return sql_err!(ParserError::ParserError(
1028 "Missing LOCATION clause in CREATE EXTERNAL TABLE statement".into(),
1029 ));
1030 }
1031
1032 let create = CreateExternalTable {
1033 name: table_name,
1034 columns,
1035 file_type: builder.file_type.unwrap(),
1036 location: builder.location.unwrap(),
1037 table_partition_cols: builder.table_partition_cols.unwrap_or(vec![]),
1038 order_exprs: builder.order_exprs,
1039 if_not_exists,
1040 or_replace,
1041 temporary,
1042 unbounded,
1043 options: builder.options.unwrap_or(Vec::new()),
1044 constraints,
1045 };
1046 Ok(Statement::CreateExternalTable(create))
1047 }
1048
1049 fn parse_file_format(&mut self) -> Result<String, DataFusionError> {
1051 let token = self.parser.next_token();
1052 match &token.token {
1053 Token::Word(w) => parse_file_type(&w.value),
1054 _ => self.expected("one of ARROW, PARQUET, NDJSON, or CSV", token),
1055 }
1056 }
1057
1058 fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>, DataFusionError> {
1063 let mut options = vec![];
1064 self.parser.expect_token(&Token::LParen)?;
1065
1066 loop {
1067 let key = self.parse_option_key()?;
1068 let value = self.parse_option_value()?;
1069 options.push((key, value));
1070 let comma = self.parser.consume_token(&Token::Comma);
1071 if self.parser.consume_token(&Token::RParen) {
1072 break;
1074 } else if !comma {
1075 return self.expected(
1076 "',' or ')' after option definition",
1077 self.parser.peek_token(),
1078 );
1079 }
1080 }
1081 Ok(options)
1082 }
1083}
1084
1085#[cfg(test)]
1086mod tests {
1087 use super::*;
1088 use datafusion_common::assert_contains;
1089 use sqlparser::ast::Expr::Identifier;
1090 use sqlparser::ast::{
1091 BinaryOperator, DataType, ExactNumberInfo, Expr, Ident, ValueWithSpan,
1092 };
1093 use sqlparser::dialect::SnowflakeDialect;
1094 use sqlparser::tokenizer::Span;
1095
1096 fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), DataFusionError> {
1097 let statements = DFParser::parse_sql(sql)?;
1098 assert_eq!(
1099 statements.len(),
1100 1,
1101 "Expected to parse exactly one statement"
1102 );
1103 assert_eq!(statements[0], expected, "actual:\n{:#?}", statements[0]);
1104 Ok(())
1105 }
1106
1107 fn expect_parse_error(sql: &str, expected_error: &str) {
1109 match DFParser::parse_sql(sql) {
1110 Ok(statements) => {
1111 panic!(
1112 "Expected parse error for '{sql}', but was successful: {statements:?}"
1113 );
1114 }
1115 Err(e) => {
1116 let error_message = e.to_string();
1117 assert!(
1118 error_message.contains(expected_error),
1119 "Expected error '{expected_error}' not found in actual error '{error_message}'"
1120 );
1121 }
1122 }
1123 }
1124
1125 fn make_column_def(name: impl Into<String>, data_type: DataType) -> ColumnDef {
1126 ColumnDef {
1127 name: Ident {
1128 value: name.into(),
1129 quote_style: None,
1130 span: Span::empty(),
1131 },
1132 data_type,
1133 options: vec![],
1134 }
1135 }
1136
1137 #[test]
1138 fn create_external_table() -> Result<(), DataFusionError> {
1139 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
1141 let display = None;
1142 let name = ObjectName::from(vec![Ident::from("t")]);
1143 let expected = Statement::CreateExternalTable(CreateExternalTable {
1144 name: name.clone(),
1145 columns: vec![make_column_def("c1", DataType::Int(display))],
1146 file_type: "CSV".to_string(),
1147 location: "foo.csv".into(),
1148 table_partition_cols: vec![],
1149 order_exprs: vec![],
1150 if_not_exists: false,
1151 or_replace: false,
1152 temporary: false,
1153 unbounded: false,
1154 options: vec![],
1155 constraints: vec![],
1156 });
1157 expect_parse_ok(sql, expected)?;
1158
1159 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ";
1161 let expected = Statement::CreateExternalTable(CreateExternalTable {
1162 name: name.clone(),
1163 columns: vec![make_column_def("c1", DataType::Int(None))],
1164 file_type: "CSV".to_string(),
1165 location: "foo.csv".into(),
1166 table_partition_cols: vec![],
1167 order_exprs: vec![],
1168 if_not_exists: false,
1169 or_replace: false,
1170 temporary: false,
1171 unbounded: false,
1172 options: vec![],
1173 constraints: vec![],
1174 });
1175 expect_parse_ok(sql, expected)?;
1176
1177 let sql =
1179 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ;";
1180 let expected = Statement::CreateExternalTable(CreateExternalTable {
1181 name: name.clone(),
1182 columns: vec![make_column_def("c1", DataType::Int(None))],
1183 file_type: "CSV".to_string(),
1184 location: "foo.csv".into(),
1185 table_partition_cols: vec![],
1186 order_exprs: vec![],
1187 if_not_exists: false,
1188 or_replace: false,
1189 temporary: false,
1190 unbounded: false,
1191 options: vec![],
1192 constraints: vec![],
1193 });
1194 expect_parse_ok(sql, expected)?;
1195
1196 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
1198 let display = None;
1199 let expected = Statement::CreateExternalTable(CreateExternalTable {
1200 name: name.clone(),
1201 columns: vec![make_column_def("c1", DataType::Int(display))],
1202 file_type: "CSV".to_string(),
1203 location: "foo.csv".into(),
1204 table_partition_cols: vec![],
1205 order_exprs: vec![],
1206 if_not_exists: false,
1207 or_replace: false,
1208 temporary: false,
1209 unbounded: false,
1210 options: vec![(
1211 "format.delimiter".into(),
1212 Value::SingleQuotedString("|".into()),
1213 )],
1214 constraints: vec![],
1215 });
1216 expect_parse_ok(sql, expected)?;
1217
1218 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'";
1220 let display = None;
1221 let expected = Statement::CreateExternalTable(CreateExternalTable {
1222 name: name.clone(),
1223 columns: vec![make_column_def("c1", DataType::Int(display))],
1224 file_type: "CSV".to_string(),
1225 location: "foo.csv".into(),
1226 table_partition_cols: vec!["p1".to_string(), "p2".to_string()],
1227 order_exprs: vec![],
1228 if_not_exists: false,
1229 or_replace: false,
1230 temporary: false,
1231 unbounded: false,
1232 options: vec![],
1233 constraints: vec![],
1234 });
1235 expect_parse_ok(sql, expected)?;
1236
1237 let sqls =
1239 vec![
1240 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1241 ('format.compression' 'GZIP')", "GZIP"),
1242 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1243 ('format.compression' 'BZIP2')", "BZIP2"),
1244 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1245 ('format.compression' 'XZ')", "XZ"),
1246 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1247 ('format.compression' 'ZSTD')", "ZSTD"),
1248 ];
1249 for (sql, compression) in sqls {
1250 let expected = Statement::CreateExternalTable(CreateExternalTable {
1251 name: name.clone(),
1252 columns: vec![make_column_def("c1", DataType::Int(display))],
1253 file_type: "CSV".to_string(),
1254 location: "foo.csv".into(),
1255 table_partition_cols: vec![],
1256 order_exprs: vec![],
1257 if_not_exists: false,
1258 or_replace: false,
1259 temporary: false,
1260 unbounded: false,
1261 options: vec![(
1262 "format.compression".into(),
1263 Value::SingleQuotedString(compression.into()),
1264 )],
1265 constraints: vec![],
1266 });
1267 expect_parse_ok(sql, expected)?;
1268 }
1269
1270 let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1272 let expected = Statement::CreateExternalTable(CreateExternalTable {
1273 name: name.clone(),
1274 columns: vec![],
1275 file_type: "PARQUET".to_string(),
1276 location: "foo.parquet".into(),
1277 table_partition_cols: vec![],
1278 order_exprs: vec![],
1279 if_not_exists: false,
1280 or_replace: false,
1281 temporary: false,
1282 unbounded: false,
1283 options: vec![],
1284 constraints: vec![],
1285 });
1286 expect_parse_ok(sql, expected)?;
1287
1288 let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'";
1290 let expected = Statement::CreateExternalTable(CreateExternalTable {
1291 name: name.clone(),
1292 columns: vec![],
1293 file_type: "PARQUET".to_string(),
1294 location: "foo.parquet".into(),
1295 table_partition_cols: vec![],
1296 order_exprs: vec![],
1297 if_not_exists: false,
1298 or_replace: false,
1299 temporary: false,
1300 unbounded: false,
1301 options: vec![],
1302 constraints: vec![],
1303 });
1304 expect_parse_ok(sql, expected)?;
1305
1306 let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
1308 let expected = Statement::CreateExternalTable(CreateExternalTable {
1309 name: name.clone(),
1310 columns: vec![],
1311 file_type: "AVRO".to_string(),
1312 location: "foo.avro".into(),
1313 table_partition_cols: vec![],
1314 order_exprs: vec![],
1315 if_not_exists: false,
1316 or_replace: false,
1317 temporary: false,
1318 unbounded: false,
1319 options: vec![],
1320 constraints: vec![],
1321 });
1322 expect_parse_ok(sql, expected)?;
1323
1324 let sql =
1326 "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'";
1327 let expected = Statement::CreateExternalTable(CreateExternalTable {
1328 name: name.clone(),
1329 columns: vec![],
1330 file_type: "PARQUET".to_string(),
1331 location: "foo.parquet".into(),
1332 table_partition_cols: vec![],
1333 order_exprs: vec![],
1334 if_not_exists: true,
1335 or_replace: false,
1336 temporary: false,
1337 unbounded: false,
1338 options: vec![],
1339 constraints: vec![],
1340 });
1341 expect_parse_ok(sql, expected)?;
1342
1343 let sql =
1345 "CREATE OR REPLACE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1346 let expected = Statement::CreateExternalTable(CreateExternalTable {
1347 name: name.clone(),
1348 columns: vec![],
1349 file_type: "PARQUET".to_string(),
1350 location: "foo.parquet".into(),
1351 table_partition_cols: vec![],
1352 order_exprs: vec![],
1353 if_not_exists: false,
1354 or_replace: true,
1355 temporary: false,
1356 unbounded: false,
1357 options: vec![],
1358 constraints: vec![],
1359 });
1360 expect_parse_ok(sql, expected)?;
1361
1362 let sql =
1364 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'";
1365 let expected = Statement::CreateExternalTable(CreateExternalTable {
1366 name: name.clone(),
1367 columns: vec![
1368 make_column_def("c1", DataType::Int(None)),
1369 make_column_def("p1", DataType::Int(None)),
1370 ],
1371 file_type: "CSV".to_string(),
1372 location: "foo.csv".into(),
1373 table_partition_cols: vec!["p1".to_string()],
1374 order_exprs: vec![],
1375 if_not_exists: false,
1376 or_replace: false,
1377 temporary: false,
1378 unbounded: false,
1379 options: vec![],
1380 constraints: vec![],
1381 });
1382 expect_parse_ok(sql, expected)?;
1383
1384 let sql =
1386 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'";
1387 expect_parse_error(
1388 sql,
1389 "SQL error: ParserError(\"Expected: a data type name, found: ) at Line: 1, Column: 73\")",
1390 );
1391
1392 let sql =
1394 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'";
1395 expect_parse_error(sql, "SQL error: ParserError(\"Expected: ',' or ')' after partition definition, found: int at Line: 1, Column: 70\")");
1396
1397 let sql =
1399 "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'";
1400 let expected = Statement::CreateExternalTable(CreateExternalTable {
1401 name: name.clone(),
1402 columns: vec![],
1403 file_type: "X".to_string(),
1404 location: "blahblah".into(),
1405 table_partition_cols: vec![],
1406 order_exprs: vec![],
1407 if_not_exists: false,
1408 or_replace: false,
1409 temporary: false,
1410 unbounded: false,
1411 options: vec![("k1".into(), Value::SingleQuotedString("v1".into()))],
1412 constraints: vec![],
1413 });
1414 expect_parse_ok(sql, expected)?;
1415
1416 let sql =
1418 "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'";
1419 let expected = Statement::CreateExternalTable(CreateExternalTable {
1420 name: name.clone(),
1421 columns: vec![],
1422 file_type: "X".to_string(),
1423 location: "blahblah".into(),
1424 table_partition_cols: vec![],
1425 order_exprs: vec![],
1426 if_not_exists: false,
1427 or_replace: false,
1428 temporary: false,
1429 unbounded: false,
1430 options: vec![
1431 ("k1".into(), Value::SingleQuotedString("v1".into())),
1432 ("k2".into(), Value::SingleQuotedString("v2".into())),
1433 ],
1434 constraints: vec![],
1435 });
1436 expect_parse_ok(sql, expected)?;
1437
1438 let sqls = ["CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'",
1440 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'",
1441 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS LAST) LOCATION 'foo.csv'",
1442 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC) LOCATION 'foo.csv'",
1443 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC) LOCATION 'foo.csv'",
1444 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS FIRST) LOCATION 'foo.csv'",
1445 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS LAST) LOCATION 'foo.csv'",
1446 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS FIRST) LOCATION 'foo.csv'",
1447 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS LAST) LOCATION 'foo.csv'"];
1448 let expected = vec![
1449 (None, None),
1450 (None, Some(true)),
1451 (None, Some(false)),
1452 (Some(true), None),
1453 (Some(false), None),
1454 (Some(false), Some(true)),
1455 (Some(false), Some(false)),
1456 (Some(true), Some(true)),
1457 (Some(true), Some(false)),
1458 ];
1459 for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) {
1460 let expected = Statement::CreateExternalTable(CreateExternalTable {
1461 name: name.clone(),
1462 columns: vec![make_column_def("c1", DataType::Int(None))],
1463 file_type: "CSV".to_string(),
1464 location: "foo.csv".into(),
1465 table_partition_cols: vec![],
1466 order_exprs: vec![vec![OrderByExpr {
1467 expr: Identifier(Ident {
1468 value: "c1".to_owned(),
1469 quote_style: None,
1470 span: Span::empty(),
1471 }),
1472 options: OrderByOptions { asc, nulls_first },
1473 with_fill: None,
1474 }]],
1475 if_not_exists: false,
1476 or_replace: false,
1477 temporary: false,
1478 unbounded: false,
1479 options: vec![],
1480 constraints: vec![],
1481 });
1482 expect_parse_ok(sql, expected)?;
1483 }
1484
1485 let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
1487 let display = None;
1488 let expected = Statement::CreateExternalTable(CreateExternalTable {
1489 name: name.clone(),
1490 columns: vec![
1491 make_column_def("c1", DataType::Int(display)),
1492 make_column_def("c2", DataType::Int(display)),
1493 ],
1494 file_type: "CSV".to_string(),
1495 location: "foo.csv".into(),
1496 table_partition_cols: vec![],
1497 order_exprs: vec![vec![
1498 OrderByExpr {
1499 expr: Identifier(Ident {
1500 value: "c1".to_owned(),
1501 quote_style: None,
1502 span: Span::empty(),
1503 }),
1504 options: OrderByOptions {
1505 asc: Some(true),
1506 nulls_first: None,
1507 },
1508 with_fill: None,
1509 },
1510 OrderByExpr {
1511 expr: Identifier(Ident {
1512 value: "c2".to_owned(),
1513 quote_style: None,
1514 span: Span::empty(),
1515 }),
1516 options: OrderByOptions {
1517 asc: Some(false),
1518 nulls_first: Some(true),
1519 },
1520 with_fill: None,
1521 },
1522 ]],
1523 if_not_exists: false,
1524 or_replace: false,
1525 temporary: false,
1526 unbounded: false,
1527 options: vec![],
1528 constraints: vec![],
1529 });
1530 expect_parse_ok(sql, expected)?;
1531
1532 let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
1534 let display = None;
1535 let expected = Statement::CreateExternalTable(CreateExternalTable {
1536 name: name.clone(),
1537 columns: vec![
1538 make_column_def("c1", DataType::Int(display)),
1539 make_column_def("c2", DataType::Int(display)),
1540 ],
1541 file_type: "CSV".to_string(),
1542 location: "foo.csv".into(),
1543 table_partition_cols: vec![],
1544 order_exprs: vec![vec![OrderByExpr {
1545 expr: Expr::BinaryOp {
1546 left: Box::new(Identifier(Ident {
1547 value: "c1".to_owned(),
1548 quote_style: None,
1549 span: Span::empty(),
1550 })),
1551 op: BinaryOperator::Minus,
1552 right: Box::new(Identifier(Ident {
1553 value: "c2".to_owned(),
1554 quote_style: None,
1555 span: Span::empty(),
1556 })),
1557 },
1558 options: OrderByOptions {
1559 asc: Some(true),
1560 nulls_first: None,
1561 },
1562 with_fill: None,
1563 }]],
1564 if_not_exists: false,
1565 or_replace: false,
1566 temporary: false,
1567 unbounded: false,
1568 options: vec![],
1569 constraints: vec![],
1570 });
1571 expect_parse_ok(sql, expected)?;
1572
1573 let sql = "
1575 CREATE UNBOUNDED EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float)
1576 STORED AS PARQUET
1577 WITH ORDER (c1 - c2 ASC)
1578 PARTITIONED BY (c1)
1579 LOCATION 'foo.parquet'
1580 OPTIONS ('format.compression' 'zstd',
1581 'format.delimiter' '*',
1582 'ROW_GROUP_SIZE' '1024',
1583 'TRUNCATE' 'NO',
1584 'format.has_header' 'true')";
1585 let expected = Statement::CreateExternalTable(CreateExternalTable {
1586 name: name.clone(),
1587 columns: vec![
1588 make_column_def("c1", DataType::Int(None)),
1589 make_column_def("c2", DataType::Float(ExactNumberInfo::None)),
1590 ],
1591 file_type: "PARQUET".to_string(),
1592 location: "foo.parquet".into(),
1593 table_partition_cols: vec!["c1".into()],
1594 order_exprs: vec![vec![OrderByExpr {
1595 expr: Expr::BinaryOp {
1596 left: Box::new(Identifier(Ident {
1597 value: "c1".to_owned(),
1598 quote_style: None,
1599 span: Span::empty(),
1600 })),
1601 op: BinaryOperator::Minus,
1602 right: Box::new(Identifier(Ident {
1603 value: "c2".to_owned(),
1604 quote_style: None,
1605 span: Span::empty(),
1606 })),
1607 },
1608 options: OrderByOptions {
1609 asc: Some(true),
1610 nulls_first: None,
1611 },
1612 with_fill: None,
1613 }]],
1614 if_not_exists: true,
1615 or_replace: false,
1616 temporary: false,
1617 unbounded: true,
1618 options: vec![
1619 (
1620 "format.compression".into(),
1621 Value::SingleQuotedString("zstd".into()),
1622 ),
1623 (
1624 "format.delimiter".into(),
1625 Value::SingleQuotedString("*".into()),
1626 ),
1627 (
1628 "ROW_GROUP_SIZE".into(),
1629 Value::SingleQuotedString("1024".into()),
1630 ),
1631 ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1632 (
1633 "format.has_header".into(),
1634 Value::SingleQuotedString("true".into()),
1635 ),
1636 ],
1637 constraints: vec![],
1638 });
1639 expect_parse_ok(sql, expected)?;
1640
1641 let sql = "
1643 CREATE OR REPLACE UNBOUNDED EXTERNAL TABLE t (c1 int, c2 float)
1644 STORED AS PARQUET
1645 WITH ORDER (c1 - c2 ASC)
1646 PARTITIONED BY (c1)
1647 LOCATION 'foo.parquet'
1648 OPTIONS ('format.compression' 'zstd',
1649 'format.delimiter' '*',
1650 'ROW_GROUP_SIZE' '1024',
1651 'TRUNCATE' 'NO',
1652 'format.has_header' 'true')";
1653 let expected = Statement::CreateExternalTable(CreateExternalTable {
1654 name: name.clone(),
1655 columns: vec![
1656 make_column_def("c1", DataType::Int(None)),
1657 make_column_def("c2", DataType::Float(ExactNumberInfo::None)),
1658 ],
1659 file_type: "PARQUET".to_string(),
1660 location: "foo.parquet".into(),
1661 table_partition_cols: vec!["c1".into()],
1662 order_exprs: vec![vec![OrderByExpr {
1663 expr: Expr::BinaryOp {
1664 left: Box::new(Identifier(Ident {
1665 value: "c1".to_owned(),
1666 quote_style: None,
1667 span: Span::empty(),
1668 })),
1669 op: BinaryOperator::Minus,
1670 right: Box::new(Identifier(Ident {
1671 value: "c2".to_owned(),
1672 quote_style: None,
1673 span: Span::empty(),
1674 })),
1675 },
1676 options: OrderByOptions {
1677 asc: Some(true),
1678 nulls_first: None,
1679 },
1680 with_fill: None,
1681 }]],
1682 if_not_exists: false,
1683 or_replace: true,
1684 temporary: false,
1685 unbounded: true,
1686 options: vec![
1687 (
1688 "format.compression".into(),
1689 Value::SingleQuotedString("zstd".into()),
1690 ),
1691 (
1692 "format.delimiter".into(),
1693 Value::SingleQuotedString("*".into()),
1694 ),
1695 (
1696 "ROW_GROUP_SIZE".into(),
1697 Value::SingleQuotedString("1024".into()),
1698 ),
1699 ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1700 (
1701 "format.has_header".into(),
1702 Value::SingleQuotedString("true".into()),
1703 ),
1704 ],
1705 constraints: vec![],
1706 });
1707 expect_parse_ok(sql, expected)?;
1708
1709 Ok(())
1712 }
1713
/// A plain `COPY <table> TO <target> STORED AS <format>` statement parses
/// into a [`CopyToStatement`] with no partition columns and no options, and
/// round-trips through `Display` (checked by `verified_stmt`).
#[test]
fn copy_to_table_to_table() -> Result<(), DataFusionError> {
    let copy = CopyToStatement {
        source: object_name("foo"),
        target: String::from("bar"),
        partitioned_by: Vec::new(),
        stored_as: Some(String::from("CSV")),
        options: Vec::new(),
    };
    let expected = Statement::CopyTo(copy);

    let parsed = verified_stmt("COPY foo TO bar STORED AS CSV");
    assert_eq!(parsed, expected);
    Ok(())
}
1729
/// A Snowflake-style `COPY INTO ...` statement parsed with the Snowflake
/// dialect must yield exactly one statement, and that statement must not be
/// a `Statement::CopyTo`.
#[test]
fn skip_copy_into_snowflake() -> Result<(), DataFusionError> {
    let sql = "COPY INTO foo FROM @~/staged FILE_FORMAT = (FORMAT_NAME = 'mycsv');";
    let dialect = SnowflakeDialect;
    let statements = DFParser::parse_sql_with_dialect(sql, &dialect)?;

    assert_eq!(
        statements.len(),
        1,
        "Expected to parse exactly one statement"
    );
    // The single parsed statement must be anything but a COPY TO.
    assert!(
        !matches!(&statements[0], Statement::CopyTo(_)),
        "Expected non COPY TO statement, but was successful: {statements:?}"
    );
    Ok(())
}
1746
/// `EXPLAIN [ANALYZE] [VERBOSE] COPY ...` parses into an
/// [`ExplainStatement`] that wraps the `COPY` statement, with the `analyze`
/// and `verbose` flags matching the keywords present in the SQL.
#[test]
fn explain_copy_to_table_to_table() -> Result<(), DataFusionError> {
    // Each case is (sql, expected `analyze`, expected `verbose`).
    let cases = [
        ("EXPLAIN COPY foo TO bar STORED AS PARQUET", false, false),
        (
            "EXPLAIN ANALYZE COPY foo TO bar STORED AS PARQUET",
            true,
            false,
        ),
        (
            "EXPLAIN VERBOSE COPY foo TO bar STORED AS PARQUET",
            false,
            true,
        ),
        (
            "EXPLAIN ANALYZE VERBOSE COPY foo TO bar STORED AS PARQUET",
            true,
            true,
        ),
    ];

    for (sql, analyze, verbose) in cases {
        println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}");

        // The statement being explained is the same in every case.
        let inner = CopyToStatement {
            source: object_name("foo"),
            target: String::from("bar"),
            partitioned_by: Vec::new(),
            stored_as: Some(String::from("PARQUET")),
            options: Vec::new(),
        };
        let expected = Statement::Explain(ExplainStatement {
            analyze,
            verbose,
            format: None,
            statement: Box::new(Statement::CopyTo(inner)),
        });

        assert_eq!(verified_stmt(sql), expected);
    }
    Ok(())
}
1787
/// `COPY (<query>) TO ...` parses with a `CopyToSource::Query` source whose
/// query equals the one obtained by parsing the same `SELECT` on its own.
#[test]
fn copy_to_query_to_table() -> Result<(), DataFusionError> {
    // Parse the bare query first so it can be reused as the expected source.
    let sql_statement = match verified_stmt("SELECT 1") {
        Statement::Statement(boxed) => *boxed,
        other => panic!("Expected statement, got {other:?}"),
    };

    let query = match sql_statement {
        SQLStatement::Query(query) => query,
        other => panic!("Expected query, got {other:?}"),
    };

    let header_option = (
        "format.has_header".into(),
        Value::SingleQuotedString("true".into()),
    );
    let expected = Statement::CopyTo(CopyToStatement {
        source: CopyToSource::Query(query),
        target: String::from("bar"),
        partitioned_by: Vec::new(),
        stored_as: Some(String::from("CSV")),
        options: vec![header_option],
    });

    let sql =
        "COPY (SELECT 1) TO bar STORED AS CSV OPTIONS ('format.has_header' 'true')";
    assert_eq!(verified_stmt(sql), expected);
    Ok(())
}
1820
/// A single quoted key/value pair in the `OPTIONS (...)` clause of `COPY`
/// is captured as a `(String, Value::SingleQuotedString)` entry.
#[test]
fn copy_to_options() -> Result<(), DataFusionError> {
    let row_group_size = (
        String::from("row_group_size"),
        Value::SingleQuotedString(String::from("55")),
    );
    let expected = Statement::CopyTo(CopyToStatement {
        source: object_name("foo"),
        target: String::from("bar"),
        partitioned_by: Vec::new(),
        stored_as: Some(String::from("CSV")),
        options: vec![row_group_size],
    });

    let parsed =
        verified_stmt("COPY foo TO bar STORED AS CSV OPTIONS ('row_group_size' '55')");
    assert_eq!(parsed, expected);
    Ok(())
}
1837
/// The `PARTITIONED BY (...)` clause of `COPY` populates `partitioned_by`
/// with the listed column names, alongside any `OPTIONS (...)` entries.
#[test]
fn copy_to_partitioned_by() -> Result<(), DataFusionError> {
    let row_group_size = (
        String::from("row_group_size"),
        Value::SingleQuotedString(String::from("55")),
    );
    let expected = Statement::CopyTo(CopyToStatement {
        source: object_name("foo"),
        target: String::from("bar"),
        partitioned_by: vec![String::from("a")],
        stored_as: Some(String::from("CSV")),
        options: vec![row_group_size],
    });

    let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')";
    let parsed = verified_stmt(sql);
    assert_eq!(parsed, expected);
    Ok(())
}
1854
/// Several `OPTIONS (...)` entries with differently shaped values are all
/// captured, in order: a bare number stays a `Value::Number`, while the
/// unquoted words `snappy` and `true` come back as
/// `Value::SingleQuotedString`.
#[test]
fn copy_to_multi_options() -> Result<(), DataFusionError> {
    let sql =
        "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy, 'execution.keep_partition_by_columns' true)";

    let mut parsed = DFParser::parse_sql(sql).unwrap();
    assert_eq!(parsed.len(), 1);

    // Only the options of the single COPY statement are of interest here.
    let options = match parsed.pop_front().unwrap() {
        Statement::CopyTo(copy_to) => copy_to.options,
        _ => panic!("Expected copy"),
    };

    let expected_options = vec![
        (
            String::from("format.row_group_size"),
            Value::Number(String::from("55"), false),
        ),
        (
            String::from("format.compression"),
            Value::SingleQuotedString(String::from("snappy")),
        ),
        (
            String::from("execution.keep_partition_by_columns"),
            Value::SingleQuotedString(String::from("true")),
        ),
    ];
    assert_eq!(options, expected_options);

    Ok(())
}
1890
1891 fn object_name(name: &str) -> CopyToSource {
1894 CopyToSource::Relation(ObjectName::from(vec![Ident::new(name)]))
1895 }
1896
1897 fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement {
1911 let mut statements = DFParser::parse_sql(sql).unwrap();
1912 assert_eq!(statements.len(), 1);
1913
1914 if sql != canonical {
1915 assert_eq!(DFParser::parse_sql(canonical).unwrap(), statements);
1916 }
1917
1918 let only_statement = statements.pop_front().unwrap();
1919 assert_eq!(
1920 canonical.to_uppercase(),
1921 only_statement.to_string().to_uppercase()
1922 );
1923 only_statement
1924 }
1925
1926 fn verified_stmt(sql: &str) -> Statement {
1930 one_statement_parses_to(sql, sql)
1931 }
1932
/// The same query parses with the builder's default settings but fails
/// with a `RecursionLimitExceeded` error when the recursion limit is set
/// to 1.
#[test]
fn test_recursion_limit() {
    let sql = "SELECT 1 OR 2";

    // Default builder configuration: parsing must succeed.
    let mut default_parser = DFParserBuilder::new(sql).build().unwrap();
    default_parser.parse_statements().unwrap();

    // Recursion limit of 1: parsing must fail.
    let mut limited_parser = DFParserBuilder::new(sql)
        .with_recursion_limit(1)
        .build()
        .unwrap();
    let err = limited_parser.parse_statements().unwrap_err();

    assert_contains!(
        err.to_string(),
        "SQL error: RecursionLimitExceeded (current limit: 1)"
    );
}
1958
1959 fn expect_parse_expr_ok(sql: &str, expected: ExprWithAlias) {
1960 let expr = DFParser::parse_sql_into_expr(sql).unwrap();
1961 assert_eq!(expr, expected, "actual:\n{expr:#?}");
1962 }
1963
1964 fn expect_parse_expr_error(sql: &str, expected_error: &str) {
1966 match DFParser::parse_sql_into_expr(sql) {
1967 Ok(expr) => {
1968 panic!("Expected parse error for '{sql}', but was successful: {expr:#?}");
1969 }
1970 Err(e) => {
1971 let error_message = e.to_string();
1972 assert!(
1973 error_message.contains(expected_error),
1974 "Expected error '{expected_error}' not found in actual error '{error_message}'"
1975 );
1976 }
1977 }
1978 }
1979
/// A bare numeric literal parses into a `Value::Number` expression with no
/// alias.
#[test]
fn literal() {
    let number = Value::Number("1234".to_string(), false);
    let expected = ExprWithAlias {
        expr: Expr::Value(ValueWithSpan::from(number)),
        alias: None,
    };
    expect_parse_expr_ok("1234", expected);
}
1993
/// A numeric literal followed by `as <ident>` parses into the literal
/// expression with that identifier as its alias.
#[test]
fn literal_with_alias() {
    let number = Value::Number("1234".to_string(), false);
    let expected = ExprWithAlias {
        expr: Expr::Value(ValueWithSpan::from(number)),
        alias: Some(Ident::from("foo")),
    };
    expect_parse_expr_ok("1234 as foo", expected);
}
2007
/// Tokens left over after the alias (here `.bar`) are rejected with an
/// "end of expression" error that names the first unexpected token.
#[test]
fn literal_with_alias_and_trailing_tokens() {
    let sql = "1234 as foo.bar";
    let expected_error = "Expected: end of expression, found: .";
    expect_parse_expr_error(sql, expected_error);
}
2015
/// Trailing whitespace after the alias is accepted and does not change the
/// parsed expression or alias.
#[test]
fn literal_with_alias_and_trailing_whitespace() {
    let number = Value::Number("1234".to_string(), false);
    let expected = ExprWithAlias {
        expr: Expr::Value(ValueWithSpan::from(number)),
        alias: Some(Ident::from("foo")),
    };
    expect_parse_expr_ok("1234 as foo ", expected);
}
2029
/// A further word after the alias, separated by whitespace, is rejected
/// with an "end of expression" error that names the extra token.
#[test]
fn literal_with_alias_and_trailing_whitespace_and_token() {
    let sql = "1234 as foo bar";
    let expected_error = "Expected: end of expression, found: bar";
    expect_parse_expr_error(sql, expected_error);
}
2037}