1use datafusion_common::DataFusionError;
24use datafusion_common::config::SqlParserOptions;
25use datafusion_common::{Diagnostic, Span, sql_err};
26use sqlparser::ast::{ExprWithAlias, Ident, OrderByOptions};
27use sqlparser::tokenizer::TokenWithSpan;
28use sqlparser::{
29 ast::{
30 ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query,
31 Statement as SQLStatement, TableConstraint, Value,
32 },
33 dialect::{Dialect, GenericDialect, keywords::Keyword},
34 parser::{Parser, ParserError},
35 tokenizer::{Token, Tokenizer, Word},
36};
37use std::collections::VecDeque;
38use std::fmt;
39
40macro_rules! parser_err {
42 ($MSG:expr $(; diagnostic = $DIAG:expr)?) => {{
43
44 let err = DataFusionError::from(ParserError::ParserError($MSG.to_string()));
45 $(
46 let err = err.with_diagnostic($DIAG);
47 )?
48 Err(err)
49 }};
50}
51
52fn parse_file_type(s: &str) -> Result<String, DataFusionError> {
53 Ok(s.to_uppercase())
54}
55
56#[derive(Debug, Clone, PartialEq, Eq)]
63pub struct ExplainStatement {
64 pub analyze: bool,
66 pub verbose: bool,
68 pub format: Option<String>,
70 pub statement: Box<Statement>,
74}
75
76impl fmt::Display for ExplainStatement {
77 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78 let Self {
79 analyze,
80 verbose,
81 format,
82 statement,
83 } = self;
84
85 write!(f, "EXPLAIN ")?;
86 if *analyze {
87 write!(f, "ANALYZE ")?;
88 }
89 if *verbose {
90 write!(f, "VERBOSE ")?;
91 }
92 if let Some(format) = format.as_ref() {
93 write!(f, "FORMAT {format} ")?;
94 }
95
96 write!(f, "{statement}")
97 }
98}
99
100#[derive(Debug, Clone, PartialEq, Eq)]
124pub struct CopyToStatement {
125 pub source: CopyToSource,
127 pub target: String,
129 pub partitioned_by: Vec<String>,
131 pub stored_as: Option<String>,
133 pub options: Vec<(String, Value)>,
135}
136
137impl fmt::Display for CopyToStatement {
138 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139 let Self {
140 source,
141 target,
142 partitioned_by,
143 stored_as,
144 options,
145 ..
146 } = self;
147
148 write!(f, "COPY {source} TO {target}")?;
149 if let Some(file_type) = stored_as {
150 write!(f, " STORED AS {file_type}")?;
151 }
152 if !partitioned_by.is_empty() {
153 write!(f, " PARTITIONED BY ({})", partitioned_by.join(", "))?;
154 }
155
156 if !options.is_empty() {
157 let opts: Vec<_> =
158 options.iter().map(|(k, v)| format!("'{k}' {v}")).collect();
159 write!(f, " OPTIONS ({})", opts.join(", "))?;
160 }
161
162 Ok(())
163 }
164}
165
166#[derive(Debug, Clone, PartialEq, Eq)]
167pub enum CopyToSource {
168 Relation(ObjectName),
170 Query(Box<Query>),
172}
173
174impl fmt::Display for CopyToSource {
175 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
176 match self {
177 CopyToSource::Relation(r) => write!(f, "{r}"),
178 CopyToSource::Query(q) => write!(f, "({q})"),
179 }
180 }
181}
182
183pub(crate) type LexOrdering = Vec<OrderByExpr>;
185
186#[derive(Debug, Clone, PartialEq, Eq)]
211pub struct CreateExternalTable {
212 pub name: ObjectName,
214 pub columns: Vec<ColumnDef>,
216 pub file_type: String,
218 pub location: String,
220 pub table_partition_cols: Vec<String>,
222 pub order_exprs: Vec<LexOrdering>,
224 pub if_not_exists: bool,
226 pub or_replace: bool,
228 pub temporary: bool,
230 pub unbounded: bool,
232 pub options: Vec<(String, Value)>,
234 pub constraints: Vec<TableConstraint>,
236}
237
238impl fmt::Display for CreateExternalTable {
239 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
240 write!(f, "CREATE EXTERNAL TABLE ")?;
241 if self.if_not_exists {
242 write!(f, "IF NOT EXISTS ")?;
243 }
244 write!(f, "{} ", self.name)?;
245 write!(f, "STORED AS {} ", self.file_type)?;
246 if !self.order_exprs.is_empty() {
247 write!(f, "WITH ORDER (")?;
248 let mut first = true;
249 for expr in self.order_exprs.iter().flatten() {
250 if !first {
251 write!(f, ", ")?;
252 }
253 write!(f, "{expr}")?;
254 first = false;
255 }
256 write!(f, ") ")?;
257 }
258 write!(f, "LOCATION {}", self.location)
259 }
260}
261
262#[derive(Debug, Clone, PartialEq, Eq)]
264pub enum ResetStatement {
265 Variable(ObjectName),
267}
268
269impl fmt::Display for ResetStatement {
270 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
271 match self {
272 ResetStatement::Variable(name) => write!(f, "RESET {name}"),
273 }
274 }
275}
276
277#[derive(Debug, Clone, PartialEq, Eq)]
285pub enum Statement {
286 Statement(Box<SQLStatement>),
288 CreateExternalTable(CreateExternalTable),
290 CopyTo(CopyToStatement),
292 Explain(ExplainStatement),
294 Reset(ResetStatement),
296}
297
298impl fmt::Display for Statement {
299 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
300 match self {
301 Statement::Statement(stmt) => write!(f, "{stmt}"),
302 Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"),
303 Statement::CopyTo(stmt) => write!(f, "{stmt}"),
304 Statement::Explain(stmt) => write!(f, "{stmt}"),
305 Statement::Reset(stmt) => write!(f, "{stmt}"),
306 }
307 }
308}
309
310fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), DataFusionError> {
311 if field.is_some() {
312 parser_err!(format!("{name} specified more than once",))?
313 }
314 Ok(())
315}
316
317pub struct DFParser<'a> {
326 pub parser: Parser<'a>,
327 options: SqlParserOptions,
328}
329
330const DEFAULT_RECURSION_LIMIT: usize = 50;
332const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
333
334pub struct DFParserBuilder<'a> {
367 sql: &'a str,
369 dialect: &'a dyn Dialect,
371 recursion_limit: usize,
373}
374
375impl<'a> DFParserBuilder<'a> {
376 pub fn new(sql: &'a str) -> Self {
379 Self {
380 sql,
381 dialect: &DEFAULT_DIALECT,
382 recursion_limit: DEFAULT_RECURSION_LIMIT,
383 }
384 }
385
386 pub fn with_dialect(mut self, dialect: &'a dyn Dialect) -> Self {
388 self.dialect = dialect;
389 self
390 }
391
392 pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
394 self.recursion_limit = recursion_limit;
395 self
396 }
397
398 pub fn build(self) -> Result<DFParser<'a>, DataFusionError> {
399 let mut tokenizer = Tokenizer::new(self.dialect, self.sql);
400 let tokens = tokenizer
402 .tokenize_with_location()
403 .map_err(ParserError::from)?;
404
405 Ok(DFParser {
406 parser: Parser::new(self.dialect)
407 .with_tokens_with_locations(tokens)
408 .with_recursion_limit(self.recursion_limit),
409 options: SqlParserOptions {
410 recursion_limit: self.recursion_limit,
411 ..Default::default()
412 },
413 })
414 }
415}
416
417impl<'a> DFParser<'a> {
418 #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
419 pub fn new(sql: &'a str) -> Result<Self, DataFusionError> {
420 DFParserBuilder::new(sql).build()
421 }
422
423 #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
424 pub fn new_with_dialect(
425 sql: &'a str,
426 dialect: &'a dyn Dialect,
427 ) -> Result<Self, DataFusionError> {
428 DFParserBuilder::new(sql).with_dialect(dialect).build()
429 }
430
431 pub fn parse_sql(sql: &'a str) -> Result<VecDeque<Statement>, DataFusionError> {
434 let mut parser = DFParserBuilder::new(sql).build()?;
435
436 parser.parse_statements()
437 }
438
439 pub fn parse_sql_with_dialect(
442 sql: &str,
443 dialect: &dyn Dialect,
444 ) -> Result<VecDeque<Statement>, DataFusionError> {
445 let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
446 parser.parse_statements()
447 }
448
449 pub fn parse_sql_into_expr(sql: &str) -> Result<ExprWithAlias, DataFusionError> {
450 DFParserBuilder::new(sql).build()?.parse_into_expr()
451 }
452
453 pub fn parse_sql_into_expr_with_dialect(
454 sql: &str,
455 dialect: &dyn Dialect,
456 ) -> Result<ExprWithAlias, DataFusionError> {
457 DFParserBuilder::new(sql)
458 .with_dialect(dialect)
459 .build()?
460 .parse_into_expr()
461 }
462
463 pub fn parse_statements(&mut self) -> Result<VecDeque<Statement>, DataFusionError> {
465 let mut stmts = VecDeque::new();
466 let mut expecting_statement_delimiter = false;
467 loop {
468 while self.parser.consume_token(&Token::SemiColon) {
470 expecting_statement_delimiter = false;
471 }
472
473 if self.parser.peek_token() == Token::EOF {
474 break;
475 }
476 if expecting_statement_delimiter {
477 return self.expected("end of statement", &self.parser.peek_token());
478 }
479
480 let statement = self.parse_statement()?;
481 stmts.push_back(statement);
482 expecting_statement_delimiter = true;
483 }
484 Ok(stmts)
485 }
486
487 fn expected<T>(
489 &self,
490 expected: &str,
491 found: &TokenWithSpan,
492 ) -> Result<T, DataFusionError> {
493 let sql_parser_span = found.span;
494 let span = Span::try_from_sqlparser_span(sql_parser_span);
495 let diagnostic = Diagnostic::new_error(
496 format!("Expected: {expected}, found: {found}{}", found.span.start),
497 span,
498 );
499 parser_err!(
500 format!("Expected: {expected}, found: {found}{}", found.span.start);
501 diagnostic=
502 diagnostic
503 )
504 }
505
506 fn expect_token(
507 &mut self,
508 expected: &str,
509 token: &Token,
510 ) -> Result<(), DataFusionError> {
511 let next_token = self.parser.peek_token_ref();
512 if next_token.token != *token {
513 self.expected(expected, next_token)
514 } else {
515 Ok(())
516 }
517 }
518
519 pub fn parse_statement(&mut self) -> Result<Statement, DataFusionError> {
521 match self.parser.peek_token().token {
522 Token::Word(w) => {
523 match w.keyword {
524 Keyword::CREATE => {
525 self.parser.next_token(); self.parse_create()
527 }
528 Keyword::COPY => {
529 if let Token::Word(w) = self.parser.peek_nth_token(1).token {
530 if w.keyword == Keyword::INTO {
532 return self.parse_and_handle_statement();
533 }
534 }
535 self.parser.next_token(); self.parse_copy()
537 }
538 Keyword::EXPLAIN => {
539 self.parser.next_token(); self.parse_explain()
541 }
542 Keyword::RESET => {
543 self.parser.next_token(); self.parse_reset()
545 }
546 _ => {
547 self.parse_and_handle_statement()
549 }
550 }
551 }
552 _ => {
553 self.parse_and_handle_statement()
555 }
556 }
557 }
558
559 pub fn parse_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
560 if let Token::Word(w) = self.parser.peek_token().token {
561 match w.keyword {
562 Keyword::CREATE | Keyword::COPY | Keyword::EXPLAIN => {
563 return parser_err!("Unsupported command in expression")?;
564 }
565 _ => {}
566 }
567 }
568
569 Ok(self.parser.parse_expr_with_alias()?)
570 }
571
572 pub fn parse_into_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
577 let expr = self.parse_expr()?;
578 self.expect_token("end of expression", &Token::EOF)?;
579 Ok(expr)
580 }
581
582 fn parse_and_handle_statement(&mut self) -> Result<Statement, DataFusionError> {
584 self.parser
585 .parse_statement()
586 .map(|stmt| Statement::Statement(Box::from(stmt)))
587 .map_err(|e| match e {
588 ParserError::RecursionLimitExceeded => DataFusionError::SQL(
589 Box::new(ParserError::RecursionLimitExceeded),
590 Some(format!(
591 " (current limit: {})",
592 self.options.recursion_limit
593 )),
594 ),
595 other => DataFusionError::SQL(Box::new(other), None),
596 })
597 }
598
599 pub fn parse_copy(&mut self) -> Result<Statement, DataFusionError> {
601 let source = if self.parser.consume_token(&Token::LParen) {
603 let query = self.parser.parse_query()?;
604 self.parser.expect_token(&Token::RParen)?;
605 CopyToSource::Query(query)
606 } else {
607 let table_name = self.parser.parse_object_name(true)?;
609 CopyToSource::Relation(table_name)
610 };
611
612 #[derive(Default)]
613 struct Builder {
614 stored_as: Option<String>,
615 target: Option<String>,
616 partitioned_by: Option<Vec<String>>,
617 options: Option<Vec<(String, Value)>>,
618 }
619
620 let mut builder = Builder::default();
621
622 loop {
623 if let Some(keyword) = self.parser.parse_one_of_keywords(&[
624 Keyword::STORED,
625 Keyword::TO,
626 Keyword::PARTITIONED,
627 Keyword::OPTIONS,
628 Keyword::WITH,
629 ]) {
630 match keyword {
631 Keyword::STORED => {
632 self.parser.expect_keyword(Keyword::AS)?;
633 ensure_not_set(&builder.stored_as, "STORED AS")?;
634 builder.stored_as = Some(self.parse_file_format()?);
635 }
636 Keyword::TO => {
637 ensure_not_set(&builder.target, "TO")?;
638 builder.target = Some(self.parser.parse_literal_string()?);
639 }
640 Keyword::WITH => {
641 self.parser.expect_keyword(Keyword::HEADER)?;
642 self.parser.expect_keyword(Keyword::ROW)?;
643 return parser_err!(
644 "WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')"
645 )?;
646 }
647 Keyword::PARTITIONED => {
648 self.parser.expect_keyword(Keyword::BY)?;
649 ensure_not_set(&builder.partitioned_by, "PARTITIONED BY")?;
650 builder.partitioned_by = Some(self.parse_partitions()?);
651 }
652 Keyword::OPTIONS => {
653 ensure_not_set(&builder.options, "OPTIONS")?;
654 builder.options = Some(self.parse_value_options()?);
655 }
656 _ => {
657 unreachable!()
658 }
659 }
660 } else {
661 let token = self.parser.next_token();
662 if token == Token::EOF || token == Token::SemiColon {
663 break;
664 } else {
665 return self.expected("end of statement or ;", &token)?;
666 }
667 }
668 }
669
670 let Some(target) = builder.target else {
671 return parser_err!("Missing TO clause in COPY statement")?;
672 };
673
674 Ok(Statement::CopyTo(CopyToStatement {
675 source,
676 target,
677 partitioned_by: builder.partitioned_by.unwrap_or(vec![]),
678 stored_as: builder.stored_as,
679 options: builder.options.unwrap_or(vec![]),
680 }))
681 }
682
683 pub fn parse_option_key(&mut self) -> Result<String, DataFusionError> {
690 let next_token = self.parser.next_token();
691 match next_token.token {
692 Token::Word(Word { value, .. }) => {
693 let mut parts = vec![value];
694 while self.parser.consume_token(&Token::Period) {
695 let next_token = self.parser.next_token();
696 if let Token::Word(Word { value, .. }) = next_token.token {
697 parts.push(value);
698 } else {
699 return self.expected("key name", &next_token);
703 }
704 }
705 Ok(parts.join("."))
706 }
707 Token::SingleQuotedString(s) => Ok(s),
708 Token::DoubleQuotedString(s) => Ok(s),
709 Token::EscapedStringLiteral(s) => Ok(s),
710 _ => self.expected("key name", &next_token),
711 }
712 }
713
714 pub fn parse_option_value(&mut self) -> Result<Value, DataFusionError> {
721 let next_token = self.parser.next_token();
722 match next_token.token {
723 Token::Word(word) => Ok(Value::SingleQuotedString(word.value)),
725 Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)),
726 Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)),
727 Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)),
728 Token::Number(n, l) => Ok(Value::Number(n, l)),
729 _ => self.expected("string or numeric value", &next_token),
730 }
731 }
732
733 pub fn parse_explain(&mut self) -> Result<Statement, DataFusionError> {
735 let analyze = self.parser.parse_keyword(Keyword::ANALYZE);
736 let verbose = self.parser.parse_keyword(Keyword::VERBOSE);
737 let format = self.parse_explain_format()?;
738
739 let statement = self.parse_statement()?;
740
741 Ok(Statement::Explain(ExplainStatement {
742 statement: Box::new(statement),
743 analyze,
744 verbose,
745 format,
746 }))
747 }
748
749 pub fn parse_reset(&mut self) -> Result<Statement, DataFusionError> {
751 let mut parts: Vec<String> = Vec::new();
752 let mut expecting_segment = true;
753
754 loop {
755 let next_token = self.parser.peek_token();
756 match &next_token.token {
757 Token::Word(word) => {
758 self.parser.next_token();
759 parts.push(word.value.clone());
760 expecting_segment = false;
761 }
762 Token::SingleQuotedString(s)
763 | Token::DoubleQuotedString(s)
764 | Token::EscapedStringLiteral(s) => {
765 self.parser.next_token();
766 parts.push(s.clone());
767 expecting_segment = false;
768 }
769 Token::Period => {
770 self.parser.next_token();
771 if expecting_segment || parts.is_empty() {
772 return self.expected("configuration parameter", &next_token);
773 }
774 expecting_segment = true;
775 }
776 Token::EOF | Token::SemiColon => break,
777 _ => return self.expected("configuration parameter", &next_token),
778 }
779 }
780
781 if parts.is_empty() || expecting_segment {
782 return self.expected("configuration parameter", &self.parser.peek_token());
783 }
784
785 let idents: Vec<Ident> = parts.into_iter().map(Ident::new).collect();
786 let variable = ObjectName::from(idents);
787 Ok(Statement::Reset(ResetStatement::Variable(variable)))
788 }
789
790 pub fn parse_explain_format(&mut self) -> Result<Option<String>, DataFusionError> {
791 if !self.parser.parse_keyword(Keyword::FORMAT) {
792 return Ok(None);
793 }
794
795 let next_token = self.parser.next_token();
796 let format = match next_token.token {
797 Token::Word(w) => Ok(w.value),
798 Token::SingleQuotedString(w) => Ok(w),
799 Token::DoubleQuotedString(w) => Ok(w),
800 _ => self.expected("an explain format such as TREE", &next_token),
801 }?;
802 Ok(Some(format))
803 }
804
805 pub fn parse_create(&mut self) -> Result<Statement, DataFusionError> {
807 if self
809 .parser
810 .parse_keywords(&[Keyword::OR, Keyword::REPLACE, Keyword::EXTERNAL])
811 {
812 self.parse_create_external_table(false, true)
813 } else if self.parser.parse_keywords(&[
814 Keyword::OR,
815 Keyword::REPLACE,
816 Keyword::UNBOUNDED,
817 Keyword::EXTERNAL,
818 ]) {
819 self.parse_create_external_table(true, true)
820 } else if self.parser.parse_keyword(Keyword::EXTERNAL) {
821 self.parse_create_external_table(false, false)
822 } else if self
823 .parser
824 .parse_keywords(&[Keyword::UNBOUNDED, Keyword::EXTERNAL])
825 {
826 self.parse_create_external_table(true, false)
827 } else {
828 Ok(Statement::Statement(Box::from(self.parser.parse_create()?)))
829 }
830 }
831
832 fn parse_partitions(&mut self) -> Result<Vec<String>, DataFusionError> {
833 let mut partitions: Vec<String> = vec![];
834 if !self.parser.consume_token(&Token::LParen)
835 || self.parser.consume_token(&Token::RParen)
836 {
837 return Ok(partitions);
838 }
839
840 loop {
841 if let Token::Word(_) = self.parser.peek_token().token {
842 let identifier = self.parser.parse_identifier()?;
843 partitions.push(identifier.to_string());
844 } else {
845 return self.expected("partition name", &self.parser.peek_token());
846 }
847 let comma = self.parser.consume_token(&Token::Comma);
848 if self.parser.consume_token(&Token::RParen) {
849 break;
851 } else if !comma {
852 return self.expected(
853 "',' or ')' after partition definition",
854 &self.parser.peek_token(),
855 );
856 }
857 }
858 Ok(partitions)
859 }
860
861 pub fn parse_order_by_exprs(&mut self) -> Result<Vec<OrderByExpr>, DataFusionError> {
863 let mut values = vec![];
864 self.parser.expect_token(&Token::LParen)?;
865 loop {
866 values.push(self.parse_order_by_expr()?);
867 if !self.parser.consume_token(&Token::Comma) {
868 self.parser.expect_token(&Token::RParen)?;
869 return Ok(values);
870 }
871 }
872 }
873
874 pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, DataFusionError> {
876 let expr = self.parser.parse_expr()?;
877
878 let asc = if self.parser.parse_keyword(Keyword::ASC) {
879 Some(true)
880 } else if self.parser.parse_keyword(Keyword::DESC) {
881 Some(false)
882 } else {
883 None
884 };
885
886 let nulls_first = if self
887 .parser
888 .parse_keywords(&[Keyword::NULLS, Keyword::FIRST])
889 {
890 Some(true)
891 } else if self.parser.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) {
892 Some(false)
893 } else {
894 None
895 };
896
897 Ok(OrderByExpr {
898 expr,
899 options: OrderByOptions { asc, nulls_first },
900 with_fill: None,
901 })
902 }
903
904 fn parse_columns(
906 &mut self,
907 ) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), DataFusionError> {
908 let mut columns = vec![];
909 let mut constraints = vec![];
910 if !self.parser.consume_token(&Token::LParen)
911 || self.parser.consume_token(&Token::RParen)
912 {
913 return Ok((columns, constraints));
914 }
915
916 loop {
917 if let Some(constraint) = self.parser.parse_optional_table_constraint()? {
918 constraints.push(constraint);
919 } else if let Token::Word(_) = self.parser.peek_token().token {
920 let column_def = self.parse_column_def()?;
921 columns.push(column_def);
922 } else {
923 return self.expected(
924 "column name or constraint definition",
925 &self.parser.peek_token(),
926 );
927 }
928 let comma = self.parser.consume_token(&Token::Comma);
929 if self.parser.consume_token(&Token::RParen) {
930 break;
932 } else if !comma {
933 return self.expected(
934 "',' or ')' after column definition",
935 &self.parser.peek_token(),
936 );
937 }
938 }
939
940 Ok((columns, constraints))
941 }
942
943 fn parse_column_def(&mut self) -> Result<ColumnDef, DataFusionError> {
944 let name = self.parser.parse_identifier()?;
945 let data_type = self.parser.parse_data_type()?;
946 let mut options = vec![];
947 loop {
948 if self.parser.parse_keyword(Keyword::CONSTRAINT) {
949 let name = Some(self.parser.parse_identifier()?);
950 if let Some(option) = self.parser.parse_optional_column_option()? {
951 options.push(ColumnOptionDef { name, option });
952 } else {
953 return self.expected(
954 "constraint details after CONSTRAINT <name>",
955 &self.parser.peek_token(),
956 );
957 }
958 } else if let Some(option) = self.parser.parse_optional_column_option()? {
959 options.push(ColumnOptionDef { name: None, option });
960 } else {
961 break;
962 };
963 }
964 Ok(ColumnDef {
965 name,
966 data_type,
967 options,
968 })
969 }
970
971 fn parse_create_external_table(
972 &mut self,
973 unbounded: bool,
974 or_replace: bool,
975 ) -> Result<Statement, DataFusionError> {
976 let temporary = self
977 .parser
978 .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY])
979 .is_some();
980
981 self.parser.expect_keyword(Keyword::TABLE)?;
982 let if_not_exists =
983 self.parser
984 .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
985
986 if if_not_exists && or_replace {
987 return parser_err!("'IF NOT EXISTS' cannot coexist with 'REPLACE'");
988 }
989
990 let table_name = self.parser.parse_object_name(true)?;
991 let (mut columns, constraints) = self.parse_columns()?;
992
993 #[derive(Default)]
994 struct Builder {
995 file_type: Option<String>,
996 location: Option<String>,
997 table_partition_cols: Option<Vec<String>>,
998 order_exprs: Vec<LexOrdering>,
999 options: Option<Vec<(String, Value)>>,
1000 }
1001 let mut builder = Builder::default();
1002
1003 loop {
1004 if let Some(keyword) = self.parser.parse_one_of_keywords(&[
1005 Keyword::STORED,
1006 Keyword::LOCATION,
1007 Keyword::WITH,
1008 Keyword::DELIMITER,
1009 Keyword::COMPRESSION,
1010 Keyword::PARTITIONED,
1011 Keyword::OPTIONS,
1012 ]) {
1013 match keyword {
1014 Keyword::STORED => {
1015 self.parser.expect_keyword(Keyword::AS)?;
1016 ensure_not_set(&builder.file_type, "STORED AS")?;
1017 builder.file_type = Some(self.parse_file_format()?);
1018 }
1019 Keyword::LOCATION => {
1020 ensure_not_set(&builder.location, "LOCATION")?;
1021 builder.location = Some(self.parser.parse_literal_string()?);
1022 }
1023 Keyword::WITH => {
1024 if self.parser.parse_keyword(Keyword::ORDER) {
1025 builder.order_exprs.push(self.parse_order_by_exprs()?);
1026 } else {
1027 self.parser.expect_keyword(Keyword::HEADER)?;
1028 self.parser.expect_keyword(Keyword::ROW)?;
1029 return parser_err!(
1030 "WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)"
1031 )?;
1032 }
1033 }
1034 Keyword::DELIMITER => {
1035 return parser_err!(
1036 "DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')"
1037 )?;
1038 }
1039 Keyword::COMPRESSION => {
1040 self.parser.expect_keyword(Keyword::TYPE)?;
1041 return parser_err!(
1042 "COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)"
1043 )?;
1044 }
1045 Keyword::PARTITIONED => {
1046 self.parser.expect_keyword(Keyword::BY)?;
1047 ensure_not_set(&builder.table_partition_cols, "PARTITIONED BY")?;
1048 let peeked = self.parser.peek_nth_token(2);
1053 if peeked == Token::Comma || peeked == Token::RParen {
1054 builder.table_partition_cols = Some(self.parse_partitions()?)
1056 } else {
1057 let (cols, cons) = self.parse_columns()?;
1059 builder.table_partition_cols = Some(
1060 cols.iter().map(|col| col.name.to_string()).collect(),
1061 );
1062
1063 columns.extend(cols);
1064
1065 if !cons.is_empty() {
1066 return sql_err!(ParserError::ParserError(
1067 "Constraints on Partition Columns are not supported"
1068 .to_string(),
1069 ));
1070 }
1071 }
1072 }
1073 Keyword::OPTIONS => {
1074 ensure_not_set(&builder.options, "OPTIONS")?;
1075 builder.options = Some(self.parse_value_options()?);
1076 }
1077 _ => {
1078 unreachable!()
1079 }
1080 }
1081 } else {
1082 let token = self.parser.next_token();
1083 if token == Token::EOF || token == Token::SemiColon {
1084 break;
1085 } else {
1086 return self.expected("end of statement or ;", &token)?;
1087 }
1088 }
1089 }
1090
1091 if builder.file_type.is_none() {
1093 return sql_err!(ParserError::ParserError(
1094 "Missing STORED AS clause in CREATE EXTERNAL TABLE statement".into(),
1095 ));
1096 }
1097 if builder.location.is_none() {
1098 return sql_err!(ParserError::ParserError(
1099 "Missing LOCATION clause in CREATE EXTERNAL TABLE statement".into(),
1100 ));
1101 }
1102
1103 let create = CreateExternalTable {
1104 name: table_name,
1105 columns,
1106 file_type: builder.file_type.unwrap(),
1107 location: builder.location.unwrap(),
1108 table_partition_cols: builder.table_partition_cols.unwrap_or(vec![]),
1109 order_exprs: builder.order_exprs,
1110 if_not_exists,
1111 or_replace,
1112 temporary,
1113 unbounded,
1114 options: builder.options.unwrap_or(Vec::new()),
1115 constraints,
1116 };
1117 Ok(Statement::CreateExternalTable(create))
1118 }
1119
1120 fn parse_file_format(&mut self) -> Result<String, DataFusionError> {
1122 let token = self.parser.next_token();
1123 match &token.token {
1124 Token::Word(w) => parse_file_type(&w.value),
1125 _ => self.expected("one of ARROW, PARQUET, NDJSON, or CSV", &token),
1126 }
1127 }
1128
1129 fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>, DataFusionError> {
1134 let mut options = vec![];
1135 self.parser.expect_token(&Token::LParen)?;
1136
1137 loop {
1138 let key = self.parse_option_key()?;
1139 let value = self.parse_option_value()?;
1140 options.push((key, value));
1141 let comma = self.parser.consume_token(&Token::Comma);
1142 if self.parser.consume_token(&Token::RParen) {
1143 break;
1145 } else if !comma {
1146 return self.expected(
1147 "',' or ')' after option definition",
1148 &self.parser.peek_token(),
1149 );
1150 }
1151 }
1152 Ok(options)
1153 }
1154}
1155
1156#[cfg(test)]
1157mod tests {
1158 use super::*;
1159 use datafusion_common::assert_contains;
1160 use sqlparser::ast::Expr::Identifier;
1161 use sqlparser::ast::{
1162 BinaryOperator, DataType, ExactNumberInfo, Expr, Ident, ValueWithSpan,
1163 };
1164 use sqlparser::dialect::SnowflakeDialect;
1165 use sqlparser::tokenizer::Span;
1166
1167 fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), DataFusionError> {
1168 let statements = DFParser::parse_sql(sql)?;
1169 assert_eq!(
1170 statements.len(),
1171 1,
1172 "Expected to parse exactly one statement"
1173 );
1174 assert_eq!(statements[0], expected, "actual:\n{:#?}", statements[0]);
1175 Ok(())
1176 }
1177
1178 fn expect_parse_error(sql: &str, expected_error: &str) {
1180 match DFParser::parse_sql(sql) {
1181 Ok(statements) => {
1182 panic!(
1183 "Expected parse error for '{sql}', but was successful: {statements:?}"
1184 );
1185 }
1186 Err(e) => {
1187 let error_message = e.to_string();
1188 assert!(
1189 error_message.contains(expected_error),
1190 "Expected error '{expected_error}' not found in actual error '{error_message}'"
1191 );
1192 }
1193 }
1194 }
1195
1196 fn make_column_def(name: impl Into<String>, data_type: DataType) -> ColumnDef {
1197 ColumnDef {
1198 name: Ident {
1199 value: name.into(),
1200 quote_style: None,
1201 span: Span::empty(),
1202 },
1203 data_type,
1204 options: vec![],
1205 }
1206 }
1207
1208 #[test]
1209 fn create_external_table() -> Result<(), DataFusionError> {
1210 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
1212 let display = None;
1213 let name = ObjectName::from(vec![Ident::from("t")]);
1214 let expected = Statement::CreateExternalTable(CreateExternalTable {
1215 name: name.clone(),
1216 columns: vec![make_column_def("c1", DataType::Int(display))],
1217 file_type: "CSV".to_string(),
1218 location: "foo.csv".into(),
1219 table_partition_cols: vec![],
1220 order_exprs: vec![],
1221 if_not_exists: false,
1222 or_replace: false,
1223 temporary: false,
1224 unbounded: false,
1225 options: vec![],
1226 constraints: vec![],
1227 });
1228 expect_parse_ok(sql, expected)?;
1229
1230 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ";
1232 let expected = Statement::CreateExternalTable(CreateExternalTable {
1233 name: name.clone(),
1234 columns: vec![make_column_def("c1", DataType::Int(None))],
1235 file_type: "CSV".to_string(),
1236 location: "foo.csv".into(),
1237 table_partition_cols: vec![],
1238 order_exprs: vec![],
1239 if_not_exists: false,
1240 or_replace: false,
1241 temporary: false,
1242 unbounded: false,
1243 options: vec![],
1244 constraints: vec![],
1245 });
1246 expect_parse_ok(sql, expected)?;
1247
1248 let sql =
1250 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ;";
1251 let expected = Statement::CreateExternalTable(CreateExternalTable {
1252 name: name.clone(),
1253 columns: vec![make_column_def("c1", DataType::Int(None))],
1254 file_type: "CSV".to_string(),
1255 location: "foo.csv".into(),
1256 table_partition_cols: vec![],
1257 order_exprs: vec![],
1258 if_not_exists: false,
1259 or_replace: false,
1260 temporary: false,
1261 unbounded: false,
1262 options: vec![],
1263 constraints: vec![],
1264 });
1265 expect_parse_ok(sql, expected)?;
1266
1267 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
1269 let display = None;
1270 let expected = Statement::CreateExternalTable(CreateExternalTable {
1271 name: name.clone(),
1272 columns: vec![make_column_def("c1", DataType::Int(display))],
1273 file_type: "CSV".to_string(),
1274 location: "foo.csv".into(),
1275 table_partition_cols: vec![],
1276 order_exprs: vec![],
1277 if_not_exists: false,
1278 or_replace: false,
1279 temporary: false,
1280 unbounded: false,
1281 options: vec![(
1282 "format.delimiter".into(),
1283 Value::SingleQuotedString("|".into()),
1284 )],
1285 constraints: vec![],
1286 });
1287 expect_parse_ok(sql, expected)?;
1288
1289 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'";
1291 let display = None;
1292 let expected = Statement::CreateExternalTable(CreateExternalTable {
1293 name: name.clone(),
1294 columns: vec![make_column_def("c1", DataType::Int(display))],
1295 file_type: "CSV".to_string(),
1296 location: "foo.csv".into(),
1297 table_partition_cols: vec!["p1".to_string(), "p2".to_string()],
1298 order_exprs: vec![],
1299 if_not_exists: false,
1300 or_replace: false,
1301 temporary: false,
1302 unbounded: false,
1303 options: vec![],
1304 constraints: vec![],
1305 });
1306 expect_parse_ok(sql, expected)?;
1307
1308 let sqls =
1310 vec![
1311 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1312 ('format.compression' 'GZIP')", "GZIP"),
1313 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1314 ('format.compression' 'BZIP2')", "BZIP2"),
1315 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1316 ('format.compression' 'XZ')", "XZ"),
1317 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1318 ('format.compression' 'ZSTD')", "ZSTD"),
1319 ];
1320 for (sql, compression) in sqls {
1321 let expected = Statement::CreateExternalTable(CreateExternalTable {
1322 name: name.clone(),
1323 columns: vec![make_column_def("c1", DataType::Int(display))],
1324 file_type: "CSV".to_string(),
1325 location: "foo.csv".into(),
1326 table_partition_cols: vec![],
1327 order_exprs: vec![],
1328 if_not_exists: false,
1329 or_replace: false,
1330 temporary: false,
1331 unbounded: false,
1332 options: vec![(
1333 "format.compression".into(),
1334 Value::SingleQuotedString(compression.into()),
1335 )],
1336 constraints: vec![],
1337 });
1338 expect_parse_ok(sql, expected)?;
1339 }
1340
1341 let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1343 let expected = Statement::CreateExternalTable(CreateExternalTable {
1344 name: name.clone(),
1345 columns: vec![],
1346 file_type: "PARQUET".to_string(),
1347 location: "foo.parquet".into(),
1348 table_partition_cols: vec![],
1349 order_exprs: vec![],
1350 if_not_exists: false,
1351 or_replace: false,
1352 temporary: false,
1353 unbounded: false,
1354 options: vec![],
1355 constraints: vec![],
1356 });
1357 expect_parse_ok(sql, expected)?;
1358
1359 let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'";
1361 let expected = Statement::CreateExternalTable(CreateExternalTable {
1362 name: name.clone(),
1363 columns: vec![],
1364 file_type: "PARQUET".to_string(),
1365 location: "foo.parquet".into(),
1366 table_partition_cols: vec![],
1367 order_exprs: vec![],
1368 if_not_exists: false,
1369 or_replace: false,
1370 temporary: false,
1371 unbounded: false,
1372 options: vec![],
1373 constraints: vec![],
1374 });
1375 expect_parse_ok(sql, expected)?;
1376
1377 let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
1379 let expected = Statement::CreateExternalTable(CreateExternalTable {
1380 name: name.clone(),
1381 columns: vec![],
1382 file_type: "AVRO".to_string(),
1383 location: "foo.avro".into(),
1384 table_partition_cols: vec![],
1385 order_exprs: vec![],
1386 if_not_exists: false,
1387 or_replace: false,
1388 temporary: false,
1389 unbounded: false,
1390 options: vec![],
1391 constraints: vec![],
1392 });
1393 expect_parse_ok(sql, expected)?;
1394
1395 let sql = "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'";
1397 let expected = Statement::CreateExternalTable(CreateExternalTable {
1398 name: name.clone(),
1399 columns: vec![],
1400 file_type: "PARQUET".to_string(),
1401 location: "foo.parquet".into(),
1402 table_partition_cols: vec![],
1403 order_exprs: vec![],
1404 if_not_exists: true,
1405 or_replace: false,
1406 temporary: false,
1407 unbounded: false,
1408 options: vec![],
1409 constraints: vec![],
1410 });
1411 expect_parse_ok(sql, expected)?;
1412
1413 let sql =
1415 "CREATE OR REPLACE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1416 let expected = Statement::CreateExternalTable(CreateExternalTable {
1417 name: name.clone(),
1418 columns: vec![],
1419 file_type: "PARQUET".to_string(),
1420 location: "foo.parquet".into(),
1421 table_partition_cols: vec![],
1422 order_exprs: vec![],
1423 if_not_exists: false,
1424 or_replace: true,
1425 temporary: false,
1426 unbounded: false,
1427 options: vec![],
1428 constraints: vec![],
1429 });
1430 expect_parse_ok(sql, expected)?;
1431
1432 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'";
1434 let expected = Statement::CreateExternalTable(CreateExternalTable {
1435 name: name.clone(),
1436 columns: vec![
1437 make_column_def("c1", DataType::Int(None)),
1438 make_column_def("p1", DataType::Int(None)),
1439 ],
1440 file_type: "CSV".to_string(),
1441 location: "foo.csv".into(),
1442 table_partition_cols: vec!["p1".to_string()],
1443 order_exprs: vec![],
1444 if_not_exists: false,
1445 or_replace: false,
1446 temporary: false,
1447 unbounded: false,
1448 options: vec![],
1449 constraints: vec![],
1450 });
1451 expect_parse_ok(sql, expected)?;
1452
1453 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'";
1455 expect_parse_error(
1456 sql,
1457 "SQL error: ParserError(\"Expected: a data type name, found: ) at Line: 1, Column: 73\")",
1458 );
1459
1460 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'";
1462 expect_parse_error(
1463 sql,
1464 "SQL error: ParserError(\"Expected: ',' or ')' after partition definition, found: int at Line: 1, Column: 70\")",
1465 );
1466
1467 let sql =
1469 "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'";
1470 let expected = Statement::CreateExternalTable(CreateExternalTable {
1471 name: name.clone(),
1472 columns: vec![],
1473 file_type: "X".to_string(),
1474 location: "blahblah".into(),
1475 table_partition_cols: vec![],
1476 order_exprs: vec![],
1477 if_not_exists: false,
1478 or_replace: false,
1479 temporary: false,
1480 unbounded: false,
1481 options: vec![("k1".into(), Value::SingleQuotedString("v1".into()))],
1482 constraints: vec![],
1483 });
1484 expect_parse_ok(sql, expected)?;
1485
1486 let sql = "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'";
1488 let expected = Statement::CreateExternalTable(CreateExternalTable {
1489 name: name.clone(),
1490 columns: vec![],
1491 file_type: "X".to_string(),
1492 location: "blahblah".into(),
1493 table_partition_cols: vec![],
1494 order_exprs: vec![],
1495 if_not_exists: false,
1496 or_replace: false,
1497 temporary: false,
1498 unbounded: false,
1499 options: vec![
1500 ("k1".into(), Value::SingleQuotedString("v1".into())),
1501 ("k2".into(), Value::SingleQuotedString("v2".into())),
1502 ],
1503 constraints: vec![],
1504 });
1505 expect_parse_ok(sql, expected)?;
1506
1507 let sqls = [
1509 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'",
1510 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'",
1511 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS LAST) LOCATION 'foo.csv'",
1512 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC) LOCATION 'foo.csv'",
1513 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC) LOCATION 'foo.csv'",
1514 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS FIRST) LOCATION 'foo.csv'",
1515 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS LAST) LOCATION 'foo.csv'",
1516 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS FIRST) LOCATION 'foo.csv'",
1517 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS LAST) LOCATION 'foo.csv'",
1518 ];
1519 let expected = vec![
1520 (None, None),
1521 (None, Some(true)),
1522 (None, Some(false)),
1523 (Some(true), None),
1524 (Some(false), None),
1525 (Some(false), Some(true)),
1526 (Some(false), Some(false)),
1527 (Some(true), Some(true)),
1528 (Some(true), Some(false)),
1529 ];
1530 for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) {
1531 let expected = Statement::CreateExternalTable(CreateExternalTable {
1532 name: name.clone(),
1533 columns: vec![make_column_def("c1", DataType::Int(None))],
1534 file_type: "CSV".to_string(),
1535 location: "foo.csv".into(),
1536 table_partition_cols: vec![],
1537 order_exprs: vec![vec![OrderByExpr {
1538 expr: Identifier(Ident {
1539 value: "c1".to_owned(),
1540 quote_style: None,
1541 span: Span::empty(),
1542 }),
1543 options: OrderByOptions { asc, nulls_first },
1544 with_fill: None,
1545 }]],
1546 if_not_exists: false,
1547 or_replace: false,
1548 temporary: false,
1549 unbounded: false,
1550 options: vec![],
1551 constraints: vec![],
1552 });
1553 expect_parse_ok(sql, expected)?;
1554 }
1555
1556 let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
1558 let display = None;
1559 let expected = Statement::CreateExternalTable(CreateExternalTable {
1560 name: name.clone(),
1561 columns: vec![
1562 make_column_def("c1", DataType::Int(display)),
1563 make_column_def("c2", DataType::Int(display)),
1564 ],
1565 file_type: "CSV".to_string(),
1566 location: "foo.csv".into(),
1567 table_partition_cols: vec![],
1568 order_exprs: vec![vec![
1569 OrderByExpr {
1570 expr: Identifier(Ident {
1571 value: "c1".to_owned(),
1572 quote_style: None,
1573 span: Span::empty(),
1574 }),
1575 options: OrderByOptions {
1576 asc: Some(true),
1577 nulls_first: None,
1578 },
1579 with_fill: None,
1580 },
1581 OrderByExpr {
1582 expr: Identifier(Ident {
1583 value: "c2".to_owned(),
1584 quote_style: None,
1585 span: Span::empty(),
1586 }),
1587 options: OrderByOptions {
1588 asc: Some(false),
1589 nulls_first: Some(true),
1590 },
1591 with_fill: None,
1592 },
1593 ]],
1594 if_not_exists: false,
1595 or_replace: false,
1596 temporary: false,
1597 unbounded: false,
1598 options: vec![],
1599 constraints: vec![],
1600 });
1601 expect_parse_ok(sql, expected)?;
1602
1603 let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
1605 let display = None;
1606 let expected = Statement::CreateExternalTable(CreateExternalTable {
1607 name: name.clone(),
1608 columns: vec![
1609 make_column_def("c1", DataType::Int(display)),
1610 make_column_def("c2", DataType::Int(display)),
1611 ],
1612 file_type: "CSV".to_string(),
1613 location: "foo.csv".into(),
1614 table_partition_cols: vec![],
1615 order_exprs: vec![vec![OrderByExpr {
1616 expr: Expr::BinaryOp {
1617 left: Box::new(Identifier(Ident {
1618 value: "c1".to_owned(),
1619 quote_style: None,
1620 span: Span::empty(),
1621 })),
1622 op: BinaryOperator::Minus,
1623 right: Box::new(Identifier(Ident {
1624 value: "c2".to_owned(),
1625 quote_style: None,
1626 span: Span::empty(),
1627 })),
1628 },
1629 options: OrderByOptions {
1630 asc: Some(true),
1631 nulls_first: None,
1632 },
1633 with_fill: None,
1634 }]],
1635 if_not_exists: false,
1636 or_replace: false,
1637 temporary: false,
1638 unbounded: false,
1639 options: vec![],
1640 constraints: vec![],
1641 });
1642 expect_parse_ok(sql, expected)?;
1643
1644 let sql = "
1646 CREATE UNBOUNDED EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float)
1647 STORED AS PARQUET
1648 WITH ORDER (c1 - c2 ASC)
1649 PARTITIONED BY (c1)
1650 LOCATION 'foo.parquet'
1651 OPTIONS ('format.compression' 'zstd',
1652 'format.delimiter' '*',
1653 'ROW_GROUP_SIZE' '1024',
1654 'TRUNCATE' 'NO',
1655 'format.has_header' 'true')";
1656 let expected = Statement::CreateExternalTable(CreateExternalTable {
1657 name: name.clone(),
1658 columns: vec![
1659 make_column_def("c1", DataType::Int(None)),
1660 make_column_def("c2", DataType::Float(ExactNumberInfo::None)),
1661 ],
1662 file_type: "PARQUET".to_string(),
1663 location: "foo.parquet".into(),
1664 table_partition_cols: vec!["c1".into()],
1665 order_exprs: vec![vec![OrderByExpr {
1666 expr: Expr::BinaryOp {
1667 left: Box::new(Identifier(Ident {
1668 value: "c1".to_owned(),
1669 quote_style: None,
1670 span: Span::empty(),
1671 })),
1672 op: BinaryOperator::Minus,
1673 right: Box::new(Identifier(Ident {
1674 value: "c2".to_owned(),
1675 quote_style: None,
1676 span: Span::empty(),
1677 })),
1678 },
1679 options: OrderByOptions {
1680 asc: Some(true),
1681 nulls_first: None,
1682 },
1683 with_fill: None,
1684 }]],
1685 if_not_exists: true,
1686 or_replace: false,
1687 temporary: false,
1688 unbounded: true,
1689 options: vec![
1690 (
1691 "format.compression".into(),
1692 Value::SingleQuotedString("zstd".into()),
1693 ),
1694 (
1695 "format.delimiter".into(),
1696 Value::SingleQuotedString("*".into()),
1697 ),
1698 (
1699 "ROW_GROUP_SIZE".into(),
1700 Value::SingleQuotedString("1024".into()),
1701 ),
1702 ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1703 (
1704 "format.has_header".into(),
1705 Value::SingleQuotedString("true".into()),
1706 ),
1707 ],
1708 constraints: vec![],
1709 });
1710 expect_parse_ok(sql, expected)?;
1711
1712 let sql = "
1714 CREATE OR REPLACE UNBOUNDED EXTERNAL TABLE t (c1 int, c2 float)
1715 STORED AS PARQUET
1716 WITH ORDER (c1 - c2 ASC)
1717 PARTITIONED BY (c1)
1718 LOCATION 'foo.parquet'
1719 OPTIONS ('format.compression' 'zstd',
1720 'format.delimiter' '*',
1721 'ROW_GROUP_SIZE' '1024',
1722 'TRUNCATE' 'NO',
1723 'format.has_header' 'true')";
1724 let expected = Statement::CreateExternalTable(CreateExternalTable {
1725 name: name.clone(),
1726 columns: vec![
1727 make_column_def("c1", DataType::Int(None)),
1728 make_column_def("c2", DataType::Float(ExactNumberInfo::None)),
1729 ],
1730 file_type: "PARQUET".to_string(),
1731 location: "foo.parquet".into(),
1732 table_partition_cols: vec!["c1".into()],
1733 order_exprs: vec![vec![OrderByExpr {
1734 expr: Expr::BinaryOp {
1735 left: Box::new(Identifier(Ident {
1736 value: "c1".to_owned(),
1737 quote_style: None,
1738 span: Span::empty(),
1739 })),
1740 op: BinaryOperator::Minus,
1741 right: Box::new(Identifier(Ident {
1742 value: "c2".to_owned(),
1743 quote_style: None,
1744 span: Span::empty(),
1745 })),
1746 },
1747 options: OrderByOptions {
1748 asc: Some(true),
1749 nulls_first: None,
1750 },
1751 with_fill: None,
1752 }]],
1753 if_not_exists: false,
1754 or_replace: true,
1755 temporary: false,
1756 unbounded: true,
1757 options: vec![
1758 (
1759 "format.compression".into(),
1760 Value::SingleQuotedString("zstd".into()),
1761 ),
1762 (
1763 "format.delimiter".into(),
1764 Value::SingleQuotedString("*".into()),
1765 ),
1766 (
1767 "ROW_GROUP_SIZE".into(),
1768 Value::SingleQuotedString("1024".into()),
1769 ),
1770 ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1771 (
1772 "format.has_header".into(),
1773 Value::SingleQuotedString("true".into()),
1774 ),
1775 ],
1776 constraints: vec![],
1777 });
1778 expect_parse_ok(sql, expected)?;
1779
1780 Ok(())
1783 }
1784
1785 #[test]
1786 fn copy_to_table_to_table() -> Result<(), DataFusionError> {
1787 let sql = "COPY foo TO bar STORED AS CSV";
1789 let expected = Statement::CopyTo(CopyToStatement {
1790 source: object_name("foo"),
1791 target: "bar".to_string(),
1792 partitioned_by: vec![],
1793 stored_as: Some("CSV".to_owned()),
1794 options: vec![],
1795 });
1796
1797 assert_eq!(verified_stmt(sql), expected);
1798 Ok(())
1799 }
1800
1801 #[test]
1802 fn skip_copy_into_snowflake() -> Result<(), DataFusionError> {
1803 let sql = "COPY INTO foo FROM @~/staged FILE_FORMAT = (FORMAT_NAME = 'mycsv');";
1804 let dialect = Box::new(SnowflakeDialect);
1805 let statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?;
1806
1807 assert_eq!(
1808 statements.len(),
1809 1,
1810 "Expected to parse exactly one statement"
1811 );
1812 if let Statement::CopyTo(_) = &statements[0] {
1813 panic!("Expected non COPY TO statement, but was successful: {statements:?}");
1814 }
1815 Ok(())
1816 }
1817
1818 #[test]
1819 fn explain_copy_to_table_to_table() -> Result<(), DataFusionError> {
1820 let cases = vec![
1821 ("EXPLAIN COPY foo TO bar STORED AS PARQUET", false, false),
1822 (
1823 "EXPLAIN ANALYZE COPY foo TO bar STORED AS PARQUET",
1824 true,
1825 false,
1826 ),
1827 (
1828 "EXPLAIN VERBOSE COPY foo TO bar STORED AS PARQUET",
1829 false,
1830 true,
1831 ),
1832 (
1833 "EXPLAIN ANALYZE VERBOSE COPY foo TO bar STORED AS PARQUET",
1834 true,
1835 true,
1836 ),
1837 ];
1838 for (sql, analyze, verbose) in cases {
1839 println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}");
1840
1841 let expected_copy = Statement::CopyTo(CopyToStatement {
1842 source: object_name("foo"),
1843 target: "bar".to_string(),
1844 partitioned_by: vec![],
1845 stored_as: Some("PARQUET".to_owned()),
1846 options: vec![],
1847 });
1848 let expected = Statement::Explain(ExplainStatement {
1849 analyze,
1850 verbose,
1851 format: None,
1852 statement: Box::new(expected_copy),
1853 });
1854 assert_eq!(verified_stmt(sql), expected);
1855 }
1856 Ok(())
1857 }
1858
1859 #[test]
1860 fn copy_to_query_to_table() -> Result<(), DataFusionError> {
1861 let statement = verified_stmt("SELECT 1");
1862
1863 let statement = if let Statement::Statement(statement) = statement {
1865 *statement
1866 } else {
1867 panic!("Expected statement, got {statement:?}");
1868 };
1869
1870 let query = if let SQLStatement::Query(query) = statement {
1871 query
1872 } else {
1873 panic!("Expected query, got {statement:?}");
1874 };
1875
1876 let sql =
1877 "COPY (SELECT 1) TO bar STORED AS CSV OPTIONS ('format.has_header' 'true')";
1878 let expected = Statement::CopyTo(CopyToStatement {
1879 source: CopyToSource::Query(query),
1880 target: "bar".to_string(),
1881 partitioned_by: vec![],
1882 stored_as: Some("CSV".to_owned()),
1883 options: vec![(
1884 "format.has_header".into(),
1885 Value::SingleQuotedString("true".into()),
1886 )],
1887 });
1888 assert_eq!(verified_stmt(sql), expected);
1889 Ok(())
1890 }
1891
1892 #[test]
1893 fn copy_to_options() -> Result<(), DataFusionError> {
1894 let sql = "COPY foo TO bar STORED AS CSV OPTIONS ('row_group_size' '55')";
1895 let expected = Statement::CopyTo(CopyToStatement {
1896 source: object_name("foo"),
1897 target: "bar".to_string(),
1898 partitioned_by: vec![],
1899 stored_as: Some("CSV".to_owned()),
1900 options: vec![(
1901 "row_group_size".to_string(),
1902 Value::SingleQuotedString("55".to_string()),
1903 )],
1904 });
1905 assert_eq!(verified_stmt(sql), expected);
1906 Ok(())
1907 }
1908
1909 #[test]
1910 fn copy_to_partitioned_by() -> Result<(), DataFusionError> {
1911 let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')";
1912 let expected = Statement::CopyTo(CopyToStatement {
1913 source: object_name("foo"),
1914 target: "bar".to_string(),
1915 partitioned_by: vec!["a".to_string()],
1916 stored_as: Some("CSV".to_owned()),
1917 options: vec![(
1918 "row_group_size".to_string(),
1919 Value::SingleQuotedString("55".to_string()),
1920 )],
1921 });
1922 assert_eq!(verified_stmt(sql), expected);
1923 Ok(())
1924 }
1925
1926 #[test]
1927 fn copy_to_multi_options() -> Result<(), DataFusionError> {
1928 let sql = "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy, 'execution.keep_partition_by_columns' true)";
1930
1931 let expected_options = vec![
1932 (
1933 "format.row_group_size".to_string(),
1934 Value::Number("55".to_string(), false),
1935 ),
1936 (
1937 "format.compression".to_string(),
1938 Value::SingleQuotedString("snappy".to_string()),
1939 ),
1940 (
1941 "execution.keep_partition_by_columns".to_string(),
1942 Value::SingleQuotedString("true".to_string()),
1943 ),
1944 ];
1945
1946 let mut statements = DFParser::parse_sql(sql).unwrap();
1947 assert_eq!(statements.len(), 1);
1948 let only_statement = statements.pop_front().unwrap();
1949
1950 let options = if let Statement::CopyTo(copy_to) = only_statement {
1951 copy_to.options
1952 } else {
1953 panic!("Expected copy");
1954 };
1955
1956 assert_eq!(options, expected_options);
1957
1958 Ok(())
1959 }
1960
1961 fn object_name(name: &str) -> CopyToSource {
1964 CopyToSource::Relation(ObjectName::from(vec![Ident::new(name)]))
1965 }
1966
1967 fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement {
1981 let mut statements = DFParser::parse_sql(sql).unwrap();
1982 assert_eq!(statements.len(), 1);
1983
1984 if sql != canonical {
1985 assert_eq!(DFParser::parse_sql(canonical).unwrap(), statements);
1986 }
1987
1988 let only_statement = statements.pop_front().unwrap();
1989 assert_eq!(
1990 canonical.to_uppercase(),
1991 only_statement.to_string().to_uppercase()
1992 );
1993 only_statement
1994 }
1995
1996 fn verified_stmt(sql: &str) -> Statement {
2000 one_statement_parses_to(sql, sql)
2001 }
2002
2003 #[test]
2004 fn test_recursion_limit() {
2007 let sql = "SELECT 1 OR 2";
2008
2009 DFParserBuilder::new(sql)
2011 .build()
2012 .unwrap()
2013 .parse_statements()
2014 .unwrap();
2015
2016 let err = DFParserBuilder::new(sql)
2017 .with_recursion_limit(1)
2018 .build()
2019 .unwrap()
2020 .parse_statements()
2021 .unwrap_err();
2022
2023 assert_contains!(
2024 err.to_string(),
2025 "SQL error: RecursionLimitExceeded (current limit: 1)"
2026 );
2027 }
2028
2029 fn expect_parse_expr_ok(sql: &str, expected: ExprWithAlias) {
2030 let expr = DFParser::parse_sql_into_expr(sql).unwrap();
2031 assert_eq!(expr, expected, "actual:\n{expr:#?}");
2032 }
2033
2034 fn expect_parse_expr_error(sql: &str, expected_error: &str) {
2036 match DFParser::parse_sql_into_expr(sql) {
2037 Ok(expr) => {
2038 panic!("Expected parse error for '{sql}', but was successful: {expr:#?}");
2039 }
2040 Err(e) => {
2041 let error_message = e.to_string();
2042 assert!(
2043 error_message.contains(expected_error),
2044 "Expected error '{expected_error}' not found in actual error '{error_message}'"
2045 );
2046 }
2047 }
2048 }
2049
2050 #[test]
2051 fn literal() {
2052 expect_parse_expr_ok(
2053 "1234",
2054 ExprWithAlias {
2055 expr: Expr::Value(ValueWithSpan::from(Value::Number(
2056 "1234".to_string(),
2057 false,
2058 ))),
2059 alias: None,
2060 },
2061 )
2062 }
2063
2064 #[test]
2065 fn literal_with_alias() {
2066 expect_parse_expr_ok(
2067 "1234 as foo",
2068 ExprWithAlias {
2069 expr: Expr::Value(ValueWithSpan::from(Value::Number(
2070 "1234".to_string(),
2071 false,
2072 ))),
2073 alias: Some(Ident::from("foo")),
2074 },
2075 )
2076 }
2077
2078 #[test]
2079 fn literal_with_alias_and_trailing_tokens() {
2080 expect_parse_expr_error(
2081 "1234 as foo.bar",
2082 "Expected: end of expression, found: .",
2083 )
2084 }
2085
2086 #[test]
2087 fn literal_with_alias_and_trailing_whitespace() {
2088 expect_parse_expr_ok(
2089 "1234 as foo ",
2090 ExprWithAlias {
2091 expr: Expr::Value(ValueWithSpan::from(Value::Number(
2092 "1234".to_string(),
2093 false,
2094 ))),
2095 alias: Some(Ident::from("foo")),
2096 },
2097 )
2098 }
2099
2100 #[test]
2101 fn literal_with_alias_and_trailing_whitespace_and_token() {
2102 expect_parse_expr_error(
2103 "1234 as foo bar",
2104 "Expected: end of expression, found: bar",
2105 )
2106 }
2107}