1use datafusion_common::config::SqlParserOptions;
24use datafusion_common::DataFusionError;
25use datafusion_common::{sql_err, Diagnostic, Span};
26use sqlparser::ast::{ExprWithAlias, OrderByOptions};
27use sqlparser::tokenizer::TokenWithSpan;
28use sqlparser::{
29 ast::{
30 ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query,
31 Statement as SQLStatement, TableConstraint, Value,
32 },
33 dialect::{keywords::Keyword, Dialect, GenericDialect},
34 parser::{Parser, ParserError},
35 tokenizer::{Token, Tokenizer, Word},
36};
37use std::collections::VecDeque;
38use std::fmt;
39
40macro_rules! parser_err {
42 ($MSG:expr $(; diagnostic = $DIAG:expr)?) => {{
43
44 let err = DataFusionError::from(ParserError::ParserError($MSG.to_string()));
45 $(
46 let err = err.with_diagnostic($DIAG);
47 )?
48 Err(err)
49 }};
50}
51
52fn parse_file_type(s: &str) -> Result<String, DataFusionError> {
53 Ok(s.to_uppercase())
54}
55
56#[derive(Debug, Clone, PartialEq, Eq)]
63pub struct ExplainStatement {
64 pub analyze: bool,
66 pub verbose: bool,
68 pub format: Option<String>,
70 pub statement: Box<Statement>,
74}
75
76impl fmt::Display for ExplainStatement {
77 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78 let Self {
79 analyze,
80 verbose,
81 format,
82 statement,
83 } = self;
84
85 write!(f, "EXPLAIN ")?;
86 if *analyze {
87 write!(f, "ANALYZE ")?;
88 }
89 if *verbose {
90 write!(f, "VERBOSE ")?;
91 }
92 if let Some(format) = format.as_ref() {
93 write!(f, "FORMAT {format} ")?;
94 }
95
96 write!(f, "{statement}")
97 }
98}
99
100#[derive(Debug, Clone, PartialEq, Eq)]
124pub struct CopyToStatement {
125 pub source: CopyToSource,
127 pub target: String,
129 pub partitioned_by: Vec<String>,
131 pub stored_as: Option<String>,
133 pub options: Vec<(String, Value)>,
135}
136
137impl fmt::Display for CopyToStatement {
138 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139 let Self {
140 source,
141 target,
142 partitioned_by,
143 stored_as,
144 options,
145 ..
146 } = self;
147
148 write!(f, "COPY {source} TO {target}")?;
149 if let Some(file_type) = stored_as {
150 write!(f, " STORED AS {file_type}")?;
151 }
152 if !partitioned_by.is_empty() {
153 write!(f, " PARTITIONED BY ({})", partitioned_by.join(", "))?;
154 }
155
156 if !options.is_empty() {
157 let opts: Vec<_> =
158 options.iter().map(|(k, v)| format!("'{k}' {v}")).collect();
159 write!(f, " OPTIONS ({})", opts.join(", "))?;
160 }
161
162 Ok(())
163 }
164}
165
166#[derive(Debug, Clone, PartialEq, Eq)]
167pub enum CopyToSource {
168 Relation(ObjectName),
170 Query(Box<Query>),
172}
173
174impl fmt::Display for CopyToSource {
175 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
176 match self {
177 CopyToSource::Relation(r) => write!(f, "{r}"),
178 CopyToSource::Query(q) => write!(f, "({q})"),
179 }
180 }
181}
182
183pub(crate) type LexOrdering = Vec<OrderByExpr>;
185
186#[derive(Debug, Clone, PartialEq, Eq)]
209pub struct CreateExternalTable {
210 pub name: ObjectName,
212 pub columns: Vec<ColumnDef>,
214 pub file_type: String,
216 pub location: String,
218 pub table_partition_cols: Vec<String>,
220 pub order_exprs: Vec<LexOrdering>,
222 pub if_not_exists: bool,
224 pub temporary: bool,
226 pub unbounded: bool,
228 pub options: Vec<(String, Value)>,
230 pub constraints: Vec<TableConstraint>,
232}
233
234impl fmt::Display for CreateExternalTable {
235 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
236 write!(f, "CREATE EXTERNAL TABLE ")?;
237 if self.if_not_exists {
238 write!(f, "IF NOT EXISTS ")?;
239 }
240 write!(f, "{} ", self.name)?;
241 write!(f, "STORED AS {} ", self.file_type)?;
242 write!(f, "LOCATION {} ", self.location)
243 }
244}
245
246#[derive(Debug, Clone, PartialEq, Eq)]
254pub enum Statement {
255 Statement(Box<SQLStatement>),
257 CreateExternalTable(CreateExternalTable),
259 CopyTo(CopyToStatement),
261 Explain(ExplainStatement),
263}
264
265impl fmt::Display for Statement {
266 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
267 match self {
268 Statement::Statement(stmt) => write!(f, "{stmt}"),
269 Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"),
270 Statement::CopyTo(stmt) => write!(f, "{stmt}"),
271 Statement::Explain(stmt) => write!(f, "{stmt}"),
272 }
273 }
274}
275
276fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), DataFusionError> {
277 if field.is_some() {
278 parser_err!(format!("{name} specified more than once",))?
279 }
280 Ok(())
281}
282
283pub struct DFParser<'a> {
292 pub parser: Parser<'a>,
293 options: SqlParserOptions,
294}
295
296const DEFAULT_RECURSION_LIMIT: usize = 50;
298const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
299
300pub struct DFParserBuilder<'a> {
334 sql: &'a str,
336 dialect: &'a dyn Dialect,
338 recursion_limit: usize,
340}
341
342impl<'a> DFParserBuilder<'a> {
343 pub fn new(sql: &'a str) -> Self {
346 Self {
347 sql,
348 dialect: &DEFAULT_DIALECT,
349 recursion_limit: DEFAULT_RECURSION_LIMIT,
350 }
351 }
352
353 pub fn with_dialect(mut self, dialect: &'a dyn Dialect) -> Self {
355 self.dialect = dialect;
356 self
357 }
358
359 pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
361 self.recursion_limit = recursion_limit;
362 self
363 }
364
365 pub fn build(self) -> Result<DFParser<'a>, DataFusionError> {
366 let mut tokenizer = Tokenizer::new(self.dialect, self.sql);
367 let tokens = tokenizer
369 .tokenize_with_location()
370 .map_err(ParserError::from)?;
371
372 Ok(DFParser {
373 parser: Parser::new(self.dialect)
374 .with_tokens_with_locations(tokens)
375 .with_recursion_limit(self.recursion_limit),
376 options: SqlParserOptions {
377 recursion_limit: self.recursion_limit,
378 ..Default::default()
379 },
380 })
381 }
382}
383
384impl<'a> DFParser<'a> {
385 #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
386 pub fn new(sql: &'a str) -> Result<Self, DataFusionError> {
387 DFParserBuilder::new(sql).build()
388 }
389
390 #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
391 pub fn new_with_dialect(
392 sql: &'a str,
393 dialect: &'a dyn Dialect,
394 ) -> Result<Self, DataFusionError> {
395 DFParserBuilder::new(sql).with_dialect(dialect).build()
396 }
397
398 pub fn parse_sql(sql: &'a str) -> Result<VecDeque<Statement>, DataFusionError> {
401 let mut parser = DFParserBuilder::new(sql).build()?;
402
403 parser.parse_statements()
404 }
405
406 pub fn parse_sql_with_dialect(
409 sql: &str,
410 dialect: &dyn Dialect,
411 ) -> Result<VecDeque<Statement>, DataFusionError> {
412 let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
413 parser.parse_statements()
414 }
415
416 pub fn parse_sql_into_expr(sql: &str) -> Result<ExprWithAlias, DataFusionError> {
417 DFParserBuilder::new(sql).build()?.parse_into_expr()
418 }
419
420 pub fn parse_sql_into_expr_with_dialect(
421 sql: &str,
422 dialect: &dyn Dialect,
423 ) -> Result<ExprWithAlias, DataFusionError> {
424 DFParserBuilder::new(sql)
425 .with_dialect(dialect)
426 .build()?
427 .parse_into_expr()
428 }
429
430 pub fn parse_statements(&mut self) -> Result<VecDeque<Statement>, DataFusionError> {
432 let mut stmts = VecDeque::new();
433 let mut expecting_statement_delimiter = false;
434 loop {
435 while self.parser.consume_token(&Token::SemiColon) {
437 expecting_statement_delimiter = false;
438 }
439
440 if self.parser.peek_token() == Token::EOF {
441 break;
442 }
443 if expecting_statement_delimiter {
444 return self.expected("end of statement", self.parser.peek_token());
445 }
446
447 let statement = self.parse_statement()?;
448 stmts.push_back(statement);
449 expecting_statement_delimiter = true;
450 }
451 Ok(stmts)
452 }
453
454 fn expected<T>(
456 &self,
457 expected: &str,
458 found: TokenWithSpan,
459 ) -> Result<T, DataFusionError> {
460 let sql_parser_span = found.span;
461 let span = Span::try_from_sqlparser_span(sql_parser_span);
462 let diagnostic = Diagnostic::new_error(
463 format!("Expected: {expected}, found: {found}{}", found.span.start),
464 span,
465 );
466 parser_err!(
467 format!("Expected: {expected}, found: {found}{}", found.span.start);
468 diagnostic=
469 diagnostic
470 )
471 }
472
473 fn expect_token(
474 &mut self,
475 expected: &str,
476 token: Token,
477 ) -> Result<(), DataFusionError> {
478 let next_token = self.parser.peek_token_ref();
479 if next_token.token != token {
480 self.expected(expected, next_token.clone())
481 } else {
482 Ok(())
483 }
484 }
485
486 pub fn parse_statement(&mut self) -> Result<Statement, DataFusionError> {
488 match self.parser.peek_token().token {
489 Token::Word(w) => {
490 match w.keyword {
491 Keyword::CREATE => {
492 self.parser.next_token(); self.parse_create()
494 }
495 Keyword::COPY => {
496 if let Token::Word(w) = self.parser.peek_nth_token(1).token {
497 if w.keyword == Keyword::INTO {
499 return self.parse_and_handle_statement();
500 }
501 }
502 self.parser.next_token(); self.parse_copy()
504 }
505 Keyword::EXPLAIN => {
506 self.parser.next_token(); self.parse_explain()
508 }
509 _ => {
510 self.parse_and_handle_statement()
512 }
513 }
514 }
515 _ => {
516 self.parse_and_handle_statement()
518 }
519 }
520 }
521
522 pub fn parse_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
523 if let Token::Word(w) = self.parser.peek_token().token {
524 match w.keyword {
525 Keyword::CREATE | Keyword::COPY | Keyword::EXPLAIN => {
526 return parser_err!("Unsupported command in expression")?;
527 }
528 _ => {}
529 }
530 }
531
532 Ok(self.parser.parse_expr_with_alias()?)
533 }
534
535 pub fn parse_into_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
540 let expr = self.parse_expr()?;
541 self.expect_token("end of expression", Token::EOF)?;
542 Ok(expr)
543 }
544
545 fn parse_and_handle_statement(&mut self) -> Result<Statement, DataFusionError> {
547 self.parser
548 .parse_statement()
549 .map(|stmt| Statement::Statement(Box::from(stmt)))
550 .map_err(|e| match e {
551 ParserError::RecursionLimitExceeded => DataFusionError::SQL(
552 Box::new(ParserError::RecursionLimitExceeded),
553 Some(format!(
554 " (current limit: {})",
555 self.options.recursion_limit
556 )),
557 ),
558 other => DataFusionError::SQL(Box::new(other), None),
559 })
560 }
561
562 pub fn parse_copy(&mut self) -> Result<Statement, DataFusionError> {
564 let source = if self.parser.consume_token(&Token::LParen) {
566 let query = self.parser.parse_query()?;
567 self.parser.expect_token(&Token::RParen)?;
568 CopyToSource::Query(query)
569 } else {
570 let table_name = self.parser.parse_object_name(true)?;
572 CopyToSource::Relation(table_name)
573 };
574
575 #[derive(Default)]
576 struct Builder {
577 stored_as: Option<String>,
578 target: Option<String>,
579 partitioned_by: Option<Vec<String>>,
580 options: Option<Vec<(String, Value)>>,
581 }
582
583 let mut builder = Builder::default();
584
585 loop {
586 if let Some(keyword) = self.parser.parse_one_of_keywords(&[
587 Keyword::STORED,
588 Keyword::TO,
589 Keyword::PARTITIONED,
590 Keyword::OPTIONS,
591 Keyword::WITH,
592 ]) {
593 match keyword {
594 Keyword::STORED => {
595 self.parser.expect_keyword(Keyword::AS)?;
596 ensure_not_set(&builder.stored_as, "STORED AS")?;
597 builder.stored_as = Some(self.parse_file_format()?);
598 }
599 Keyword::TO => {
600 ensure_not_set(&builder.target, "TO")?;
601 builder.target = Some(self.parser.parse_literal_string()?);
602 }
603 Keyword::WITH => {
604 self.parser.expect_keyword(Keyword::HEADER)?;
605 self.parser.expect_keyword(Keyword::ROW)?;
606 return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')")?;
607 }
608 Keyword::PARTITIONED => {
609 self.parser.expect_keyword(Keyword::BY)?;
610 ensure_not_set(&builder.partitioned_by, "PARTITIONED BY")?;
611 builder.partitioned_by = Some(self.parse_partitions()?);
612 }
613 Keyword::OPTIONS => {
614 ensure_not_set(&builder.options, "OPTIONS")?;
615 builder.options = Some(self.parse_value_options()?);
616 }
617 _ => {
618 unreachable!()
619 }
620 }
621 } else {
622 let token = self.parser.next_token();
623 if token == Token::EOF || token == Token::SemiColon {
624 break;
625 } else {
626 return self.expected("end of statement or ;", token)?;
627 }
628 }
629 }
630
631 let Some(target) = builder.target else {
632 return parser_err!("Missing TO clause in COPY statement")?;
633 };
634
635 Ok(Statement::CopyTo(CopyToStatement {
636 source,
637 target,
638 partitioned_by: builder.partitioned_by.unwrap_or(vec![]),
639 stored_as: builder.stored_as,
640 options: builder.options.unwrap_or(vec![]),
641 }))
642 }
643
644 pub fn parse_option_key(&mut self) -> Result<String, DataFusionError> {
651 let next_token = self.parser.next_token();
652 match next_token.token {
653 Token::Word(Word { value, .. }) => {
654 let mut parts = vec![value];
655 while self.parser.consume_token(&Token::Period) {
656 let next_token = self.parser.next_token();
657 if let Token::Word(Word { value, .. }) = next_token.token {
658 parts.push(value);
659 } else {
660 return self.expected("key name", next_token);
664 }
665 }
666 Ok(parts.join("."))
667 }
668 Token::SingleQuotedString(s) => Ok(s),
669 Token::DoubleQuotedString(s) => Ok(s),
670 Token::EscapedStringLiteral(s) => Ok(s),
671 _ => self.expected("key name", next_token),
672 }
673 }
674
675 pub fn parse_option_value(&mut self) -> Result<Value, DataFusionError> {
682 let next_token = self.parser.next_token();
683 match next_token.token {
684 Token::Word(word) => Ok(Value::SingleQuotedString(word.value)),
686 Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)),
687 Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)),
688 Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)),
689 Token::Number(n, l) => Ok(Value::Number(n, l)),
690 _ => self.expected("string or numeric value", next_token),
691 }
692 }
693
694 pub fn parse_explain(&mut self) -> Result<Statement, DataFusionError> {
696 let analyze = self.parser.parse_keyword(Keyword::ANALYZE);
697 let verbose = self.parser.parse_keyword(Keyword::VERBOSE);
698 let format = self.parse_explain_format()?;
699
700 let statement = self.parse_statement()?;
701
702 Ok(Statement::Explain(ExplainStatement {
703 statement: Box::new(statement),
704 analyze,
705 verbose,
706 format,
707 }))
708 }
709
710 pub fn parse_explain_format(&mut self) -> Result<Option<String>, DataFusionError> {
711 if !self.parser.parse_keyword(Keyword::FORMAT) {
712 return Ok(None);
713 }
714
715 let next_token = self.parser.next_token();
716 let format = match next_token.token {
717 Token::Word(w) => Ok(w.value),
718 Token::SingleQuotedString(w) => Ok(w),
719 Token::DoubleQuotedString(w) => Ok(w),
720 _ => self.expected("an explain format such as TREE", next_token),
721 }?;
722 Ok(Some(format))
723 }
724
725 pub fn parse_create(&mut self) -> Result<Statement, DataFusionError> {
727 if self.parser.parse_keyword(Keyword::EXTERNAL) {
728 self.parse_create_external_table(false)
729 } else if self.parser.parse_keyword(Keyword::UNBOUNDED) {
730 self.parser.expect_keyword(Keyword::EXTERNAL)?;
731 self.parse_create_external_table(true)
732 } else {
733 Ok(Statement::Statement(Box::from(self.parser.parse_create()?)))
734 }
735 }
736
737 fn parse_partitions(&mut self) -> Result<Vec<String>, DataFusionError> {
738 let mut partitions: Vec<String> = vec![];
739 if !self.parser.consume_token(&Token::LParen)
740 || self.parser.consume_token(&Token::RParen)
741 {
742 return Ok(partitions);
743 }
744
745 loop {
746 if let Token::Word(_) = self.parser.peek_token().token {
747 let identifier = self.parser.parse_identifier()?;
748 partitions.push(identifier.to_string());
749 } else {
750 return self.expected("partition name", self.parser.peek_token());
751 }
752 let comma = self.parser.consume_token(&Token::Comma);
753 if self.parser.consume_token(&Token::RParen) {
754 break;
756 } else if !comma {
757 return self.expected(
758 "',' or ')' after partition definition",
759 self.parser.peek_token(),
760 );
761 }
762 }
763 Ok(partitions)
764 }
765
766 pub fn parse_order_by_exprs(&mut self) -> Result<Vec<OrderByExpr>, DataFusionError> {
768 let mut values = vec![];
769 self.parser.expect_token(&Token::LParen)?;
770 loop {
771 values.push(self.parse_order_by_expr()?);
772 if !self.parser.consume_token(&Token::Comma) {
773 self.parser.expect_token(&Token::RParen)?;
774 return Ok(values);
775 }
776 }
777 }
778
779 pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, DataFusionError> {
781 let expr = self.parser.parse_expr()?;
782
783 let asc = if self.parser.parse_keyword(Keyword::ASC) {
784 Some(true)
785 } else if self.parser.parse_keyword(Keyword::DESC) {
786 Some(false)
787 } else {
788 None
789 };
790
791 let nulls_first = if self
792 .parser
793 .parse_keywords(&[Keyword::NULLS, Keyword::FIRST])
794 {
795 Some(true)
796 } else if self.parser.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) {
797 Some(false)
798 } else {
799 None
800 };
801
802 Ok(OrderByExpr {
803 expr,
804 options: OrderByOptions { asc, nulls_first },
805 with_fill: None,
806 })
807 }
808
809 fn parse_columns(
811 &mut self,
812 ) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), DataFusionError> {
813 let mut columns = vec![];
814 let mut constraints = vec![];
815 if !self.parser.consume_token(&Token::LParen)
816 || self.parser.consume_token(&Token::RParen)
817 {
818 return Ok((columns, constraints));
819 }
820
821 loop {
822 if let Some(constraint) = self.parser.parse_optional_table_constraint()? {
823 constraints.push(constraint);
824 } else if let Token::Word(_) = self.parser.peek_token().token {
825 let column_def = self.parse_column_def()?;
826 columns.push(column_def);
827 } else {
828 return self.expected(
829 "column name or constraint definition",
830 self.parser.peek_token(),
831 );
832 }
833 let comma = self.parser.consume_token(&Token::Comma);
834 if self.parser.consume_token(&Token::RParen) {
835 break;
837 } else if !comma {
838 return self.expected(
839 "',' or ')' after column definition",
840 self.parser.peek_token(),
841 );
842 }
843 }
844
845 Ok((columns, constraints))
846 }
847
848 fn parse_column_def(&mut self) -> Result<ColumnDef, DataFusionError> {
849 let name = self.parser.parse_identifier()?;
850 let data_type = self.parser.parse_data_type()?;
851 let mut options = vec![];
852 loop {
853 if self.parser.parse_keyword(Keyword::CONSTRAINT) {
854 let name = Some(self.parser.parse_identifier()?);
855 if let Some(option) = self.parser.parse_optional_column_option()? {
856 options.push(ColumnOptionDef { name, option });
857 } else {
858 return self.expected(
859 "constraint details after CONSTRAINT <name>",
860 self.parser.peek_token(),
861 );
862 }
863 } else if let Some(option) = self.parser.parse_optional_column_option()? {
864 options.push(ColumnOptionDef { name: None, option });
865 } else {
866 break;
867 };
868 }
869 Ok(ColumnDef {
870 name,
871 data_type,
872 options,
873 })
874 }
875
876 fn parse_create_external_table(
877 &mut self,
878 unbounded: bool,
879 ) -> Result<Statement, DataFusionError> {
880 let temporary = self
881 .parser
882 .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY])
883 .is_some();
884 self.parser.expect_keyword(Keyword::TABLE)?;
885 let if_not_exists =
886 self.parser
887 .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
888 let table_name = self.parser.parse_object_name(true)?;
889 let (mut columns, constraints) = self.parse_columns()?;
890
891 #[derive(Default)]
892 struct Builder {
893 file_type: Option<String>,
894 location: Option<String>,
895 table_partition_cols: Option<Vec<String>>,
896 order_exprs: Vec<LexOrdering>,
897 options: Option<Vec<(String, Value)>>,
898 }
899 let mut builder = Builder::default();
900
901 loop {
902 if let Some(keyword) = self.parser.parse_one_of_keywords(&[
903 Keyword::STORED,
904 Keyword::LOCATION,
905 Keyword::WITH,
906 Keyword::DELIMITER,
907 Keyword::COMPRESSION,
908 Keyword::PARTITIONED,
909 Keyword::OPTIONS,
910 ]) {
911 match keyword {
912 Keyword::STORED => {
913 self.parser.expect_keyword(Keyword::AS)?;
914 ensure_not_set(&builder.file_type, "STORED AS")?;
915 builder.file_type = Some(self.parse_file_format()?);
916 }
917 Keyword::LOCATION => {
918 ensure_not_set(&builder.location, "LOCATION")?;
919 builder.location = Some(self.parser.parse_literal_string()?);
920 }
921 Keyword::WITH => {
922 if self.parser.parse_keyword(Keyword::ORDER) {
923 builder.order_exprs.push(self.parse_order_by_exprs()?);
924 } else {
925 self.parser.expect_keyword(Keyword::HEADER)?;
926 self.parser.expect_keyword(Keyword::ROW)?;
927 return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)")?;
928 }
929 }
930 Keyword::DELIMITER => {
931 return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')")?;
932 }
933 Keyword::COMPRESSION => {
934 self.parser.expect_keyword(Keyword::TYPE)?;
935 return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)")?;
936 }
937 Keyword::PARTITIONED => {
938 self.parser.expect_keyword(Keyword::BY)?;
939 ensure_not_set(&builder.table_partition_cols, "PARTITIONED BY")?;
940 let peeked = self.parser.peek_nth_token(2);
945 if peeked == Token::Comma || peeked == Token::RParen {
946 builder.table_partition_cols = Some(self.parse_partitions()?)
948 } else {
949 let (cols, cons) = self.parse_columns()?;
951 builder.table_partition_cols = Some(
952 cols.iter().map(|col| col.name.to_string()).collect(),
953 );
954
955 columns.extend(cols);
956
957 if !cons.is_empty() {
958 return sql_err!(ParserError::ParserError(
959 "Constraints on Partition Columns are not supported"
960 .to_string(),
961 ));
962 }
963 }
964 }
965 Keyword::OPTIONS => {
966 ensure_not_set(&builder.options, "OPTIONS")?;
967 builder.options = Some(self.parse_value_options()?);
968 }
969 _ => {
970 unreachable!()
971 }
972 }
973 } else {
974 let token = self.parser.next_token();
975 if token == Token::EOF || token == Token::SemiColon {
976 break;
977 } else {
978 return self.expected("end of statement or ;", token)?;
979 }
980 }
981 }
982
983 if builder.file_type.is_none() {
985 return sql_err!(ParserError::ParserError(
986 "Missing STORED AS clause in CREATE EXTERNAL TABLE statement".into(),
987 ));
988 }
989 if builder.location.is_none() {
990 return sql_err!(ParserError::ParserError(
991 "Missing LOCATION clause in CREATE EXTERNAL TABLE statement".into(),
992 ));
993 }
994
995 let create = CreateExternalTable {
996 name: table_name,
997 columns,
998 file_type: builder.file_type.unwrap(),
999 location: builder.location.unwrap(),
1000 table_partition_cols: builder.table_partition_cols.unwrap_or(vec![]),
1001 order_exprs: builder.order_exprs,
1002 if_not_exists,
1003 temporary,
1004 unbounded,
1005 options: builder.options.unwrap_or(Vec::new()),
1006 constraints,
1007 };
1008 Ok(Statement::CreateExternalTable(create))
1009 }
1010
1011 fn parse_file_format(&mut self) -> Result<String, DataFusionError> {
1013 let token = self.parser.next_token();
1014 match &token.token {
1015 Token::Word(w) => parse_file_type(&w.value),
1016 _ => self.expected("one of ARROW, PARQUET, NDJSON, or CSV", token),
1017 }
1018 }
1019
1020 fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>, DataFusionError> {
1025 let mut options = vec![];
1026 self.parser.expect_token(&Token::LParen)?;
1027
1028 loop {
1029 let key = self.parse_option_key()?;
1030 let value = self.parse_option_value()?;
1031 options.push((key, value));
1032 let comma = self.parser.consume_token(&Token::Comma);
1033 if self.parser.consume_token(&Token::RParen) {
1034 break;
1036 } else if !comma {
1037 return self.expected(
1038 "',' or ')' after option definition",
1039 self.parser.peek_token(),
1040 );
1041 }
1042 }
1043 Ok(options)
1044 }
1045}
1046
1047#[cfg(test)]
1048mod tests {
1049 use super::*;
1050 use datafusion_common::assert_contains;
1051 use sqlparser::ast::Expr::Identifier;
1052 use sqlparser::ast::{BinaryOperator, DataType, Expr, Ident, ValueWithSpan};
1053 use sqlparser::dialect::SnowflakeDialect;
1054 use sqlparser::tokenizer::Span;
1055
1056 fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), DataFusionError> {
1057 let statements = DFParser::parse_sql(sql)?;
1058 assert_eq!(
1059 statements.len(),
1060 1,
1061 "Expected to parse exactly one statement"
1062 );
1063 assert_eq!(statements[0], expected, "actual:\n{:#?}", statements[0]);
1064 Ok(())
1065 }
1066
1067 fn expect_parse_error(sql: &str, expected_error: &str) {
1069 match DFParser::parse_sql(sql) {
1070 Ok(statements) => {
1071 panic!(
1072 "Expected parse error for '{sql}', but was successful: {statements:?}"
1073 );
1074 }
1075 Err(e) => {
1076 let error_message = e.to_string();
1077 assert!(
1078 error_message.contains(expected_error),
1079 "Expected error '{expected_error}' not found in actual error '{error_message}'"
1080 );
1081 }
1082 }
1083 }
1084
1085 fn make_column_def(name: impl Into<String>, data_type: DataType) -> ColumnDef {
1086 ColumnDef {
1087 name: Ident {
1088 value: name.into(),
1089 quote_style: None,
1090 span: Span::empty(),
1091 },
1092 data_type,
1093 options: vec![],
1094 }
1095 }
1096
1097 #[test]
1098 fn create_external_table() -> Result<(), DataFusionError> {
1099 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
1101 let display = None;
1102 let name = ObjectName::from(vec![Ident::from("t")]);
1103 let expected = Statement::CreateExternalTable(CreateExternalTable {
1104 name: name.clone(),
1105 columns: vec![make_column_def("c1", DataType::Int(display))],
1106 file_type: "CSV".to_string(),
1107 location: "foo.csv".into(),
1108 table_partition_cols: vec![],
1109 order_exprs: vec![],
1110 if_not_exists: false,
1111 temporary: false,
1112 unbounded: false,
1113 options: vec![],
1114 constraints: vec![],
1115 });
1116 expect_parse_ok(sql, expected)?;
1117
1118 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ";
1120 let expected = Statement::CreateExternalTable(CreateExternalTable {
1121 name: name.clone(),
1122 columns: vec![make_column_def("c1", DataType::Int(None))],
1123 file_type: "CSV".to_string(),
1124 location: "foo.csv".into(),
1125 table_partition_cols: vec![],
1126 order_exprs: vec![],
1127 if_not_exists: false,
1128 temporary: false,
1129 unbounded: false,
1130 options: vec![],
1131 constraints: vec![],
1132 });
1133 expect_parse_ok(sql, expected)?;
1134
1135 let sql =
1137 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ;";
1138 let expected = Statement::CreateExternalTable(CreateExternalTable {
1139 name: name.clone(),
1140 columns: vec![make_column_def("c1", DataType::Int(None))],
1141 file_type: "CSV".to_string(),
1142 location: "foo.csv".into(),
1143 table_partition_cols: vec![],
1144 order_exprs: vec![],
1145 if_not_exists: false,
1146 temporary: false,
1147 unbounded: false,
1148 options: vec![],
1149 constraints: vec![],
1150 });
1151 expect_parse_ok(sql, expected)?;
1152
1153 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
1155 let display = None;
1156 let expected = Statement::CreateExternalTable(CreateExternalTable {
1157 name: name.clone(),
1158 columns: vec![make_column_def("c1", DataType::Int(display))],
1159 file_type: "CSV".to_string(),
1160 location: "foo.csv".into(),
1161 table_partition_cols: vec![],
1162 order_exprs: vec![],
1163 if_not_exists: false,
1164 temporary: false,
1165 unbounded: false,
1166 options: vec![(
1167 "format.delimiter".into(),
1168 Value::SingleQuotedString("|".into()),
1169 )],
1170 constraints: vec![],
1171 });
1172 expect_parse_ok(sql, expected)?;
1173
1174 let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'";
1176 let display = None;
1177 let expected = Statement::CreateExternalTable(CreateExternalTable {
1178 name: name.clone(),
1179 columns: vec![make_column_def("c1", DataType::Int(display))],
1180 file_type: "CSV".to_string(),
1181 location: "foo.csv".into(),
1182 table_partition_cols: vec!["p1".to_string(), "p2".to_string()],
1183 order_exprs: vec![],
1184 if_not_exists: false,
1185 temporary: false,
1186 unbounded: false,
1187 options: vec![],
1188 constraints: vec![],
1189 });
1190 expect_parse_ok(sql, expected)?;
1191
1192 let sqls =
1194 vec![
1195 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1196 ('format.compression' 'GZIP')", "GZIP"),
1197 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1198 ('format.compression' 'BZIP2')", "BZIP2"),
1199 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1200 ('format.compression' 'XZ')", "XZ"),
1201 ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1202 ('format.compression' 'ZSTD')", "ZSTD"),
1203 ];
1204 for (sql, compression) in sqls {
1205 let expected = Statement::CreateExternalTable(CreateExternalTable {
1206 name: name.clone(),
1207 columns: vec![make_column_def("c1", DataType::Int(display))],
1208 file_type: "CSV".to_string(),
1209 location: "foo.csv".into(),
1210 table_partition_cols: vec![],
1211 order_exprs: vec![],
1212 if_not_exists: false,
1213 temporary: false,
1214 unbounded: false,
1215 options: vec![(
1216 "format.compression".into(),
1217 Value::SingleQuotedString(compression.into()),
1218 )],
1219 constraints: vec![],
1220 });
1221 expect_parse_ok(sql, expected)?;
1222 }
1223
1224 let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1226 let expected = Statement::CreateExternalTable(CreateExternalTable {
1227 name: name.clone(),
1228 columns: vec![],
1229 file_type: "PARQUET".to_string(),
1230 location: "foo.parquet".into(),
1231 table_partition_cols: vec![],
1232 order_exprs: vec![],
1233 if_not_exists: false,
1234 temporary: false,
1235 unbounded: false,
1236 options: vec![],
1237 constraints: vec![],
1238 });
1239 expect_parse_ok(sql, expected)?;
1240
1241 let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'";
1243 let expected = Statement::CreateExternalTable(CreateExternalTable {
1244 name: name.clone(),
1245 columns: vec![],
1246 file_type: "PARQUET".to_string(),
1247 location: "foo.parquet".into(),
1248 table_partition_cols: vec![],
1249 order_exprs: vec![],
1250 if_not_exists: false,
1251 temporary: false,
1252 unbounded: false,
1253 options: vec![],
1254 constraints: vec![],
1255 });
1256 expect_parse_ok(sql, expected)?;
1257
1258 let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
1260 let expected = Statement::CreateExternalTable(CreateExternalTable {
1261 name: name.clone(),
1262 columns: vec![],
1263 file_type: "AVRO".to_string(),
1264 location: "foo.avro".into(),
1265 table_partition_cols: vec![],
1266 order_exprs: vec![],
1267 if_not_exists: false,
1268 temporary: false,
1269 unbounded: false,
1270 options: vec![],
1271 constraints: vec![],
1272 });
1273 expect_parse_ok(sql, expected)?;
1274
1275 let sql =
1277 "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'";
1278 let expected = Statement::CreateExternalTable(CreateExternalTable {
1279 name: name.clone(),
1280 columns: vec![],
1281 file_type: "PARQUET".to_string(),
1282 location: "foo.parquet".into(),
1283 table_partition_cols: vec![],
1284 order_exprs: vec![],
1285 if_not_exists: true,
1286 temporary: false,
1287 unbounded: false,
1288 options: vec![],
1289 constraints: vec![],
1290 });
1291 expect_parse_ok(sql, expected)?;
1292
1293 let sql =
1295 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'";
1296 let expected = Statement::CreateExternalTable(CreateExternalTable {
1297 name: name.clone(),
1298 columns: vec![
1299 make_column_def("c1", DataType::Int(None)),
1300 make_column_def("p1", DataType::Int(None)),
1301 ],
1302 file_type: "CSV".to_string(),
1303 location: "foo.csv".into(),
1304 table_partition_cols: vec!["p1".to_string()],
1305 order_exprs: vec![],
1306 if_not_exists: false,
1307 temporary: false,
1308 unbounded: false,
1309 options: vec![],
1310 constraints: vec![],
1311 });
1312 expect_parse_ok(sql, expected)?;
1313
1314 let sql =
1316 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'";
1317 expect_parse_error(
1318 sql,
1319 "SQL error: ParserError(\"Expected: a data type name, found: ) at Line: 1, Column: 73\")",
1320 );
1321
1322 let sql =
1324 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'";
1325 expect_parse_error(sql, "SQL error: ParserError(\"Expected: ',' or ')' after partition definition, found: int at Line: 1, Column: 70\")");
1326
1327 let sql =
1329 "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'";
1330 let expected = Statement::CreateExternalTable(CreateExternalTable {
1331 name: name.clone(),
1332 columns: vec![],
1333 file_type: "X".to_string(),
1334 location: "blahblah".into(),
1335 table_partition_cols: vec![],
1336 order_exprs: vec![],
1337 if_not_exists: false,
1338 temporary: false,
1339 unbounded: false,
1340 options: vec![("k1".into(), Value::SingleQuotedString("v1".into()))],
1341 constraints: vec![],
1342 });
1343 expect_parse_ok(sql, expected)?;
1344
1345 let sql =
1347 "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'";
1348 let expected = Statement::CreateExternalTable(CreateExternalTable {
1349 name: name.clone(),
1350 columns: vec![],
1351 file_type: "X".to_string(),
1352 location: "blahblah".into(),
1353 table_partition_cols: vec![],
1354 order_exprs: vec![],
1355 if_not_exists: false,
1356 temporary: false,
1357 unbounded: false,
1358 options: vec![
1359 ("k1".into(), Value::SingleQuotedString("v1".into())),
1360 ("k2".into(), Value::SingleQuotedString("v2".into())),
1361 ],
1362 constraints: vec![],
1363 });
1364 expect_parse_ok(sql, expected)?;
1365
1366 let sqls = ["CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'",
1368 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'",
1369 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS LAST) LOCATION 'foo.csv'",
1370 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC) LOCATION 'foo.csv'",
1371 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC) LOCATION 'foo.csv'",
1372 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS FIRST) LOCATION 'foo.csv'",
1373 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS LAST) LOCATION 'foo.csv'",
1374 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS FIRST) LOCATION 'foo.csv'",
1375 "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS LAST) LOCATION 'foo.csv'"];
1376 let expected = vec![
1377 (None, None),
1378 (None, Some(true)),
1379 (None, Some(false)),
1380 (Some(true), None),
1381 (Some(false), None),
1382 (Some(false), Some(true)),
1383 (Some(false), Some(false)),
1384 (Some(true), Some(true)),
1385 (Some(true), Some(false)),
1386 ];
1387 for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) {
1388 let expected = Statement::CreateExternalTable(CreateExternalTable {
1389 name: name.clone(),
1390 columns: vec![make_column_def("c1", DataType::Int(None))],
1391 file_type: "CSV".to_string(),
1392 location: "foo.csv".into(),
1393 table_partition_cols: vec![],
1394 order_exprs: vec![vec![OrderByExpr {
1395 expr: Identifier(Ident {
1396 value: "c1".to_owned(),
1397 quote_style: None,
1398 span: Span::empty(),
1399 }),
1400 options: OrderByOptions { asc, nulls_first },
1401 with_fill: None,
1402 }]],
1403 if_not_exists: false,
1404 temporary: false,
1405 unbounded: false,
1406 options: vec![],
1407 constraints: vec![],
1408 });
1409 expect_parse_ok(sql, expected)?;
1410 }
1411
1412 let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
1414 let display = None;
1415 let expected = Statement::CreateExternalTable(CreateExternalTable {
1416 name: name.clone(),
1417 columns: vec![
1418 make_column_def("c1", DataType::Int(display)),
1419 make_column_def("c2", DataType::Int(display)),
1420 ],
1421 file_type: "CSV".to_string(),
1422 location: "foo.csv".into(),
1423 table_partition_cols: vec![],
1424 order_exprs: vec![vec![
1425 OrderByExpr {
1426 expr: Identifier(Ident {
1427 value: "c1".to_owned(),
1428 quote_style: None,
1429 span: Span::empty(),
1430 }),
1431 options: OrderByOptions {
1432 asc: Some(true),
1433 nulls_first: None,
1434 },
1435 with_fill: None,
1436 },
1437 OrderByExpr {
1438 expr: Identifier(Ident {
1439 value: "c2".to_owned(),
1440 quote_style: None,
1441 span: Span::empty(),
1442 }),
1443 options: OrderByOptions {
1444 asc: Some(false),
1445 nulls_first: Some(true),
1446 },
1447 with_fill: None,
1448 },
1449 ]],
1450 if_not_exists: false,
1451 temporary: false,
1452 unbounded: false,
1453 options: vec![],
1454 constraints: vec![],
1455 });
1456 expect_parse_ok(sql, expected)?;
1457
1458 let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
1460 let display = None;
1461 let expected = Statement::CreateExternalTable(CreateExternalTable {
1462 name: name.clone(),
1463 columns: vec![
1464 make_column_def("c1", DataType::Int(display)),
1465 make_column_def("c2", DataType::Int(display)),
1466 ],
1467 file_type: "CSV".to_string(),
1468 location: "foo.csv".into(),
1469 table_partition_cols: vec![],
1470 order_exprs: vec![vec![OrderByExpr {
1471 expr: Expr::BinaryOp {
1472 left: Box::new(Identifier(Ident {
1473 value: "c1".to_owned(),
1474 quote_style: None,
1475 span: Span::empty(),
1476 })),
1477 op: BinaryOperator::Minus,
1478 right: Box::new(Identifier(Ident {
1479 value: "c2".to_owned(),
1480 quote_style: None,
1481 span: Span::empty(),
1482 })),
1483 },
1484 options: OrderByOptions {
1485 asc: Some(true),
1486 nulls_first: None,
1487 },
1488 with_fill: None,
1489 }]],
1490 if_not_exists: false,
1491 temporary: false,
1492 unbounded: false,
1493 options: vec![],
1494 constraints: vec![],
1495 });
1496 expect_parse_ok(sql, expected)?;
1497
1498 let sql = "
1500 CREATE UNBOUNDED EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float)
1501 STORED AS PARQUET
1502 WITH ORDER (c1 - c2 ASC)
1503 PARTITIONED BY (c1)
1504 LOCATION 'foo.parquet'
1505 OPTIONS ('format.compression' 'zstd',
1506 'format.delimiter' '*',
1507 'ROW_GROUP_SIZE' '1024',
1508 'TRUNCATE' 'NO',
1509 'format.has_header' 'true')";
1510 let expected = Statement::CreateExternalTable(CreateExternalTable {
1511 name: name.clone(),
1512 columns: vec![
1513 make_column_def("c1", DataType::Int(None)),
1514 make_column_def("c2", DataType::Float(None)),
1515 ],
1516 file_type: "PARQUET".to_string(),
1517 location: "foo.parquet".into(),
1518 table_partition_cols: vec!["c1".into()],
1519 order_exprs: vec![vec![OrderByExpr {
1520 expr: Expr::BinaryOp {
1521 left: Box::new(Identifier(Ident {
1522 value: "c1".to_owned(),
1523 quote_style: None,
1524 span: Span::empty(),
1525 })),
1526 op: BinaryOperator::Minus,
1527 right: Box::new(Identifier(Ident {
1528 value: "c2".to_owned(),
1529 quote_style: None,
1530 span: Span::empty(),
1531 })),
1532 },
1533 options: OrderByOptions {
1534 asc: Some(true),
1535 nulls_first: None,
1536 },
1537 with_fill: None,
1538 }]],
1539 if_not_exists: true,
1540 temporary: false,
1541 unbounded: true,
1542 options: vec![
1543 (
1544 "format.compression".into(),
1545 Value::SingleQuotedString("zstd".into()),
1546 ),
1547 (
1548 "format.delimiter".into(),
1549 Value::SingleQuotedString("*".into()),
1550 ),
1551 (
1552 "ROW_GROUP_SIZE".into(),
1553 Value::SingleQuotedString("1024".into()),
1554 ),
1555 ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1556 (
1557 "format.has_header".into(),
1558 Value::SingleQuotedString("true".into()),
1559 ),
1560 ],
1561 constraints: vec![],
1562 });
1563 expect_parse_ok(sql, expected)?;
1564
1565 Ok(())
1568 }
1569
1570 #[test]
1571 fn copy_to_table_to_table() -> Result<(), DataFusionError> {
1572 let sql = "COPY foo TO bar STORED AS CSV";
1574 let expected = Statement::CopyTo(CopyToStatement {
1575 source: object_name("foo"),
1576 target: "bar".to_string(),
1577 partitioned_by: vec![],
1578 stored_as: Some("CSV".to_owned()),
1579 options: vec![],
1580 });
1581
1582 assert_eq!(verified_stmt(sql), expected);
1583 Ok(())
1584 }
1585
1586 #[test]
1587 fn skip_copy_into_snowflake() -> Result<(), DataFusionError> {
1588 let sql = "COPY INTO foo FROM @~/staged FILE_FORMAT = (FORMAT_NAME = 'mycsv');";
1589 let dialect = Box::new(SnowflakeDialect);
1590 let statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?;
1591
1592 assert_eq!(
1593 statements.len(),
1594 1,
1595 "Expected to parse exactly one statement"
1596 );
1597 if let Statement::CopyTo(_) = &statements[0] {
1598 panic!("Expected non COPY TO statement, but was successful: {statements:?}");
1599 }
1600 Ok(())
1601 }
1602
1603 #[test]
1604 fn explain_copy_to_table_to_table() -> Result<(), DataFusionError> {
1605 let cases = vec![
1606 ("EXPLAIN COPY foo TO bar STORED AS PARQUET", false, false),
1607 (
1608 "EXPLAIN ANALYZE COPY foo TO bar STORED AS PARQUET",
1609 true,
1610 false,
1611 ),
1612 (
1613 "EXPLAIN VERBOSE COPY foo TO bar STORED AS PARQUET",
1614 false,
1615 true,
1616 ),
1617 (
1618 "EXPLAIN ANALYZE VERBOSE COPY foo TO bar STORED AS PARQUET",
1619 true,
1620 true,
1621 ),
1622 ];
1623 for (sql, analyze, verbose) in cases {
1624 println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}");
1625
1626 let expected_copy = Statement::CopyTo(CopyToStatement {
1627 source: object_name("foo"),
1628 target: "bar".to_string(),
1629 partitioned_by: vec![],
1630 stored_as: Some("PARQUET".to_owned()),
1631 options: vec![],
1632 });
1633 let expected = Statement::Explain(ExplainStatement {
1634 analyze,
1635 verbose,
1636 format: None,
1637 statement: Box::new(expected_copy),
1638 });
1639 assert_eq!(verified_stmt(sql), expected);
1640 }
1641 Ok(())
1642 }
1643
1644 #[test]
1645 fn copy_to_query_to_table() -> Result<(), DataFusionError> {
1646 let statement = verified_stmt("SELECT 1");
1647
1648 let statement = if let Statement::Statement(statement) = statement {
1650 *statement
1651 } else {
1652 panic!("Expected statement, got {statement:?}");
1653 };
1654
1655 let query = if let SQLStatement::Query(query) = statement {
1656 query
1657 } else {
1658 panic!("Expected query, got {statement:?}");
1659 };
1660
1661 let sql =
1662 "COPY (SELECT 1) TO bar STORED AS CSV OPTIONS ('format.has_header' 'true')";
1663 let expected = Statement::CopyTo(CopyToStatement {
1664 source: CopyToSource::Query(query),
1665 target: "bar".to_string(),
1666 partitioned_by: vec![],
1667 stored_as: Some("CSV".to_owned()),
1668 options: vec![(
1669 "format.has_header".into(),
1670 Value::SingleQuotedString("true".into()),
1671 )],
1672 });
1673 assert_eq!(verified_stmt(sql), expected);
1674 Ok(())
1675 }
1676
1677 #[test]
1678 fn copy_to_options() -> Result<(), DataFusionError> {
1679 let sql = "COPY foo TO bar STORED AS CSV OPTIONS ('row_group_size' '55')";
1680 let expected = Statement::CopyTo(CopyToStatement {
1681 source: object_name("foo"),
1682 target: "bar".to_string(),
1683 partitioned_by: vec![],
1684 stored_as: Some("CSV".to_owned()),
1685 options: vec![(
1686 "row_group_size".to_string(),
1687 Value::SingleQuotedString("55".to_string()),
1688 )],
1689 });
1690 assert_eq!(verified_stmt(sql), expected);
1691 Ok(())
1692 }
1693
1694 #[test]
1695 fn copy_to_partitioned_by() -> Result<(), DataFusionError> {
1696 let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')";
1697 let expected = Statement::CopyTo(CopyToStatement {
1698 source: object_name("foo"),
1699 target: "bar".to_string(),
1700 partitioned_by: vec!["a".to_string()],
1701 stored_as: Some("CSV".to_owned()),
1702 options: vec![(
1703 "row_group_size".to_string(),
1704 Value::SingleQuotedString("55".to_string()),
1705 )],
1706 });
1707 assert_eq!(verified_stmt(sql), expected);
1708 Ok(())
1709 }
1710
1711 #[test]
1712 fn copy_to_multi_options() -> Result<(), DataFusionError> {
1713 let sql =
1715 "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy, 'execution.keep_partition_by_columns' true)";
1716
1717 let expected_options = vec![
1718 (
1719 "format.row_group_size".to_string(),
1720 Value::Number("55".to_string(), false),
1721 ),
1722 (
1723 "format.compression".to_string(),
1724 Value::SingleQuotedString("snappy".to_string()),
1725 ),
1726 (
1727 "execution.keep_partition_by_columns".to_string(),
1728 Value::SingleQuotedString("true".to_string()),
1729 ),
1730 ];
1731
1732 let mut statements = DFParser::parse_sql(sql).unwrap();
1733 assert_eq!(statements.len(), 1);
1734 let only_statement = statements.pop_front().unwrap();
1735
1736 let options = if let Statement::CopyTo(copy_to) = only_statement {
1737 copy_to.options
1738 } else {
1739 panic!("Expected copy");
1740 };
1741
1742 assert_eq!(options, expected_options);
1743
1744 Ok(())
1745 }
1746
1747 fn object_name(name: &str) -> CopyToSource {
1750 CopyToSource::Relation(ObjectName::from(vec![Ident::new(name)]))
1751 }
1752
1753 fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement {
1767 let mut statements = DFParser::parse_sql(sql).unwrap();
1768 assert_eq!(statements.len(), 1);
1769
1770 if sql != canonical {
1771 assert_eq!(DFParser::parse_sql(canonical).unwrap(), statements);
1772 }
1773
1774 let only_statement = statements.pop_front().unwrap();
1775 assert_eq!(
1776 canonical.to_uppercase(),
1777 only_statement.to_string().to_uppercase()
1778 );
1779 only_statement
1780 }
1781
1782 fn verified_stmt(sql: &str) -> Statement {
1786 one_statement_parses_to(sql, sql)
1787 }
1788
1789 #[test]
1790 fn test_recursion_limit() {
1793 let sql = "SELECT 1 OR 2";
1794
1795 DFParserBuilder::new(sql)
1797 .build()
1798 .unwrap()
1799 .parse_statements()
1800 .unwrap();
1801
1802 let err = DFParserBuilder::new(sql)
1803 .with_recursion_limit(1)
1804 .build()
1805 .unwrap()
1806 .parse_statements()
1807 .unwrap_err();
1808
1809 assert_contains!(
1810 err.to_string(),
1811 "SQL error: RecursionLimitExceeded (current limit: 1)"
1812 );
1813 }
1814
1815 fn expect_parse_expr_ok(sql: &str, expected: ExprWithAlias) {
1816 let expr = DFParser::parse_sql_into_expr(sql).unwrap();
1817 assert_eq!(expr, expected, "actual:\n{expr:#?}");
1818 }
1819
1820 fn expect_parse_expr_error(sql: &str, expected_error: &str) {
1822 match DFParser::parse_sql_into_expr(sql) {
1823 Ok(expr) => {
1824 panic!("Expected parse error for '{sql}', but was successful: {expr:#?}");
1825 }
1826 Err(e) => {
1827 let error_message = e.to_string();
1828 assert!(
1829 error_message.contains(expected_error),
1830 "Expected error '{expected_error}' not found in actual error '{error_message}'"
1831 );
1832 }
1833 }
1834 }
1835
1836 #[test]
1837 fn literal() {
1838 expect_parse_expr_ok(
1839 "1234",
1840 ExprWithAlias {
1841 expr: Expr::Value(ValueWithSpan::from(Value::Number(
1842 "1234".to_string(),
1843 false,
1844 ))),
1845 alias: None,
1846 },
1847 )
1848 }
1849
1850 #[test]
1851 fn literal_with_alias() {
1852 expect_parse_expr_ok(
1853 "1234 as foo",
1854 ExprWithAlias {
1855 expr: Expr::Value(ValueWithSpan::from(Value::Number(
1856 "1234".to_string(),
1857 false,
1858 ))),
1859 alias: Some(Ident::from("foo")),
1860 },
1861 )
1862 }
1863
1864 #[test]
1865 fn literal_with_alias_and_trailing_tokens() {
1866 expect_parse_expr_error(
1867 "1234 as foo.bar",
1868 "Expected: end of expression, found: .",
1869 )
1870 }
1871
1872 #[test]
1873 fn literal_with_alias_and_trailing_whitespace() {
1874 expect_parse_expr_ok(
1875 "1234 as foo ",
1876 ExprWithAlias {
1877 expr: Expr::Value(ValueWithSpan::from(Value::Number(
1878 "1234".to_string(),
1879 false,
1880 ))),
1881 alias: Some(Ident::from("foo")),
1882 },
1883 )
1884 }
1885
1886 #[test]
1887 fn literal_with_alias_and_trailing_whitespace_and_token() {
1888 expect_parse_expr_error(
1889 "1234 as foo bar",
1890 "Expected: end of expression, found: bar",
1891 )
1892 }
1893}