Skip to main content

datafusion_sql/
parser.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`DFParser`]: DataFusion SQL Parser based on [`sqlparser`]
19//!
20//! This parser implements DataFusion specific statements such as
21//! `CREATE EXTERNAL TABLE`
22
23use datafusion_common::DataFusionError;
24use datafusion_common::config::SqlParserOptions;
25use datafusion_common::{Diagnostic, Span, sql_err};
26use sqlparser::ast::{ExprWithAlias, Ident, OrderByOptions};
27use sqlparser::tokenizer::TokenWithSpan;
28use sqlparser::{
29    ast::{
30        ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query,
31        Statement as SQLStatement, TableConstraint, Value,
32    },
33    dialect::{Dialect, GenericDialect, keywords::Keyword},
34    parser::{Parser, ParserError},
35    tokenizer::{Token, Tokenizer, Word},
36};
37use std::collections::VecDeque;
38use std::fmt;
39
// Use `Parser::expected` instead, if possible
//
// Builds an `Err(DataFusionError)` from a message.
//
// * `parser_err!(msg)` wraps `msg.to_string()` in `ParserError::ParserError`
//   and converts it into a `DataFusionError`.
// * `parser_err!(msg; diagnostic = diag)` additionally attaches `diag` to the
//   error via `with_diagnostic`.
//
// The macro evaluates to `Err(err)`; the `Ok` type is inferred at the call
// site.
macro_rules! parser_err {
    ($MSG:expr $(; diagnostic = $DIAG:expr)?) => {{

        let err = DataFusionError::from(ParserError::ParserError($MSG.to_string()));
        // Only expanded when the optional `diagnostic = ...` argument is given
        $(
            let err = err.with_diagnostic($DIAG);
        )?
        Err(err)
    }};
}
51
52fn parse_file_type(s: &str) -> Result<String, DataFusionError> {
53    Ok(s.to_uppercase())
54}
55
/// DataFusion specific `EXPLAIN`
///
/// Syntax:
/// ```sql
/// EXPLAIN [ANALYZE] [VERBOSE] [FORMAT format] statement
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExplainStatement {
    /// `EXPLAIN ANALYZE ..`
    pub analyze: bool,
    /// `EXPLAIN .. VERBOSE ..`
    pub verbose: bool,
    /// `EXPLAIN .. FORMAT <format>`; `None` when no `FORMAT` clause was given
    pub format: Option<String>,
    /// The statement to explain. Note this is a DataFusion [`Statement`] (not a
    /// [`sqlparser::ast::Statement`]) so that we can use `EXPLAIN`, `COPY`, and
    /// other DataFusion specific statements
    pub statement: Box<Statement>,
}
75
76impl fmt::Display for ExplainStatement {
77    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78        let Self {
79            analyze,
80            verbose,
81            format,
82            statement,
83        } = self;
84
85        write!(f, "EXPLAIN ")?;
86        if *analyze {
87            write!(f, "ANALYZE ")?;
88        }
89        if *verbose {
90            write!(f, "VERBOSE ")?;
91        }
92        if let Some(format) = format.as_ref() {
93            write!(f, "FORMAT {format} ")?;
94        }
95
96        write!(f, "{statement}")
97    }
98}
99
/// DataFusion extension DDL for `COPY`
///
/// # Syntax:
///
/// The clauses following the source may appear in any order, each at most
/// once; `TO` is required.
///
/// ```text
/// COPY <table_name | (<query>)>
/// TO <destination_url>
/// [ STORED AS <file_type> ]
/// [ PARTITIONED BY (<column list>) ]
/// [ OPTIONS (<key_value_list>) ]
/// ```
///
/// # Examples
///
/// NOTE(review): the option list in the first example directly follows the
/// file type without an `OPTIONS` keyword; confirm it still parses with the
/// current `parse_copy` implementation.
///
/// ```sql
/// COPY lineitem  TO 'lineitem'
/// STORED AS PARQUET (
///   partitions 16,
///   row_group_limit_rows 100000,
///   row_group_limit_bytes 200000
/// )
///
/// COPY (SELECT l_orderkey from lineitem) to 'lineitem.parquet';
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CopyToStatement {
    /// Where the data comes from (a table or a query)
    pub source: CopyToSource,
    /// The URL the data is written to
    pub target: String,
    /// Partition keys (empty when no `PARTITIONED BY` clause was given)
    pub partitioned_by: Vec<String>,
    /// File type (Parquet, NDJSON, CSV etc.); `None` when no `STORED AS`
    /// clause was given
    pub stored_as: Option<String>,
    /// Target specific options (empty when no `OPTIONS` clause was given)
    pub options: Vec<(String, Value)>,
}
136
137impl fmt::Display for CopyToStatement {
138    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139        let Self {
140            source,
141            target,
142            partitioned_by,
143            stored_as,
144            options,
145            ..
146        } = self;
147
148        write!(f, "COPY {source} TO {target}")?;
149        if let Some(file_type) = stored_as {
150            write!(f, " STORED AS {file_type}")?;
151        }
152        if !partitioned_by.is_empty() {
153            write!(f, " PARTITIONED BY ({})", partitioned_by.join(", "))?;
154        }
155
156        if !options.is_empty() {
157            let opts: Vec<_> =
158                options.iter().map(|(k, v)| format!("'{k}' {v}")).collect();
159            write!(f, " OPTIONS ({})", opts.join(", "))?;
160        }
161
162        Ok(())
163    }
164}
165
/// The source of the data written by a `COPY ... TO` statement
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CopyToSource {
    /// `COPY <table> TO ...`
    Relation(ObjectName),
    /// `COPY (...query...) TO ...`
    Query(Box<Query>),
}
173
174impl fmt::Display for CopyToSource {
175    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
176        match self {
177            CopyToSource::Relation(r) => write!(f, "{r}"),
178            CopyToSource::Query(q) => write!(f, "({q})"),
179        }
180    }
181}
182
/// This type defines a lexicographical ordering: a list of `ORDER BY`
/// expressions as written in a single `WITH ORDER (...)` clause.
pub(crate) type LexOrdering = Vec<OrderByExpr>;
185
/// DataFusion extension DDL for `CREATE EXTERNAL TABLE`
///
/// Syntax:
///
/// ```text
/// CREATE
/// [ OR REPLACE ]
/// [ UNBOUNDED ]
/// EXTERNAL TABLE
/// [ IF NOT EXISTS ]
/// <TABLE_NAME>[ (<column_definition>) ]
/// STORED AS <file_type>
/// [ PARTITIONED BY (<column_definition list> | <column list>) ]
/// [ WITH ORDER (<ordered column list>) ]
/// [ OPTIONS (<key_value_list>) ]
/// LOCATION <literal>
///
/// <column_definition> := (<column_name> <data_type>, ...)
///
/// <column_list> := (<column_name>, ...)
///
/// <ordered_column_list> := (<column_name> <sort_clause>, ...)
///
/// <key_value_list> := (<literal> <literal>, <literal> <literal>, ...)
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CreateExternalTable {
    /// Table name
    pub name: ObjectName,
    /// Optional schema
    pub columns: Vec<ColumnDef>,
    /// File type (Parquet, NDJSON, CSV, etc)
    pub file_type: String,
    /// Path to file
    pub location: String,
    /// Partition Columns
    pub table_partition_cols: Vec<String>,
    /// Ordered expressions, one [`LexOrdering`] per `WITH ORDER` clause
    pub order_exprs: Vec<LexOrdering>,
    /// Option to not error if table already exists
    pub if_not_exists: bool,
    /// Option to replace table content if table already exists
    pub or_replace: bool,
    /// Whether the table is a temporary table
    pub temporary: bool,
    /// Whether the table is unbounded (an infinite stream), i.e. declared
    /// with `CREATE UNBOUNDED EXTERNAL TABLE`
    pub unbounded: bool,
    /// Table(provider) specific options
    pub options: Vec<(String, Value)>,
    /// Table-level constraints
    pub constraints: Vec<TableConstraint>,
}
237
238impl fmt::Display for CreateExternalTable {
239    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
240        write!(f, "CREATE EXTERNAL TABLE ")?;
241        if self.if_not_exists {
242            write!(f, "IF NOT EXISTS ")?;
243        }
244        write!(f, "{} ", self.name)?;
245        write!(f, "STORED AS {} ", self.file_type)?;
246        if !self.order_exprs.is_empty() {
247            write!(f, "WITH ORDER (")?;
248            let mut first = true;
249            for expr in self.order_exprs.iter().flatten() {
250                if !first {
251                    write!(f, ", ")?;
252                }
253                write!(f, "{expr}")?;
254                first = false;
255            }
256            write!(f, ") ")?;
257        }
258        write!(f, "LOCATION {}", self.location)
259    }
260}
261
/// DataFusion extension for `RESET`
///
/// Displayed as `RESET <name>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResetStatement {
    /// Reset a single configuration variable (stored as provided)
    Variable(ObjectName),
}
268
269impl fmt::Display for ResetStatement {
270    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
271        match self {
272            ResetStatement::Variable(name) => write!(f, "RESET {name}"),
273        }
274    }
275}
276
/// DataFusion SQL Statement.
///
/// This can either be a [`Statement`] from [`sqlparser`] from a
/// standard SQL dialect, or a DataFusion extension such as `CREATE
/// EXTERNAL TABLE`. See [`DFParser`] for more information.
///
/// [`Statement`]: sqlparser::ast::Statement
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Statement {
    /// ANSI SQL AST node (from sqlparser-rs)
    Statement(Box<SQLStatement>),
    /// Extension: `CREATE EXTERNAL TABLE`
    CreateExternalTable(CreateExternalTable),
    /// Extension: `COPY TO`
    CopyTo(CopyToStatement),
    /// `EXPLAIN` that can wrap DataFusion extension statements
    Explain(ExplainStatement),
    /// Extension: `RESET`
    Reset(ResetStatement),
}
297
298impl fmt::Display for Statement {
299    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
300        match self {
301            Statement::Statement(stmt) => write!(f, "{stmt}"),
302            Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"),
303            Statement::CopyTo(stmt) => write!(f, "{stmt}"),
304            Statement::Explain(stmt) => write!(f, "{stmt}"),
305            Statement::Reset(stmt) => write!(f, "{stmt}"),
306        }
307    }
308}
309
310fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), DataFusionError> {
311    if field.is_some() {
312        parser_err!(format!("{name} specified more than once",))?
313    }
314    Ok(())
315}
316
/// DataFusion SQL Parser based on [`sqlparser`]
///
/// Parses DataFusion's SQL dialect, often delegating to [`sqlparser`]'s [`Parser`].
///
/// DataFusion mostly follows existing SQL dialects via
/// `sqlparser`. However, certain statements such as `COPY` and
/// `CREATE EXTERNAL TABLE` have special syntax in DataFusion. See
/// [`Statement`] for a list of this special syntax
///
/// Use [`DFParserBuilder`] to create an instance.
pub struct DFParser<'a> {
    /// The underlying [`sqlparser`] parser that holds the token stream
    pub parser: Parser<'a>,
    /// Parser options; the recursion limit is reported in error messages
    /// when the limit is exceeded
    options: SqlParserOptions,
}
329
/// Default recursion limit used by [`DFParserBuilder`]. Same as `sqlparser`
const DEFAULT_RECURSION_LIMIT: usize = 50;
/// Default dialect used by [`DFParserBuilder`] unless overridden with
/// `with_dialect`
const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
333
/// Builder for [`DFParser`]
///
/// # Example: Create and Parse SQL statements
/// ```
/// # use datafusion_sql::parser::DFParserBuilder;
/// # use datafusion_common::Result;
/// # fn test() -> Result<()> {
/// let mut parser = DFParserBuilder::new("SELECT * FROM foo; SELECT 1 + 2").build()?;
/// // parse the SQL into DFStatements
/// let statements = parser.parse_statements()?;
/// assert_eq!(statements.len(), 2);
/// # Ok(())
/// # }
/// ```
///
/// # Example: Create and Parse expression with a different dialect
/// ```
/// # use datafusion_sql::parser::DFParserBuilder;
/// # use datafusion_common::Result;
/// # use datafusion_sql::sqlparser::dialect::MySqlDialect;
/// # use datafusion_sql::sqlparser::ast::Expr;
/// # fn test() -> Result<()> {
/// let dialect = MySqlDialect {}; // Parse using MySQL dialect
/// let mut parser = DFParserBuilder::new("1 + 2")
///     .with_dialect(&dialect)
///     .build()?;
/// // parse 1+2 into an sqlparser::ast::Expr
/// let res = parser.parse_expr()?;
/// assert!(matches!(res.expr, Expr::BinaryOp { .. }));
/// # Ok(())
/// # }
/// ```
pub struct DFParserBuilder<'a, 'b> {
    /// Parser input: either raw SQL or tokens
    input: ParserInput<'a>,
    /// The Dialect to use (defaults to [`GenericDialect`])
    dialect: &'b dyn Dialect,
    /// The recursion limit while parsing (defaults to 50)
    recursion_limit: usize,
}
374
/// Describes a possible input for the parser
///
/// Both `&str` and `Vec<TokenWithSpan>` convert into this type via `From`,
/// so either can be passed directly to [`DFParserBuilder::new`].
pub enum ParserInput<'a> {
    /// Raw SQL. Tokenization will be performed automatically as a
    /// part of [`DFParserBuilder::build`]
    Sql(&'a str),
    /// Already tokenized input; used as is, without running the tokenizer
    Tokens(Vec<TokenWithSpan>),
}
383
384impl<'a> From<&'a str> for ParserInput<'a> {
385    fn from(sql: &'a str) -> Self {
386        Self::Sql(sql)
387    }
388}
389
390impl From<Vec<TokenWithSpan>> for ParserInput<'static> {
391    fn from(tokens: Vec<TokenWithSpan>) -> Self {
392        Self::Tokens(tokens)
393    }
394}
395
396impl<'a, 'b> DFParserBuilder<'a, 'b> {
397    /// Create a new parser builder for the specified tokens using the
398    /// [`GenericDialect`].
399    pub fn new(input: impl Into<ParserInput<'a>>) -> Self {
400        Self {
401            input: input.into(),
402            dialect: &DEFAULT_DIALECT,
403            recursion_limit: DEFAULT_RECURSION_LIMIT,
404        }
405    }
406
407    /// Adjust the parser builder's dialect. Defaults to [`GenericDialect`]
408    pub fn with_dialect(mut self, dialect: &'b dyn Dialect) -> Self {
409        self.dialect = dialect;
410        self
411    }
412
413    /// Adjust the recursion limit of sql parsing.  Defaults to 50
414    pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
415        self.recursion_limit = recursion_limit;
416        self
417    }
418
419    /// Build resulting parser
420    pub fn build(self) -> Result<DFParser<'b>, DataFusionError> {
421        let tokens = match self.input {
422            ParserInput::Tokens(tokens) => tokens,
423            ParserInput::Sql(sql) => {
424                let mut tokenizer = Tokenizer::new(self.dialect, sql);
425                // Convert TokenizerError -> ParserError
426                tokenizer
427                    .tokenize_with_location()
428                    .map_err(ParserError::from)?
429            }
430        };
431
432        Ok(DFParser {
433            parser: Parser::new(self.dialect)
434                .with_tokens_with_locations(tokens)
435                .with_recursion_limit(self.recursion_limit),
436            options: SqlParserOptions {
437                recursion_limit: self.recursion_limit,
438                ..Default::default()
439            },
440        })
441    }
442}
443
444impl<'a> DFParser<'a> {
445    #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
446    pub fn new(sql: &'a str) -> Result<Self, DataFusionError> {
447        DFParserBuilder::new(sql).build()
448    }
449
450    #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
451    pub fn new_with_dialect(
452        sql: &'a str,
453        dialect: &'a dyn Dialect,
454    ) -> Result<Self, DataFusionError> {
455        DFParserBuilder::new(sql).with_dialect(dialect).build()
456    }
457
458    /// Parse a sql string into one or [`Statement`]s using the
459    /// [`GenericDialect`].
460    pub fn parse_sql(sql: &'a str) -> Result<VecDeque<Statement>, DataFusionError> {
461        let mut parser = DFParserBuilder::new(sql).build()?;
462
463        parser.parse_statements()
464    }
465
466    /// Parse a SQL string and produce one or more [`Statement`]s with
467    /// with the specified dialect.
468    pub fn parse_sql_with_dialect(
469        sql: &str,
470        dialect: &dyn Dialect,
471    ) -> Result<VecDeque<Statement>, DataFusionError> {
472        let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
473        parser.parse_statements()
474    }
475
476    pub fn parse_sql_into_expr(sql: &str) -> Result<ExprWithAlias, DataFusionError> {
477        DFParserBuilder::new(sql).build()?.parse_into_expr()
478    }
479
480    pub fn parse_sql_into_expr_with_dialect(
481        sql: &str,
482        dialect: &dyn Dialect,
483    ) -> Result<ExprWithAlias, DataFusionError> {
484        DFParserBuilder::new(sql)
485            .with_dialect(dialect)
486            .build()?
487            .parse_into_expr()
488    }
489
490    /// Parse a sql string into one or [`Statement`]s
491    pub fn parse_statements(&mut self) -> Result<VecDeque<Statement>, DataFusionError> {
492        let mut stmts = VecDeque::new();
493        let mut expecting_statement_delimiter = false;
494        loop {
495            // ignore empty statements (between successive statement delimiters)
496            while self.parser.consume_token(&Token::SemiColon) {
497                expecting_statement_delimiter = false;
498            }
499
500            if self.parser.peek_token() == Token::EOF {
501                break;
502            }
503            if expecting_statement_delimiter {
504                return self.expected("end of statement", &self.parser.peek_token());
505            }
506
507            let statement = self.parse_statement()?;
508            stmts.push_back(statement);
509            expecting_statement_delimiter = true;
510        }
511        Ok(stmts)
512    }
513
514    /// Report an unexpected token
515    fn expected<T>(
516        &self,
517        expected: &str,
518        found: &TokenWithSpan,
519    ) -> Result<T, DataFusionError> {
520        let sql_parser_span = found.span;
521        let span = Span::try_from_sqlparser_span(sql_parser_span);
522        let diagnostic = Diagnostic::new_error(
523            format!("Expected: {expected}, found: {found}{}", found.span.start),
524            span,
525        );
526        parser_err!(
527            format!("Expected: {expected}, found: {found}{}", found.span.start);
528            diagnostic=
529            diagnostic
530        )
531    }
532
533    fn expect_token(
534        &mut self,
535        expected: &str,
536        token: &Token,
537    ) -> Result<(), DataFusionError> {
538        let next_token = self.parser.peek_token_ref();
539        if next_token.token != *token {
540            self.expected(expected, next_token)
541        } else {
542            Ok(())
543        }
544    }
545
546    /// Parse a new expression
547    pub fn parse_statement(&mut self) -> Result<Statement, DataFusionError> {
548        match self.parser.peek_token().token {
549            Token::Word(w) => {
550                match w.keyword {
551                    Keyword::CREATE => {
552                        self.parser.next_token(); // CREATE
553                        self.parse_create()
554                    }
555                    Keyword::COPY => {
556                        if let Token::Word(w) = self.parser.peek_nth_token(1).token {
557                            // use native parser for COPY INTO
558                            if w.keyword == Keyword::INTO {
559                                return self.parse_and_handle_statement();
560                            }
561                        }
562                        self.parser.next_token(); // COPY
563                        self.parse_copy()
564                    }
565                    Keyword::EXPLAIN => {
566                        self.parser.next_token(); // EXPLAIN
567                        self.parse_explain()
568                    }
569                    Keyword::RESET => {
570                        self.parser.next_token(); // RESET
571                        self.parse_reset()
572                    }
573                    _ => {
574                        // use sqlparser-rs parser
575                        self.parse_and_handle_statement()
576                    }
577                }
578            }
579            _ => {
580                // use the native parser
581                self.parse_and_handle_statement()
582            }
583        }
584    }
585
586    pub fn parse_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
587        if let Token::Word(w) = self.parser.peek_token().token {
588            match w.keyword {
589                Keyword::CREATE | Keyword::COPY | Keyword::EXPLAIN => {
590                    return parser_err!("Unsupported command in expression")?;
591                }
592                _ => {}
593            }
594        }
595
596        Ok(self.parser.parse_expr_with_alias()?)
597    }
598
599    /// Parses the entire SQL string into an expression.
600    ///
601    /// In contrast to [`DFParser::parse_expr`], this function will report an error if the input
602    /// contains any trailing, unparsed tokens.
603    pub fn parse_into_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
604        let expr = self.parse_expr()?;
605        self.expect_token("end of expression", &Token::EOF)?;
606        Ok(expr)
607    }
608
609    /// Helper method to parse a statement and handle errors consistently, especially for recursion limits
610    fn parse_and_handle_statement(&mut self) -> Result<Statement, DataFusionError> {
611        self.parser
612            .parse_statement()
613            .map(|stmt| Statement::Statement(Box::from(stmt)))
614            .map_err(|e| match e {
615                ParserError::RecursionLimitExceeded => DataFusionError::SQL(
616                    Box::new(ParserError::RecursionLimitExceeded),
617                    Some(format!(
618                        " (current limit: {})",
619                        self.options.recursion_limit
620                    )),
621                ),
622                other => DataFusionError::SQL(Box::new(other), None),
623            })
624    }
625
626    /// Parse a SQL `COPY TO` statement
627    pub fn parse_copy(&mut self) -> Result<Statement, DataFusionError> {
628        // parse as a query
629        let source = if self.parser.consume_token(&Token::LParen) {
630            let query = self.parser.parse_query()?;
631            self.parser.expect_token(&Token::RParen)?;
632            CopyToSource::Query(query)
633        } else {
634            // parse as table reference
635            let table_name = self.parser.parse_object_name(true)?;
636            CopyToSource::Relation(table_name)
637        };
638
639        #[derive(Default)]
640        struct Builder {
641            stored_as: Option<String>,
642            target: Option<String>,
643            partitioned_by: Option<Vec<String>>,
644            options: Option<Vec<(String, Value)>>,
645        }
646
647        let mut builder = Builder::default();
648
649        loop {
650            if let Some(keyword) = self.parser.parse_one_of_keywords(&[
651                Keyword::STORED,
652                Keyword::TO,
653                Keyword::PARTITIONED,
654                Keyword::OPTIONS,
655                Keyword::WITH,
656            ]) {
657                match keyword {
658                    Keyword::STORED => {
659                        self.parser.expect_keyword(Keyword::AS)?;
660                        ensure_not_set(&builder.stored_as, "STORED AS")?;
661                        builder.stored_as = Some(self.parse_file_format()?);
662                    }
663                    Keyword::TO => {
664                        ensure_not_set(&builder.target, "TO")?;
665                        builder.target = Some(self.parser.parse_literal_string()?);
666                    }
667                    Keyword::WITH => {
668                        self.parser.expect_keyword(Keyword::HEADER)?;
669                        self.parser.expect_keyword(Keyword::ROW)?;
670                        return parser_err!(
671                            "WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')"
672                        )?;
673                    }
674                    Keyword::PARTITIONED => {
675                        self.parser.expect_keyword(Keyword::BY)?;
676                        ensure_not_set(&builder.partitioned_by, "PARTITIONED BY")?;
677                        builder.partitioned_by = Some(self.parse_partitions()?);
678                    }
679                    Keyword::OPTIONS => {
680                        ensure_not_set(&builder.options, "OPTIONS")?;
681                        builder.options = Some(self.parse_value_options()?);
682                    }
683                    _ => {
684                        unreachable!()
685                    }
686                }
687            } else {
688                let token = self.parser.peek_token();
689                if token == Token::EOF || token == Token::SemiColon {
690                    break;
691                } else {
692                    return self.expected("end of statement or ;", &token)?;
693                }
694            }
695        }
696
697        let Some(target) = builder.target else {
698            return parser_err!("Missing TO clause in COPY statement")?;
699        };
700
701        Ok(Statement::CopyTo(CopyToStatement {
702            source,
703            target,
704            partitioned_by: builder.partitioned_by.unwrap_or(vec![]),
705            stored_as: builder.stored_as,
706            options: builder.options.unwrap_or(vec![]),
707        }))
708    }
709
710    /// Parse the next token as a key name for an option list
711    ///
712    /// Note this is different than [`parse_literal_string`]
713    /// because it allows keywords as well as other non words
714    ///
715    /// [`parse_literal_string`]: sqlparser::parser::Parser::parse_literal_string
716    pub fn parse_option_key(&mut self) -> Result<String, DataFusionError> {
717        let next_token = self.parser.next_token();
718        match next_token.token {
719            Token::Word(Word { value, .. }) => {
720                let mut parts = vec![value];
721                while self.parser.consume_token(&Token::Period) {
722                    let next_token = self.parser.next_token();
723                    if let Token::Word(Word { value, .. }) = next_token.token {
724                        parts.push(value);
725                    } else {
726                        // Unquoted namespaced keys have to conform to the syntax
727                        // "<WORD>[\.<WORD>]*". If we have a key that breaks this
728                        // pattern, error out:
729                        return self.expected("key name", &next_token);
730                    }
731                }
732                Ok(parts.join("."))
733            }
734            Token::SingleQuotedString(s) => Ok(s),
735            Token::DoubleQuotedString(s) => Ok(s),
736            Token::EscapedStringLiteral(s) => Ok(s),
737            _ => self.expected("key name", &next_token),
738        }
739    }
740
741    /// Parse the next token as a value for an option list
742    ///
743    /// Note this is different than [`parse_value`] as it allows any
744    /// word or keyword in this location.
745    ///
746    /// [`parse_value`]: sqlparser::parser::Parser::parse_value
747    pub fn parse_option_value(&mut self) -> Result<Value, DataFusionError> {
748        let next_token = self.parser.next_token();
749        match next_token.token {
750            // e.g. things like "snappy" or "gzip" that may be keywords
751            Token::Word(word) => Ok(Value::SingleQuotedString(word.value)),
752            Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)),
753            Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)),
754            Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)),
755            Token::Number(n, l) => Ok(Value::Number(n, l)),
756            _ => self.expected("string or numeric value", &next_token),
757        }
758    }
759
760    /// Parse a SQL `EXPLAIN`
761    pub fn parse_explain(&mut self) -> Result<Statement, DataFusionError> {
762        let analyze = self.parser.parse_keyword(Keyword::ANALYZE);
763        let verbose = self.parser.parse_keyword(Keyword::VERBOSE);
764        let format = self.parse_explain_format()?;
765
766        let statement = self.parse_statement()?;
767
768        Ok(Statement::Explain(ExplainStatement {
769            statement: Box::new(statement),
770            analyze,
771            verbose,
772            format,
773        }))
774    }
775
776    /// Parse a SQL `RESET`
777    pub fn parse_reset(&mut self) -> Result<Statement, DataFusionError> {
778        let mut parts: Vec<String> = Vec::new();
779        let mut expecting_segment = true;
780
781        loop {
782            let next_token = self.parser.peek_token();
783            match &next_token.token {
784                Token::Word(word) => {
785                    self.parser.next_token();
786                    parts.push(word.value.clone());
787                    expecting_segment = false;
788                }
789                Token::SingleQuotedString(s)
790                | Token::DoubleQuotedString(s)
791                | Token::EscapedStringLiteral(s) => {
792                    self.parser.next_token();
793                    parts.push(s.clone());
794                    expecting_segment = false;
795                }
796                Token::Period => {
797                    self.parser.next_token();
798                    if expecting_segment || parts.is_empty() {
799                        return self.expected("configuration parameter", &next_token);
800                    }
801                    expecting_segment = true;
802                }
803                Token::EOF | Token::SemiColon => break,
804                _ => return self.expected("configuration parameter", &next_token),
805            }
806        }
807
808        if parts.is_empty() || expecting_segment {
809            return self.expected("configuration parameter", &self.parser.peek_token());
810        }
811
812        let idents: Vec<Ident> = parts.into_iter().map(Ident::new).collect();
813        let variable = ObjectName::from(idents);
814        Ok(Statement::Reset(ResetStatement::Variable(variable)))
815    }
816
817    pub fn parse_explain_format(&mut self) -> Result<Option<String>, DataFusionError> {
818        if !self.parser.parse_keyword(Keyword::FORMAT) {
819            return Ok(None);
820        }
821
822        let next_token = self.parser.next_token();
823        let format = match next_token.token {
824            Token::Word(w) => Ok(w.value),
825            Token::SingleQuotedString(w) => Ok(w),
826            Token::DoubleQuotedString(w) => Ok(w),
827            _ => self.expected("an explain format such as TREE", &next_token),
828        }?;
829        Ok(Some(format))
830    }
831
832    /// Parse a SQL `CREATE` statement handling `CREATE EXTERNAL TABLE`
833    pub fn parse_create(&mut self) -> Result<Statement, DataFusionError> {
834        // TODO: Change sql parser to take in `or_replace: bool` inside parse_create()
835        if self
836            .parser
837            .parse_keywords(&[Keyword::OR, Keyword::REPLACE, Keyword::EXTERNAL])
838        {
839            self.parse_create_external_table(false, true)
840        } else if self.parser.parse_keywords(&[
841            Keyword::OR,
842            Keyword::REPLACE,
843            Keyword::UNBOUNDED,
844            Keyword::EXTERNAL,
845        ]) {
846            self.parse_create_external_table(true, true)
847        } else if self.parser.parse_keyword(Keyword::EXTERNAL) {
848            self.parse_create_external_table(false, false)
849        } else if self
850            .parser
851            .parse_keywords(&[Keyword::UNBOUNDED, Keyword::EXTERNAL])
852        {
853            self.parse_create_external_table(true, false)
854        } else {
855            Ok(Statement::Statement(Box::from(self.parser.parse_create()?)))
856        }
857    }
858
859    fn parse_partitions(&mut self) -> Result<Vec<String>, DataFusionError> {
860        let mut partitions: Vec<String> = vec![];
861        if !self.parser.consume_token(&Token::LParen)
862            || self.parser.consume_token(&Token::RParen)
863        {
864            return Ok(partitions);
865        }
866
867        loop {
868            if let Token::Word(_) = self.parser.peek_token().token {
869                let identifier = self.parser.parse_identifier()?;
870                partitions.push(identifier.to_string());
871            } else {
872                return self.expected("partition name", &self.parser.peek_token());
873            }
874            let comma = self.parser.consume_token(&Token::Comma);
875            if self.parser.consume_token(&Token::RParen) {
876                // allow a trailing comma, even though it's not in standard
877                break;
878            } else if !comma {
879                return self.expected(
880                    "',' or ')' after partition definition",
881                    &self.parser.peek_token(),
882                );
883            }
884        }
885        Ok(partitions)
886    }
887
888    /// Parse the ordering clause of a `CREATE EXTERNAL TABLE` SQL statement
889    pub fn parse_order_by_exprs(&mut self) -> Result<Vec<OrderByExpr>, DataFusionError> {
890        let mut values = vec![];
891        self.parser.expect_token(&Token::LParen)?;
892        loop {
893            values.push(self.parse_order_by_expr()?);
894            if !self.parser.consume_token(&Token::Comma) {
895                self.parser.expect_token(&Token::RParen)?;
896                return Ok(values);
897            }
898        }
899    }
900
901    /// Parse an ORDER BY sub-expression optionally followed by ASC or DESC.
902    pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, DataFusionError> {
903        let expr = self.parser.parse_expr()?;
904
905        let asc = if self.parser.parse_keyword(Keyword::ASC) {
906            Some(true)
907        } else if self.parser.parse_keyword(Keyword::DESC) {
908            Some(false)
909        } else {
910            None
911        };
912
913        let nulls_first = if self
914            .parser
915            .parse_keywords(&[Keyword::NULLS, Keyword::FIRST])
916        {
917            Some(true)
918        } else if self.parser.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) {
919            Some(false)
920        } else {
921            None
922        };
923
924        Ok(OrderByExpr {
925            expr,
926            options: OrderByOptions { asc, nulls_first },
927            with_fill: None,
928        })
929    }
930
931    // This is a copy of the equivalent implementation in sqlparser.
932    fn parse_columns(
933        &mut self,
934    ) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), DataFusionError> {
935        let mut columns = vec![];
936        let mut constraints = vec![];
937        if !self.parser.consume_token(&Token::LParen)
938            || self.parser.consume_token(&Token::RParen)
939        {
940            return Ok((columns, constraints));
941        }
942
943        loop {
944            if let Some(constraint) = self.parser.parse_optional_table_constraint()? {
945                constraints.push(constraint);
946            } else if let Token::Word(_) = self.parser.peek_token().token {
947                let column_def = self.parse_column_def()?;
948                columns.push(column_def);
949            } else {
950                return self.expected(
951                    "column name or constraint definition",
952                    &self.parser.peek_token(),
953                );
954            }
955            let comma = self.parser.consume_token(&Token::Comma);
956            if self.parser.consume_token(&Token::RParen) {
957                // allow a trailing comma, even though it's not in standard
958                break;
959            } else if !comma {
960                return self.expected(
961                    "',' or ')' after column definition",
962                    &self.parser.peek_token(),
963                );
964            }
965        }
966
967        Ok((columns, constraints))
968    }
969
970    fn parse_column_def(&mut self) -> Result<ColumnDef, DataFusionError> {
971        let name = self.parser.parse_identifier()?;
972        let data_type = self.parser.parse_data_type()?;
973        let mut options = vec![];
974        loop {
975            if self.parser.parse_keyword(Keyword::CONSTRAINT) {
976                let name = Some(self.parser.parse_identifier()?);
977                if let Some(option) = self.parser.parse_optional_column_option()? {
978                    options.push(ColumnOptionDef { name, option });
979                } else {
980                    return self.expected(
981                        "constraint details after CONSTRAINT <name>",
982                        &self.parser.peek_token(),
983                    );
984                }
985            } else if let Some(option) = self.parser.parse_optional_column_option()? {
986                options.push(ColumnOptionDef { name: None, option });
987            } else {
988                break;
989            };
990        }
991        Ok(ColumnDef {
992            name,
993            data_type,
994            options,
995        })
996    }
997
998    fn parse_create_external_table(
999        &mut self,
1000        unbounded: bool,
1001        or_replace: bool,
1002    ) -> Result<Statement, DataFusionError> {
1003        let temporary = self
1004            .parser
1005            .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY])
1006            .is_some();
1007
1008        self.parser.expect_keyword(Keyword::TABLE)?;
1009        let if_not_exists =
1010            self.parser
1011                .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
1012
1013        if if_not_exists && or_replace {
1014            return parser_err!("'IF NOT EXISTS' cannot coexist with 'REPLACE'");
1015        }
1016
1017        let table_name = self.parser.parse_object_name(true)?;
1018        let (mut columns, constraints) = self.parse_columns()?;
1019
1020        #[derive(Default)]
1021        struct Builder {
1022            file_type: Option<String>,
1023            location: Option<String>,
1024            table_partition_cols: Option<Vec<String>>,
1025            order_exprs: Vec<LexOrdering>,
1026            options: Option<Vec<(String, Value)>>,
1027        }
1028        let mut builder = Builder::default();
1029
1030        loop {
1031            if let Some(keyword) = self.parser.parse_one_of_keywords(&[
1032                Keyword::STORED,
1033                Keyword::LOCATION,
1034                Keyword::WITH,
1035                Keyword::DELIMITER,
1036                Keyword::COMPRESSION,
1037                Keyword::PARTITIONED,
1038                Keyword::OPTIONS,
1039            ]) {
1040                match keyword {
1041                    Keyword::STORED => {
1042                        self.parser.expect_keyword(Keyword::AS)?;
1043                        ensure_not_set(&builder.file_type, "STORED AS")?;
1044                        builder.file_type = Some(self.parse_file_format()?);
1045                    }
1046                    Keyword::LOCATION => {
1047                        ensure_not_set(&builder.location, "LOCATION")?;
1048                        builder.location = Some(self.parser.parse_literal_string()?);
1049                    }
1050                    Keyword::WITH => {
1051                        if self.parser.parse_keyword(Keyword::ORDER) {
1052                            builder.order_exprs.push(self.parse_order_by_exprs()?);
1053                        } else {
1054                            self.parser.expect_keyword(Keyword::HEADER)?;
1055                            self.parser.expect_keyword(Keyword::ROW)?;
1056                            return parser_err!(
1057                                "WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)"
1058                            )?;
1059                        }
1060                    }
1061                    Keyword::DELIMITER => {
1062                        return parser_err!(
1063                            "DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')"
1064                        )?;
1065                    }
1066                    Keyword::COMPRESSION => {
1067                        self.parser.expect_keyword(Keyword::TYPE)?;
1068                        return parser_err!(
1069                            "COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)"
1070                        )?;
1071                    }
1072                    Keyword::PARTITIONED => {
1073                        self.parser.expect_keyword(Keyword::BY)?;
1074                        ensure_not_set(&builder.table_partition_cols, "PARTITIONED BY")?;
1075                        // Expects either list of column names (col_name [, col_name]*)
1076                        // or list of column definitions (col_name datatype [, col_name datatype]* )
1077                        // use the token after the name to decide which parsing rule to use
1078                        // Note that mixing both names and definitions is not allowed
1079                        let peeked = self.parser.peek_nth_token(2);
1080                        if peeked == Token::Comma || peeked == Token::RParen {
1081                            // List of column names
1082                            builder.table_partition_cols = Some(self.parse_partitions()?)
1083                        } else {
1084                            // List of column defs
1085                            let (cols, cons) = self.parse_columns()?;
1086                            builder.table_partition_cols = Some(
1087                                cols.iter().map(|col| col.name.to_string()).collect(),
1088                            );
1089
1090                            columns.extend(cols);
1091
1092                            if !cons.is_empty() {
1093                                return sql_err!(ParserError::ParserError(
1094                                    "Constraints on Partition Columns are not supported"
1095                                        .to_string(),
1096                                ));
1097                            }
1098                        }
1099                    }
1100                    Keyword::OPTIONS => {
1101                        ensure_not_set(&builder.options, "OPTIONS")?;
1102                        builder.options = Some(self.parse_value_options()?);
1103                    }
1104                    _ => {
1105                        unreachable!()
1106                    }
1107                }
1108            } else {
1109                let token = self.parser.peek_token();
1110                if token == Token::EOF || token == Token::SemiColon {
1111                    break;
1112                } else {
1113                    return self.expected("end of statement or ;", &token)?;
1114                }
1115            }
1116        }
1117
1118        // Validations: location and file_type are required
1119        if builder.file_type.is_none() {
1120            return sql_err!(ParserError::ParserError(
1121                "Missing STORED AS clause in CREATE EXTERNAL TABLE statement".into(),
1122            ));
1123        }
1124        if builder.location.is_none() {
1125            return sql_err!(ParserError::ParserError(
1126                "Missing LOCATION clause in CREATE EXTERNAL TABLE statement".into(),
1127            ));
1128        }
1129
1130        let create = CreateExternalTable {
1131            name: table_name,
1132            columns,
1133            file_type: builder.file_type.unwrap(),
1134            location: builder.location.unwrap(),
1135            table_partition_cols: builder.table_partition_cols.unwrap_or(vec![]),
1136            order_exprs: builder.order_exprs,
1137            if_not_exists,
1138            or_replace,
1139            temporary,
1140            unbounded,
1141            options: builder.options.unwrap_or(Vec::new()),
1142            constraints,
1143        };
1144        Ok(Statement::CreateExternalTable(create))
1145    }
1146
1147    /// Parses the set of valid formats
1148    fn parse_file_format(&mut self) -> Result<String, DataFusionError> {
1149        let token = self.parser.next_token();
1150        match &token.token {
1151            Token::Word(w) => parse_file_type(&w.value),
1152            _ => self.expected("one of ARROW, PARQUET, NDJSON, or CSV", &token),
1153        }
1154    }
1155
1156    /// Parses (key value) style options into a map of String --> [`Value`].
1157    ///
1158    /// This method supports keywords as key names as well as multiple
1159    /// value types such as Numbers as well as Strings.
1160    fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>, DataFusionError> {
1161        let mut options = vec![];
1162        self.parser.expect_token(&Token::LParen)?;
1163
1164        loop {
1165            let key = self.parse_option_key()?;
1166            let value = self.parse_option_value()?;
1167            options.push((key, value));
1168            let comma = self.parser.consume_token(&Token::Comma);
1169            if self.parser.consume_token(&Token::RParen) {
1170                // Allow a trailing comma, even though it's not in standard
1171                break;
1172            } else if !comma {
1173                return self.expected(
1174                    "',' or ')' after option definition",
1175                    &self.parser.peek_token(),
1176                );
1177            }
1178        }
1179        Ok(options)
1180    }
1181}
1182
1183#[cfg(test)]
1184mod tests {
1185    use super::*;
1186    use datafusion_common::assert_contains;
1187    use sqlparser::ast::Expr::Identifier;
1188    use sqlparser::ast::{
1189        BinaryOperator, DataType, ExactNumberInfo, Expr, Ident, ValueWithSpan,
1190    };
1191    use sqlparser::dialect::SnowflakeDialect;
1192    use sqlparser::tokenizer::{Location, Span, Whitespace};
1193
1194    fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), DataFusionError> {
1195        let statements = DFParser::parse_sql(sql)?;
1196        assert_eq!(
1197            statements.len(),
1198            1,
1199            "Expected to parse exactly one statement"
1200        );
1201        assert_eq!(statements[0], expected, "actual:\n{:#?}", statements[0]);
1202        Ok(())
1203    }
1204
1205    /// Parses sql and asserts that the expected error message was found
1206    fn expect_parse_error(sql: &str, expected_error: &str) {
1207        match DFParser::parse_sql(sql) {
1208            Ok(statements) => {
1209                panic!(
1210                    "Expected parse error for '{sql}', but was successful: {statements:?}"
1211                );
1212            }
1213            Err(e) => {
1214                let error_message = e.to_string();
1215                assert!(
1216                    error_message.contains(expected_error),
1217                    "Expected error '{expected_error}' not found in actual error '{error_message}'"
1218                );
1219            }
1220        }
1221    }
1222
1223    fn make_column_def(name: impl Into<String>, data_type: DataType) -> ColumnDef {
1224        ColumnDef {
1225            name: Ident {
1226                value: name.into(),
1227                quote_style: None,
1228                span: Span::empty(),
1229            },
1230            data_type,
1231            options: vec![],
1232        }
1233    }
1234
1235    #[test]
1236    fn create_external_table() -> Result<(), DataFusionError> {
1237        // positive case
1238        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
1239        let display = None;
1240        let name = ObjectName::from(vec![Ident::from("t")]);
1241        let expected = Statement::CreateExternalTable(CreateExternalTable {
1242            name: name.clone(),
1243            columns: vec![make_column_def("c1", DataType::Int(display))],
1244            file_type: "CSV".to_string(),
1245            location: "foo.csv".into(),
1246            table_partition_cols: vec![],
1247            order_exprs: vec![],
1248            if_not_exists: false,
1249            or_replace: false,
1250            temporary: false,
1251            unbounded: false,
1252            options: vec![],
1253            constraints: vec![],
1254        });
1255        expect_parse_ok(sql, expected)?;
1256
1257        // positive case: trailing space
1258        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'     ";
1259        let expected = Statement::CreateExternalTable(CreateExternalTable {
1260            name: name.clone(),
1261            columns: vec![make_column_def("c1", DataType::Int(None))],
1262            file_type: "CSV".to_string(),
1263            location: "foo.csv".into(),
1264            table_partition_cols: vec![],
1265            order_exprs: vec![],
1266            if_not_exists: false,
1267            or_replace: false,
1268            temporary: false,
1269            unbounded: false,
1270            options: vec![],
1271            constraints: vec![],
1272        });
1273        expect_parse_ok(sql, expected)?;
1274
1275        // positive case: trailing space + semicolon
1276        let sql =
1277            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'      ;";
1278        let expected = Statement::CreateExternalTable(CreateExternalTable {
1279            name: name.clone(),
1280            columns: vec![make_column_def("c1", DataType::Int(None))],
1281            file_type: "CSV".to_string(),
1282            location: "foo.csv".into(),
1283            table_partition_cols: vec![],
1284            order_exprs: vec![],
1285            if_not_exists: false,
1286            or_replace: false,
1287            temporary: false,
1288            unbounded: false,
1289            options: vec![],
1290            constraints: vec![],
1291        });
1292        expect_parse_ok(sql, expected)?;
1293
1294        // positive case with delimiter
1295        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
1296        let display = None;
1297        let expected = Statement::CreateExternalTable(CreateExternalTable {
1298            name: name.clone(),
1299            columns: vec![make_column_def("c1", DataType::Int(display))],
1300            file_type: "CSV".to_string(),
1301            location: "foo.csv".into(),
1302            table_partition_cols: vec![],
1303            order_exprs: vec![],
1304            if_not_exists: false,
1305            or_replace: false,
1306            temporary: false,
1307            unbounded: false,
1308            options: vec![(
1309                "format.delimiter".into(),
1310                Value::SingleQuotedString("|".into()),
1311            )],
1312            constraints: vec![],
1313        });
1314        expect_parse_ok(sql, expected)?;
1315
1316        // positive case: partitioned by
1317        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'";
1318        let display = None;
1319        let expected = Statement::CreateExternalTable(CreateExternalTable {
1320            name: name.clone(),
1321            columns: vec![make_column_def("c1", DataType::Int(display))],
1322            file_type: "CSV".to_string(),
1323            location: "foo.csv".into(),
1324            table_partition_cols: vec!["p1".to_string(), "p2".to_string()],
1325            order_exprs: vec![],
1326            if_not_exists: false,
1327            or_replace: false,
1328            temporary: false,
1329            unbounded: false,
1330            options: vec![],
1331            constraints: vec![],
1332        });
1333        expect_parse_ok(sql, expected)?;
1334
1335        // positive case: it is ok for sql stmt with `COMPRESSION TYPE GZIP` tokens
1336        let sqls =
1337            vec![
1338             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1339             ('format.compression' 'GZIP')", "GZIP"),
1340             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1341             ('format.compression' 'BZIP2')", "BZIP2"),
1342             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1343             ('format.compression' 'XZ')", "XZ"),
1344             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1345             ('format.compression' 'ZSTD')", "ZSTD"),
1346         ];
1347        for (sql, compression) in sqls {
1348            let expected = Statement::CreateExternalTable(CreateExternalTable {
1349                name: name.clone(),
1350                columns: vec![make_column_def("c1", DataType::Int(display))],
1351                file_type: "CSV".to_string(),
1352                location: "foo.csv".into(),
1353                table_partition_cols: vec![],
1354                order_exprs: vec![],
1355                if_not_exists: false,
1356                or_replace: false,
1357                temporary: false,
1358                unbounded: false,
1359                options: vec![(
1360                    "format.compression".into(),
1361                    Value::SingleQuotedString(compression.into()),
1362                )],
1363                constraints: vec![],
1364            });
1365            expect_parse_ok(sql, expected)?;
1366        }
1367
1368        // positive case: it is ok for parquet files not to have columns specified
1369        let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1370        let expected = Statement::CreateExternalTable(CreateExternalTable {
1371            name: name.clone(),
1372            columns: vec![],
1373            file_type: "PARQUET".to_string(),
1374            location: "foo.parquet".into(),
1375            table_partition_cols: vec![],
1376            order_exprs: vec![],
1377            if_not_exists: false,
1378            or_replace: false,
1379            temporary: false,
1380            unbounded: false,
1381            options: vec![],
1382            constraints: vec![],
1383        });
1384        expect_parse_ok(sql, expected)?;
1385
1386        // positive case: it is ok for parquet files to be other than upper case
1387        let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'";
1388        let expected = Statement::CreateExternalTable(CreateExternalTable {
1389            name: name.clone(),
1390            columns: vec![],
1391            file_type: "PARQUET".to_string(),
1392            location: "foo.parquet".into(),
1393            table_partition_cols: vec![],
1394            order_exprs: vec![],
1395            if_not_exists: false,
1396            or_replace: false,
1397            temporary: false,
1398            unbounded: false,
1399            options: vec![],
1400            constraints: vec![],
1401        });
1402        expect_parse_ok(sql, expected)?;
1403
1404        // positive case: it is ok for avro files not to have columns specified
1405        let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
1406        let expected = Statement::CreateExternalTable(CreateExternalTable {
1407            name: name.clone(),
1408            columns: vec![],
1409            file_type: "AVRO".to_string(),
1410            location: "foo.avro".into(),
1411            table_partition_cols: vec![],
1412            order_exprs: vec![],
1413            if_not_exists: false,
1414            or_replace: false,
1415            temporary: false,
1416            unbounded: false,
1417            options: vec![],
1418            constraints: vec![],
1419        });
1420        expect_parse_ok(sql, expected)?;
1421
1422        // positive case: IF NOT EXISTS is accepted (parquet, no columns specified)
1423        let sql = "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'";
1424        let expected = Statement::CreateExternalTable(CreateExternalTable {
1425            name: name.clone(),
1426            columns: vec![],
1427            file_type: "PARQUET".to_string(),
1428            location: "foo.parquet".into(),
1429            table_partition_cols: vec![],
1430            order_exprs: vec![],
1431            if_not_exists: true,
1432            or_replace: false,
1433            temporary: false,
1434            unbounded: false,
1435            options: vec![],
1436            constraints: vec![],
1437        });
1438        expect_parse_ok(sql, expected)?;
1439
1440        // positive case: or replace
1441        let sql =
1442            "CREATE OR REPLACE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1443        let expected = Statement::CreateExternalTable(CreateExternalTable {
1444            name: name.clone(),
1445            columns: vec![],
1446            file_type: "PARQUET".to_string(),
1447            location: "foo.parquet".into(),
1448            table_partition_cols: vec![],
1449            order_exprs: vec![],
1450            if_not_exists: false,
1451            or_replace: true,
1452            temporary: false,
1453            unbounded: false,
1454            options: vec![],
1455            constraints: vec![],
1456        });
1457        expect_parse_ok(sql, expected)?;
1458
1459        // positive case: column definition allowed in 'partition by' clause
1460        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'";
1461        let expected = Statement::CreateExternalTable(CreateExternalTable {
1462            name: name.clone(),
1463            columns: vec![
1464                make_column_def("c1", DataType::Int(None)),
1465                make_column_def("p1", DataType::Int(None)),
1466            ],
1467            file_type: "CSV".to_string(),
1468            location: "foo.csv".into(),
1469            table_partition_cols: vec!["p1".to_string()],
1470            order_exprs: vec![],
1471            if_not_exists: false,
1472            or_replace: false,
1473            temporary: false,
1474            unbounded: false,
1475            options: vec![],
1476            constraints: vec![],
1477        });
1478        expect_parse_ok(sql, expected)?;
1479
1480        // negative case: mixed column defs and column names in `PARTITIONED BY` clause
1481        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'";
1482        expect_parse_error(
1483            sql,
1484            "SQL error: ParserError(\"Expected: a data type name, found: ) at Line: 1, Column: 73\")",
1485        );
1486
1487        // negative case: mixed column defs and column names in `PARTITIONED BY` clause
1488        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'";
1489        expect_parse_error(
1490            sql,
1491            "SQL error: ParserError(\"Expected: ',' or ')' after partition definition, found: int at Line: 1, Column: 70\")",
1492        );
1493
1494        // positive case: additional options (one entry) can be specified
1495        let sql =
1496            "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'";
1497        let expected = Statement::CreateExternalTable(CreateExternalTable {
1498            name: name.clone(),
1499            columns: vec![],
1500            file_type: "X".to_string(),
1501            location: "blahblah".into(),
1502            table_partition_cols: vec![],
1503            order_exprs: vec![],
1504            if_not_exists: false,
1505            or_replace: false,
1506            temporary: false,
1507            unbounded: false,
1508            options: vec![("k1".into(), Value::SingleQuotedString("v1".into()))],
1509            constraints: vec![],
1510        });
1511        expect_parse_ok(sql, expected)?;
1512
1513        // positive case: additional options (multiple entries) can be specified
1514        let sql = "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'";
1515        let expected = Statement::CreateExternalTable(CreateExternalTable {
1516            name: name.clone(),
1517            columns: vec![],
1518            file_type: "X".to_string(),
1519            location: "blahblah".into(),
1520            table_partition_cols: vec![],
1521            order_exprs: vec![],
1522            if_not_exists: false,
1523            or_replace: false,
1524            temporary: false,
1525            unbounded: false,
1526            options: vec![
1527                ("k1".into(), Value::SingleQuotedString("v1".into())),
1528                ("k2".into(), Value::SingleQuotedString("v2".into())),
1529            ],
1530            constraints: vec![],
1531        });
1532        expect_parse_ok(sql, expected)?;
1533
1534        // Ordered Col
1535        let sqls = [
1536            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'",
1537            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'",
1538            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS LAST) LOCATION 'foo.csv'",
1539            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC) LOCATION 'foo.csv'",
1540            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC) LOCATION 'foo.csv'",
1541            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS FIRST) LOCATION 'foo.csv'",
1542            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS LAST) LOCATION 'foo.csv'",
1543            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS FIRST) LOCATION 'foo.csv'",
1544            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS LAST) LOCATION 'foo.csv'",
1545        ];
1546        let expected = vec![
1547            (None, None),
1548            (None, Some(true)),
1549            (None, Some(false)),
1550            (Some(true), None),
1551            (Some(false), None),
1552            (Some(false), Some(true)),
1553            (Some(false), Some(false)),
1554            (Some(true), Some(true)),
1555            (Some(true), Some(false)),
1556        ];
1557        for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) {
1558            let expected = Statement::CreateExternalTable(CreateExternalTable {
1559                name: name.clone(),
1560                columns: vec![make_column_def("c1", DataType::Int(None))],
1561                file_type: "CSV".to_string(),
1562                location: "foo.csv".into(),
1563                table_partition_cols: vec![],
1564                order_exprs: vec![vec![OrderByExpr {
1565                    expr: Identifier(Ident {
1566                        value: "c1".to_owned(),
1567                        quote_style: None,
1568                        span: Span::empty(),
1569                    }),
1570                    options: OrderByOptions { asc, nulls_first },
1571                    with_fill: None,
1572                }]],
1573                if_not_exists: false,
1574                or_replace: false,
1575                temporary: false,
1576                unbounded: false,
1577                options: vec![],
1578                constraints: vec![],
1579            });
1580            expect_parse_ok(sql, expected)?;
1581        }
1582
1583        // Ordered Col
1584        let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
1585        let display = None;
1586        let expected = Statement::CreateExternalTable(CreateExternalTable {
1587            name: name.clone(),
1588            columns: vec![
1589                make_column_def("c1", DataType::Int(display)),
1590                make_column_def("c2", DataType::Int(display)),
1591            ],
1592            file_type: "CSV".to_string(),
1593            location: "foo.csv".into(),
1594            table_partition_cols: vec![],
1595            order_exprs: vec![vec![
1596                OrderByExpr {
1597                    expr: Identifier(Ident {
1598                        value: "c1".to_owned(),
1599                        quote_style: None,
1600                        span: Span::empty(),
1601                    }),
1602                    options: OrderByOptions {
1603                        asc: Some(true),
1604                        nulls_first: None,
1605                    },
1606                    with_fill: None,
1607                },
1608                OrderByExpr {
1609                    expr: Identifier(Ident {
1610                        value: "c2".to_owned(),
1611                        quote_style: None,
1612                        span: Span::empty(),
1613                    }),
1614                    options: OrderByOptions {
1615                        asc: Some(false),
1616                        nulls_first: Some(true),
1617                    },
1618                    with_fill: None,
1619                },
1620            ]],
1621            if_not_exists: false,
1622            or_replace: false,
1623            temporary: false,
1624            unbounded: false,
1625            options: vec![],
1626            constraints: vec![],
1627        });
1628        expect_parse_ok(sql, expected)?;
1629
1630        // Ordered Binary op
1631        let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
1632        let display = None;
1633        let expected = Statement::CreateExternalTable(CreateExternalTable {
1634            name: name.clone(),
1635            columns: vec![
1636                make_column_def("c1", DataType::Int(display)),
1637                make_column_def("c2", DataType::Int(display)),
1638            ],
1639            file_type: "CSV".to_string(),
1640            location: "foo.csv".into(),
1641            table_partition_cols: vec![],
1642            order_exprs: vec![vec![OrderByExpr {
1643                expr: Expr::BinaryOp {
1644                    left: Box::new(Identifier(Ident {
1645                        value: "c1".to_owned(),
1646                        quote_style: None,
1647                        span: Span::empty(),
1648                    })),
1649                    op: BinaryOperator::Minus,
1650                    right: Box::new(Identifier(Ident {
1651                        value: "c2".to_owned(),
1652                        quote_style: None,
1653                        span: Span::empty(),
1654                    })),
1655                },
1656                options: OrderByOptions {
1657                    asc: Some(true),
1658                    nulls_first: None,
1659                },
1660                with_fill: None,
1661            }]],
1662            if_not_exists: false,
1663            or_replace: false,
1664            temporary: false,
1665            unbounded: false,
1666            options: vec![],
1667            constraints: vec![],
1668        });
1669        expect_parse_ok(sql, expected)?;
1670
1671        // Most complete CREATE EXTERNAL TABLE statement possible (using IF NOT EXISTS)
1672        let sql = "
1673            CREATE UNBOUNDED EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float)
1674            STORED AS PARQUET
1675            WITH ORDER (c1 - c2 ASC)
1676            PARTITIONED BY (c1)
1677            LOCATION 'foo.parquet'
1678            OPTIONS ('format.compression' 'zstd',
1679                     'format.delimiter' '*',
1680                     'ROW_GROUP_SIZE' '1024',
1681                     'TRUNCATE' 'NO',
1682                     'format.has_header' 'true')";
1683        let expected = Statement::CreateExternalTable(CreateExternalTable {
1684            name: name.clone(),
1685            columns: vec![
1686                make_column_def("c1", DataType::Int(None)),
1687                make_column_def("c2", DataType::Float(ExactNumberInfo::None)),
1688            ],
1689            file_type: "PARQUET".to_string(),
1690            location: "foo.parquet".into(),
1691            table_partition_cols: vec!["c1".into()],
1692            order_exprs: vec![vec![OrderByExpr {
1693                expr: Expr::BinaryOp {
1694                    left: Box::new(Identifier(Ident {
1695                        value: "c1".to_owned(),
1696                        quote_style: None,
1697                        span: Span::empty(),
1698                    })),
1699                    op: BinaryOperator::Minus,
1700                    right: Box::new(Identifier(Ident {
1701                        value: "c2".to_owned(),
1702                        quote_style: None,
1703                        span: Span::empty(),
1704                    })),
1705                },
1706                options: OrderByOptions {
1707                    asc: Some(true),
1708                    nulls_first: None,
1709                },
1710                with_fill: None,
1711            }]],
1712            if_not_exists: true,
1713            or_replace: false,
1714            temporary: false,
1715            unbounded: true,
1716            options: vec![
1717                (
1718                    "format.compression".into(),
1719                    Value::SingleQuotedString("zstd".into()),
1720                ),
1721                (
1722                    "format.delimiter".into(),
1723                    Value::SingleQuotedString("*".into()),
1724                ),
1725                (
1726                    "ROW_GROUP_SIZE".into(),
1727                    Value::SingleQuotedString("1024".into()),
1728                ),
1729                ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1730                (
1731                    "format.has_header".into(),
1732                    Value::SingleQuotedString("true".into()),
1733                ),
1734            ],
1735            constraints: vec![],
1736        });
1737        expect_parse_ok(sql, expected)?;
1738
1739        // Most complete CREATE EXTERNAL TABLE statement possible (using OR REPLACE)
1740        let sql = "
1741            CREATE OR REPLACE UNBOUNDED EXTERNAL TABLE t (c1 int, c2 float)
1742            STORED AS PARQUET
1743            WITH ORDER (c1 - c2 ASC)
1744            PARTITIONED BY (c1)
1745            LOCATION 'foo.parquet'
1746            OPTIONS ('format.compression' 'zstd',
1747                     'format.delimiter' '*',
1748                     'ROW_GROUP_SIZE' '1024',
1749                     'TRUNCATE' 'NO',
1750                     'format.has_header' 'true')";
1751        let expected = Statement::CreateExternalTable(CreateExternalTable {
1752            name: name.clone(),
1753            columns: vec![
1754                make_column_def("c1", DataType::Int(None)),
1755                make_column_def("c2", DataType::Float(ExactNumberInfo::None)),
1756            ],
1757            file_type: "PARQUET".to_string(),
1758            location: "foo.parquet".into(),
1759            table_partition_cols: vec!["c1".into()],
1760            order_exprs: vec![vec![OrderByExpr {
1761                expr: Expr::BinaryOp {
1762                    left: Box::new(Identifier(Ident {
1763                        value: "c1".to_owned(),
1764                        quote_style: None,
1765                        span: Span::empty(),
1766                    })),
1767                    op: BinaryOperator::Minus,
1768                    right: Box::new(Identifier(Ident {
1769                        value: "c2".to_owned(),
1770                        quote_style: None,
1771                        span: Span::empty(),
1772                    })),
1773                },
1774                options: OrderByOptions {
1775                    asc: Some(true),
1776                    nulls_first: None,
1777                },
1778                with_fill: None,
1779            }]],
1780            if_not_exists: false,
1781            or_replace: true,
1782            temporary: false,
1783            unbounded: true,
1784            options: vec![
1785                (
1786                    "format.compression".into(),
1787                    Value::SingleQuotedString("zstd".into()),
1788                ),
1789                (
1790                    "format.delimiter".into(),
1791                    Value::SingleQuotedString("*".into()),
1792                ),
1793                (
1794                    "ROW_GROUP_SIZE".into(),
1795                    Value::SingleQuotedString("1024".into()),
1796                ),
1797                ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1798                (
1799                    "format.has_header".into(),
1800                    Value::SingleQuotedString("true".into()),
1801                ),
1802            ],
1803            constraints: vec![],
1804        });
1805        expect_parse_ok(sql, expected)?;
1806
1807        // For error cases, see: `create_external_table.slt`
1808
1809        Ok(())
1810    }
1811
1812    #[test]
1813    fn copy_to_table_to_table() -> Result<(), DataFusionError> {
1814        // positive case
1815        let sql = "COPY foo TO bar STORED AS CSV";
1816        let expected = Statement::CopyTo(CopyToStatement {
1817            source: object_name("foo"),
1818            target: "bar".to_string(),
1819            partitioned_by: vec![],
1820            stored_as: Some("CSV".to_owned()),
1821            options: vec![],
1822        });
1823
1824        assert_eq!(verified_stmt(sql), expected);
1825        Ok(())
1826    }
1827
1828    #[test]
1829    fn skip_copy_into_snowflake() -> Result<(), DataFusionError> {
1830        let sql = "COPY INTO foo FROM @~/staged FILE_FORMAT = (FORMAT_NAME = 'mycsv');";
1831        let dialect = Box::new(SnowflakeDialect);
1832        let statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?;
1833
1834        assert_eq!(
1835            statements.len(),
1836            1,
1837            "Expected to parse exactly one statement"
1838        );
1839        if let Statement::CopyTo(_) = &statements[0] {
1840            panic!("Expected non COPY TO statement, but was successful: {statements:?}");
1841        }
1842        Ok(())
1843    }
1844
1845    #[test]
1846    fn explain_copy_to_table_to_table() -> Result<(), DataFusionError> {
1847        let cases = vec![
1848            ("EXPLAIN COPY foo TO bar STORED AS PARQUET", false, false),
1849            (
1850                "EXPLAIN ANALYZE COPY foo TO bar STORED AS PARQUET",
1851                true,
1852                false,
1853            ),
1854            (
1855                "EXPLAIN VERBOSE COPY foo TO bar STORED AS PARQUET",
1856                false,
1857                true,
1858            ),
1859            (
1860                "EXPLAIN ANALYZE VERBOSE COPY foo TO bar STORED AS PARQUET",
1861                true,
1862                true,
1863            ),
1864        ];
1865        for (sql, analyze, verbose) in cases {
1866            println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}");
1867
1868            let expected_copy = Statement::CopyTo(CopyToStatement {
1869                source: object_name("foo"),
1870                target: "bar".to_string(),
1871                partitioned_by: vec![],
1872                stored_as: Some("PARQUET".to_owned()),
1873                options: vec![],
1874            });
1875            let expected = Statement::Explain(ExplainStatement {
1876                analyze,
1877                verbose,
1878                format: None,
1879                statement: Box::new(expected_copy),
1880            });
1881            assert_eq!(verified_stmt(sql), expected);
1882        }
1883        Ok(())
1884    }
1885
1886    #[test]
1887    fn copy_to_query_to_table() -> Result<(), DataFusionError> {
1888        let statement = verified_stmt("SELECT 1");
1889
1890        // unwrap the various layers
1891        let statement = if let Statement::Statement(statement) = statement {
1892            *statement
1893        } else {
1894            panic!("Expected statement, got {statement:?}");
1895        };
1896
1897        let query = if let SQLStatement::Query(query) = statement {
1898            query
1899        } else {
1900            panic!("Expected query, got {statement:?}");
1901        };
1902
1903        let sql =
1904            "COPY (SELECT 1) TO bar STORED AS CSV OPTIONS ('format.has_header' 'true')";
1905        let expected = Statement::CopyTo(CopyToStatement {
1906            source: CopyToSource::Query(query),
1907            target: "bar".to_string(),
1908            partitioned_by: vec![],
1909            stored_as: Some("CSV".to_owned()),
1910            options: vec![(
1911                "format.has_header".into(),
1912                Value::SingleQuotedString("true".into()),
1913            )],
1914        });
1915        assert_eq!(verified_stmt(sql), expected);
1916        Ok(())
1917    }
1918
1919    #[test]
1920    fn copy_to_options() -> Result<(), DataFusionError> {
1921        let sql = "COPY foo TO bar STORED AS CSV OPTIONS ('row_group_size' '55')";
1922        let expected = Statement::CopyTo(CopyToStatement {
1923            source: object_name("foo"),
1924            target: "bar".to_string(),
1925            partitioned_by: vec![],
1926            stored_as: Some("CSV".to_owned()),
1927            options: vec![(
1928                "row_group_size".to_string(),
1929                Value::SingleQuotedString("55".to_string()),
1930            )],
1931        });
1932        assert_eq!(verified_stmt(sql), expected);
1933        Ok(())
1934    }
1935
1936    #[test]
1937    fn copy_to_partitioned_by() -> Result<(), DataFusionError> {
1938        let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')";
1939        let expected = Statement::CopyTo(CopyToStatement {
1940            source: object_name("foo"),
1941            target: "bar".to_string(),
1942            partitioned_by: vec!["a".to_string()],
1943            stored_as: Some("CSV".to_owned()),
1944            options: vec![(
1945                "row_group_size".to_string(),
1946                Value::SingleQuotedString("55".to_string()),
1947            )],
1948        });
1949        assert_eq!(verified_stmt(sql), expected);
1950        Ok(())
1951    }
1952
1953    #[test]
1954    fn copy_to_multi_options() -> Result<(), DataFusionError> {
1955        // order of options is preserved
1956        let sql = "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy, 'execution.keep_partition_by_columns' true)";
1957
1958        let expected_options = vec![
1959            (
1960                "format.row_group_size".to_string(),
1961                Value::Number("55".to_string(), false),
1962            ),
1963            (
1964                "format.compression".to_string(),
1965                Value::SingleQuotedString("snappy".to_string()),
1966            ),
1967            (
1968                "execution.keep_partition_by_columns".to_string(),
1969                Value::SingleQuotedString("true".to_string()),
1970            ),
1971        ];
1972
1973        let mut statements = DFParser::parse_sql(sql).unwrap();
1974        assert_eq!(statements.len(), 1);
1975        let only_statement = statements.pop_front().unwrap();
1976
1977        let options = if let Statement::CopyTo(copy_to) = only_statement {
1978            copy_to.options
1979        } else {
1980            panic!("Expected copy");
1981        };
1982
1983        assert_eq!(options, expected_options);
1984
1985        Ok(())
1986    }
1987
1988    // For error cases, see: `copy.slt`
1989
1990    fn object_name(name: &str) -> CopyToSource {
1991        CopyToSource::Relation(ObjectName::from(vec![Ident::new(name)]))
1992    }
1993
    // Based on sqlparser-rs
1995    // https://github.com/sqlparser-rs/sqlparser-rs/blob/ae3b5844c839072c235965fe0d1bddc473dced87/src/test_utils.rs#L104-L116
1996
1997    /// Ensures that `sql` parses as a single [Statement]
1998    ///
1999    /// If `canonical` is non empty,this function additionally asserts
2000    /// that:
2001    ///
2002    /// 1. parsing `sql` results in the same [`Statement`] as parsing
2003    ///    `canonical`.
2004    ///
2005    /// 2. re-serializing the result of parsing `sql` produces the same
2006    ///    `canonical` sql string
2007    fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement {
2008        let mut statements = DFParser::parse_sql(sql).unwrap();
2009        assert_eq!(statements.len(), 1);
2010
2011        if sql != canonical {
2012            assert_eq!(DFParser::parse_sql(canonical).unwrap(), statements);
2013        }
2014
2015        let only_statement = statements.pop_front().unwrap();
2016        assert_eq!(
2017            canonical.to_uppercase(),
2018            only_statement.to_string().to_uppercase()
2019        );
2020        only_statement
2021    }
2022
2023    /// Ensures that `sql` parses as a single [Statement], and that
2024    /// re-serializing the parse result produces the same `sql`
2025    /// string (is not modified after a serialization round-trip).
2026    fn verified_stmt(sql: &str) -> Statement {
2027        one_statement_parses_to(sql, sql)
2028    }
2029
2030    #[test]
2031    /// Checks the recursion limit works for sql queries
2032    /// Recursion can happen easily with binary exprs (i.e, AND or OR)
2033    fn test_recursion_limit() {
2034        let sql = "SELECT 1 OR 2";
2035
2036        // Expect parse to succeed
2037        DFParserBuilder::new(sql)
2038            .build()
2039            .unwrap()
2040            .parse_statements()
2041            .unwrap();
2042
2043        let err = DFParserBuilder::new(sql)
2044            .with_recursion_limit(1)
2045            .build()
2046            .unwrap()
2047            .parse_statements()
2048            .unwrap_err();
2049
2050        assert_contains!(
2051            err.to_string(),
2052            "SQL error: RecursionLimitExceeded (current limit: 1)"
2053        );
2054    }
2055
2056    #[test]
2057    fn test_multistatement() {
2058        let sql = "COPY foo TO bar STORED AS CSV; \
2059             CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'; \
2060             RESET var;";
2061        let statements = DFParser::parse_sql(sql).unwrap();
2062        assert_eq!(
2063            statements,
2064            vec![
2065                Statement::CopyTo(CopyToStatement {
2066                    source: object_name("foo"),
2067                    target: "bar".to_string(),
2068                    partitioned_by: vec![],
2069                    stored_as: Some("CSV".to_owned()),
2070                    options: vec![],
2071                }),
2072                {
2073                    let name = ObjectName::from(vec![Ident::from("t")]);
2074                    let display = None;
2075                    Statement::CreateExternalTable(CreateExternalTable {
2076                        name: name.clone(),
2077                        columns: vec![make_column_def("c1", DataType::Int(display))],
2078                        file_type: "CSV".to_string(),
2079                        location: "foo.csv".into(),
2080                        table_partition_cols: vec![],
2081                        order_exprs: vec![],
2082                        if_not_exists: false,
2083                        or_replace: false,
2084                        temporary: false,
2085                        unbounded: false,
2086                        options: vec![],
2087                        constraints: vec![],
2088                    })
2089                },
2090                {
2091                    let name = ObjectName::from(vec![Ident::from("var")]);
2092                    Statement::Reset(ResetStatement::Variable(name))
2093                }
2094            ]
2095        );
2096    }
2097
2098    #[test]
2099    fn test_custom_tokens() {
2100        // Span mock.
2101        let span = Span {
2102            start: Location { line: 0, column: 0 },
2103            end: Location { line: 0, column: 0 },
2104        };
2105        let tokens = vec![
2106            TokenWithSpan {
2107                token: Token::make_keyword("SELECT"),
2108                span,
2109            },
2110            TokenWithSpan {
2111                token: Token::Whitespace(Whitespace::Space),
2112                span,
2113            },
2114            TokenWithSpan {
2115                token: Token::Placeholder("1".to_string()),
2116                span,
2117            },
2118        ];
2119
2120        let statements = DFParserBuilder::new(tokens)
2121            .build()
2122            .unwrap()
2123            .parse_statements()
2124            .unwrap();
2125        assert_eq!(statements.len(), 1);
2126    }
2127
2128    fn expect_parse_expr_ok(sql: &str, expected: ExprWithAlias) {
2129        let expr = DFParser::parse_sql_into_expr(sql).unwrap();
2130        assert_eq!(expr, expected, "actual:\n{expr:#?}");
2131    }
2132
2133    /// Parses sql and asserts that the expected error message was found
2134    fn expect_parse_expr_error(sql: &str, expected_error: &str) {
2135        match DFParser::parse_sql_into_expr(sql) {
2136            Ok(expr) => {
2137                panic!("Expected parse error for '{sql}', but was successful: {expr:#?}");
2138            }
2139            Err(e) => {
2140                let error_message = e.to_string();
2141                assert!(
2142                    error_message.contains(expected_error),
2143                    "Expected error '{expected_error}' not found in actual error '{error_message}'"
2144                );
2145            }
2146        }
2147    }
2148
2149    #[test]
2150    fn literal() {
2151        expect_parse_expr_ok(
2152            "1234",
2153            ExprWithAlias {
2154                expr: Expr::Value(ValueWithSpan::from(Value::Number(
2155                    "1234".to_string(),
2156                    false,
2157                ))),
2158                alias: None,
2159            },
2160        )
2161    }
2162
2163    #[test]
2164    fn literal_with_alias() {
2165        expect_parse_expr_ok(
2166            "1234 as foo",
2167            ExprWithAlias {
2168                expr: Expr::Value(ValueWithSpan::from(Value::Number(
2169                    "1234".to_string(),
2170                    false,
2171                ))),
2172                alias: Some(Ident::from("foo")),
2173            },
2174        )
2175    }
2176
2177    #[test]
2178    fn literal_with_alias_and_trailing_tokens() {
2179        expect_parse_expr_error(
2180            "1234 as foo.bar",
2181            "Expected: end of expression, found: .",
2182        )
2183    }
2184
2185    #[test]
2186    fn literal_with_alias_and_trailing_whitespace() {
2187        expect_parse_expr_ok(
2188            "1234 as foo   ",
2189            ExprWithAlias {
2190                expr: Expr::Value(ValueWithSpan::from(Value::Number(
2191                    "1234".to_string(),
2192                    false,
2193                ))),
2194                alias: Some(Ident::from("foo")),
2195            },
2196        )
2197    }
2198
2199    #[test]
2200    fn literal_with_alias_and_trailing_whitespace_and_token() {
2201        expect_parse_expr_error(
2202            "1234 as foo    bar",
2203            "Expected: end of expression, found: bar",
2204        )
2205    }
2206}