datafusion_sql/
parser.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`DFParser`]: DataFusion SQL Parser based on [`sqlparser`]
19//!
20//! This parser implements DataFusion specific statements such as
21//! `CREATE EXTERNAL TABLE`
22
23use datafusion_common::DataFusionError;
24use datafusion_common::config::SqlParserOptions;
25use datafusion_common::{Diagnostic, Span, sql_err};
26use sqlparser::ast::{ExprWithAlias, Ident, OrderByOptions};
27use sqlparser::tokenizer::TokenWithSpan;
28use sqlparser::{
29    ast::{
30        ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query,
31        Statement as SQLStatement, TableConstraint, Value,
32    },
33    dialect::{Dialect, GenericDialect, keywords::Keyword},
34    parser::{Parser, ParserError},
35    tokenizer::{Token, Tokenizer, Word},
36};
37use std::collections::VecDeque;
38use std::fmt;
39
40// Use `Parser::expected` instead, if possible
41macro_rules! parser_err {
42    ($MSG:expr $(; diagnostic = $DIAG:expr)?) => {{
43
44        let err = DataFusionError::from(ParserError::ParserError($MSG.to_string()));
45        $(
46            let err = err.with_diagnostic($DIAG);
47        )?
48        Err(err)
49    }};
50}
51
52fn parse_file_type(s: &str) -> Result<String, DataFusionError> {
53    Ok(s.to_uppercase())
54}
55
56/// DataFusion specific `EXPLAIN`
57///
58/// Syntax:
59/// ```sql
60/// EXPLAIN <ANALYZE> <VERBOSE> [FORMAT format] statement
61/// ```
62#[derive(Debug, Clone, PartialEq, Eq)]
63pub struct ExplainStatement {
64    /// `EXPLAIN ANALYZE ..`
65    pub analyze: bool,
66    /// `EXPLAIN .. VERBOSE ..`
67    pub verbose: bool,
68    /// `EXPLAIN .. FORMAT `
69    pub format: Option<String>,
70    /// The statement to analyze. Note this is a DataFusion [`Statement`] (not a
71    /// [`sqlparser::ast::Statement`] so that we can use `EXPLAIN`, `COPY`, and other
72    /// DataFusion specific statements
73    pub statement: Box<Statement>,
74}
75
76impl fmt::Display for ExplainStatement {
77    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78        let Self {
79            analyze,
80            verbose,
81            format,
82            statement,
83        } = self;
84
85        write!(f, "EXPLAIN ")?;
86        if *analyze {
87            write!(f, "ANALYZE ")?;
88        }
89        if *verbose {
90            write!(f, "VERBOSE ")?;
91        }
92        if let Some(format) = format.as_ref() {
93            write!(f, "FORMAT {format} ")?;
94        }
95
96        write!(f, "{statement}")
97    }
98}
99
100/// DataFusion extension DDL for `COPY`
101///
102/// # Syntax:
103///
104/// ```text
105/// COPY <table_name | (<query>)>
106/// TO
107/// <destination_url>
108/// (key_value_list)
109/// ```
110///
111/// # Examples
112///
113/// ```sql
114/// COPY lineitem  TO 'lineitem'
115/// STORED AS PARQUET (
116///   partitions 16,
117///   row_group_limit_rows 100000,
118///   row_group_limit_bytes 200000
119/// )
120///
121/// COPY (SELECT l_orderkey from lineitem) to 'lineitem.parquet';
122/// ```
123#[derive(Debug, Clone, PartialEq, Eq)]
124pub struct CopyToStatement {
125    /// From where the data comes from
126    pub source: CopyToSource,
127    /// The URL to where the data is heading
128    pub target: String,
129    /// Partition keys
130    pub partitioned_by: Vec<String>,
131    /// File type (Parquet, NDJSON, CSV etc.)
132    pub stored_as: Option<String>,
133    /// Target specific options
134    pub options: Vec<(String, Value)>,
135}
136
137impl fmt::Display for CopyToStatement {
138    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139        let Self {
140            source,
141            target,
142            partitioned_by,
143            stored_as,
144            options,
145            ..
146        } = self;
147
148        write!(f, "COPY {source} TO {target}")?;
149        if let Some(file_type) = stored_as {
150            write!(f, " STORED AS {file_type}")?;
151        }
152        if !partitioned_by.is_empty() {
153            write!(f, " PARTITIONED BY ({})", partitioned_by.join(", "))?;
154        }
155
156        if !options.is_empty() {
157            let opts: Vec<_> =
158                options.iter().map(|(k, v)| format!("'{k}' {v}")).collect();
159            write!(f, " OPTIONS ({})", opts.join(", "))?;
160        }
161
162        Ok(())
163    }
164}
165
166#[derive(Debug, Clone, PartialEq, Eq)]
167pub enum CopyToSource {
168    /// `COPY <table> TO ...`
169    Relation(ObjectName),
170    /// COPY (...query...) TO ...
171    Query(Box<Query>),
172}
173
174impl fmt::Display for CopyToSource {
175    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
176        match self {
177            CopyToSource::Relation(r) => write!(f, "{r}"),
178            CopyToSource::Query(q) => write!(f, "({q})"),
179        }
180    }
181}
182
183/// This type defines a lexicographical ordering.
184pub(crate) type LexOrdering = Vec<OrderByExpr>;
185
186/// DataFusion extension DDL for `CREATE EXTERNAL TABLE`
187///
188/// Syntax:
189///
190/// ```text
191/// CREATE
192/// [ OR REPLACE ]
193/// EXTERNAL TABLE
194/// [ IF NOT EXISTS ]
195/// <TABLE_NAME>[ (<column_definition>) ]
196/// STORED AS <file_type>
197/// [ PARTITIONED BY (<column_definition list> | <column list>) ]
198/// [ WITH ORDER (<ordered column list>)
199/// [ OPTIONS (<key_value_list>) ]
200/// LOCATION <literal>
201///
202/// <column_definition> := (<column_name> <data_type>, ...)
203///
204/// <column_list> := (<column_name>, ...)
205///
206/// <ordered_column_list> := (<column_name> <sort_clause>, ...)
207///
208/// <key_value_list> := (<literal> <literal, <literal> <literal>, ...)
209/// ```
210#[derive(Debug, Clone, PartialEq, Eq)]
211pub struct CreateExternalTable {
212    /// Table name
213    pub name: ObjectName,
214    /// Optional schema
215    pub columns: Vec<ColumnDef>,
216    /// File type (Parquet, NDJSON, CSV, etc)
217    pub file_type: String,
218    /// Path to file
219    pub location: String,
220    /// Partition Columns
221    pub table_partition_cols: Vec<String>,
222    /// Ordered expressions
223    pub order_exprs: Vec<LexOrdering>,
224    /// Option to not error if table already exists
225    pub if_not_exists: bool,
226    /// Option to replace table content if table already exists
227    pub or_replace: bool,
228    /// Whether the table is a temporary table
229    pub temporary: bool,
230    /// Infinite streams?
231    pub unbounded: bool,
232    /// Table(provider) specific options
233    pub options: Vec<(String, Value)>,
234    /// A table-level constraint
235    pub constraints: Vec<TableConstraint>,
236}
237
238impl fmt::Display for CreateExternalTable {
239    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
240        write!(f, "CREATE EXTERNAL TABLE ")?;
241        if self.if_not_exists {
242            write!(f, "IF NOT EXISTS ")?;
243        }
244        write!(f, "{} ", self.name)?;
245        write!(f, "STORED AS {} ", self.file_type)?;
246        if !self.order_exprs.is_empty() {
247            write!(f, "WITH ORDER (")?;
248            let mut first = true;
249            for expr in self.order_exprs.iter().flatten() {
250                if !first {
251                    write!(f, ", ")?;
252                }
253                write!(f, "{expr}")?;
254                first = false;
255            }
256            write!(f, ") ")?;
257        }
258        write!(f, "LOCATION {}", self.location)
259    }
260}
261
262/// DataFusion extension for `RESET`
263#[derive(Debug, Clone, PartialEq, Eq)]
264pub enum ResetStatement {
265    /// Reset a single configuration variable (stored as provided)
266    Variable(ObjectName),
267}
268
269impl fmt::Display for ResetStatement {
270    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
271        match self {
272            ResetStatement::Variable(name) => write!(f, "RESET {name}"),
273        }
274    }
275}
276
277/// DataFusion SQL Statement.
278///
279/// This can either be a [`Statement`] from [`sqlparser`] from a
280/// standard SQL dialect, or a DataFusion extension such as `CREATE
281/// EXTERNAL TABLE`. See [`DFParser`] for more information.
282///
283/// [`Statement`]: sqlparser::ast::Statement
284#[derive(Debug, Clone, PartialEq, Eq)]
285pub enum Statement {
286    /// ANSI SQL AST node (from sqlparser-rs)
287    Statement(Box<SQLStatement>),
288    /// Extension: `CREATE EXTERNAL TABLE`
289    CreateExternalTable(CreateExternalTable),
290    /// Extension: `COPY TO`
291    CopyTo(CopyToStatement),
292    /// EXPLAIN for extensions
293    Explain(ExplainStatement),
294    /// Extension: `RESET`
295    Reset(ResetStatement),
296}
297
298impl fmt::Display for Statement {
299    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
300        match self {
301            Statement::Statement(stmt) => write!(f, "{stmt}"),
302            Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"),
303            Statement::CopyTo(stmt) => write!(f, "{stmt}"),
304            Statement::Explain(stmt) => write!(f, "{stmt}"),
305            Statement::Reset(stmt) => write!(f, "{stmt}"),
306        }
307    }
308}
309
310fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), DataFusionError> {
311    if field.is_some() {
312        parser_err!(format!("{name} specified more than once",))?
313    }
314    Ok(())
315}
316
317/// DataFusion SQL Parser based on [`sqlparser`]
318///
319/// Parses DataFusion's SQL dialect, often delegating to [`sqlparser`]'s [`Parser`].
320///
321/// DataFusion mostly follows existing SQL dialects via
322/// `sqlparser`. However, certain statements such as `COPY` and
323/// `CREATE EXTERNAL TABLE` have special syntax in DataFusion. See
324/// [`Statement`] for a list of this special syntax
325pub struct DFParser<'a> {
326    pub parser: Parser<'a>,
327    options: SqlParserOptions,
328}
329
330/// Same as `sqlparser`
331const DEFAULT_RECURSION_LIMIT: usize = 50;
332const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
333
334/// Builder for [`DFParser`]
335///
336/// # Example: Create and Parse SQL statements
337/// ```
338/// # use datafusion_sql::parser::DFParserBuilder;
339/// # use datafusion_common::Result;
340/// # fn test() -> Result<()> {
341/// let mut parser = DFParserBuilder::new("SELECT * FROM foo; SELECT 1 + 2").build()?;
342/// // parse the SQL into DFStatements
343/// let statements = parser.parse_statements()?;
344/// assert_eq!(statements.len(), 2);
345/// # Ok(())
346/// # }
347/// ```
348///
349/// # Example: Create and Parse expression with a different dialect
350/// ```
351/// # use datafusion_sql::parser::DFParserBuilder;
352/// # use datafusion_common::Result;
353/// # use datafusion_sql::sqlparser::dialect::MySqlDialect;
354/// # use datafusion_sql::sqlparser::ast::Expr;
355/// # fn test() -> Result<()> {
356/// let dialect = MySqlDialect {}; // Parse using MySQL dialect
357/// let mut parser = DFParserBuilder::new("1 + 2")
358///     .with_dialect(&dialect)
359///     .build()?;
360/// // parse 1+2 into an sqlparser::ast::Expr
361/// let res = parser.parse_expr()?;
362/// assert!(matches!(res.expr, Expr::BinaryOp { .. }));
363/// # Ok(())
364/// # }
365/// ```
366pub struct DFParserBuilder<'a> {
367    /// The SQL string to parse
368    sql: &'a str,
369    /// The Dialect to use (defaults to [`GenericDialect`]
370    dialect: &'a dyn Dialect,
371    /// The recursion limit while parsing
372    recursion_limit: usize,
373}
374
375impl<'a> DFParserBuilder<'a> {
376    /// Create a new parser builder for the specified tokens using the
377    /// [`GenericDialect`].
378    pub fn new(sql: &'a str) -> Self {
379        Self {
380            sql,
381            dialect: &DEFAULT_DIALECT,
382            recursion_limit: DEFAULT_RECURSION_LIMIT,
383        }
384    }
385
386    /// Adjust the parser builder's dialect. Defaults to [`GenericDialect`]
387    pub fn with_dialect(mut self, dialect: &'a dyn Dialect) -> Self {
388        self.dialect = dialect;
389        self
390    }
391
392    /// Adjust the recursion limit of sql parsing.  Defaults to 50
393    pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
394        self.recursion_limit = recursion_limit;
395        self
396    }
397
398    pub fn build(self) -> Result<DFParser<'a>, DataFusionError> {
399        let mut tokenizer = Tokenizer::new(self.dialect, self.sql);
400        // Convert TokenizerError -> ParserError
401        let tokens = tokenizer
402            .tokenize_with_location()
403            .map_err(ParserError::from)?;
404
405        Ok(DFParser {
406            parser: Parser::new(self.dialect)
407                .with_tokens_with_locations(tokens)
408                .with_recursion_limit(self.recursion_limit),
409            options: SqlParserOptions {
410                recursion_limit: self.recursion_limit,
411                ..Default::default()
412            },
413        })
414    }
415}
416
417impl<'a> DFParser<'a> {
418    #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
419    pub fn new(sql: &'a str) -> Result<Self, DataFusionError> {
420        DFParserBuilder::new(sql).build()
421    }
422
423    #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
424    pub fn new_with_dialect(
425        sql: &'a str,
426        dialect: &'a dyn Dialect,
427    ) -> Result<Self, DataFusionError> {
428        DFParserBuilder::new(sql).with_dialect(dialect).build()
429    }
430
431    /// Parse a sql string into one or [`Statement`]s using the
432    /// [`GenericDialect`].
433    pub fn parse_sql(sql: &'a str) -> Result<VecDeque<Statement>, DataFusionError> {
434        let mut parser = DFParserBuilder::new(sql).build()?;
435
436        parser.parse_statements()
437    }
438
439    /// Parse a SQL string and produce one or more [`Statement`]s with
440    /// with the specified dialect.
441    pub fn parse_sql_with_dialect(
442        sql: &str,
443        dialect: &dyn Dialect,
444    ) -> Result<VecDeque<Statement>, DataFusionError> {
445        let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
446        parser.parse_statements()
447    }
448
449    pub fn parse_sql_into_expr(sql: &str) -> Result<ExprWithAlias, DataFusionError> {
450        DFParserBuilder::new(sql).build()?.parse_into_expr()
451    }
452
453    pub fn parse_sql_into_expr_with_dialect(
454        sql: &str,
455        dialect: &dyn Dialect,
456    ) -> Result<ExprWithAlias, DataFusionError> {
457        DFParserBuilder::new(sql)
458            .with_dialect(dialect)
459            .build()?
460            .parse_into_expr()
461    }
462
463    /// Parse a sql string into one or [`Statement`]s
464    pub fn parse_statements(&mut self) -> Result<VecDeque<Statement>, DataFusionError> {
465        let mut stmts = VecDeque::new();
466        let mut expecting_statement_delimiter = false;
467        loop {
468            // ignore empty statements (between successive statement delimiters)
469            while self.parser.consume_token(&Token::SemiColon) {
470                expecting_statement_delimiter = false;
471            }
472
473            if self.parser.peek_token() == Token::EOF {
474                break;
475            }
476            if expecting_statement_delimiter {
477                return self.expected("end of statement", &self.parser.peek_token());
478            }
479
480            let statement = self.parse_statement()?;
481            stmts.push_back(statement);
482            expecting_statement_delimiter = true;
483        }
484        Ok(stmts)
485    }
486
487    /// Report an unexpected token
488    fn expected<T>(
489        &self,
490        expected: &str,
491        found: &TokenWithSpan,
492    ) -> Result<T, DataFusionError> {
493        let sql_parser_span = found.span;
494        let span = Span::try_from_sqlparser_span(sql_parser_span);
495        let diagnostic = Diagnostic::new_error(
496            format!("Expected: {expected}, found: {found}{}", found.span.start),
497            span,
498        );
499        parser_err!(
500            format!("Expected: {expected}, found: {found}{}", found.span.start);
501            diagnostic=
502            diagnostic
503        )
504    }
505
506    fn expect_token(
507        &mut self,
508        expected: &str,
509        token: &Token,
510    ) -> Result<(), DataFusionError> {
511        let next_token = self.parser.peek_token_ref();
512        if next_token.token != *token {
513            self.expected(expected, next_token)
514        } else {
515            Ok(())
516        }
517    }
518
519    /// Parse a new expression
520    pub fn parse_statement(&mut self) -> Result<Statement, DataFusionError> {
521        match self.parser.peek_token().token {
522            Token::Word(w) => {
523                match w.keyword {
524                    Keyword::CREATE => {
525                        self.parser.next_token(); // CREATE
526                        self.parse_create()
527                    }
528                    Keyword::COPY => {
529                        if let Token::Word(w) = self.parser.peek_nth_token(1).token {
530                            // use native parser for COPY INTO
531                            if w.keyword == Keyword::INTO {
532                                return self.parse_and_handle_statement();
533                            }
534                        }
535                        self.parser.next_token(); // COPY
536                        self.parse_copy()
537                    }
538                    Keyword::EXPLAIN => {
539                        self.parser.next_token(); // EXPLAIN
540                        self.parse_explain()
541                    }
542                    Keyword::RESET => {
543                        self.parser.next_token(); // RESET
544                        self.parse_reset()
545                    }
546                    _ => {
547                        // use sqlparser-rs parser
548                        self.parse_and_handle_statement()
549                    }
550                }
551            }
552            _ => {
553                // use the native parser
554                self.parse_and_handle_statement()
555            }
556        }
557    }
558
559    pub fn parse_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
560        if let Token::Word(w) = self.parser.peek_token().token {
561            match w.keyword {
562                Keyword::CREATE | Keyword::COPY | Keyword::EXPLAIN => {
563                    return parser_err!("Unsupported command in expression")?;
564                }
565                _ => {}
566            }
567        }
568
569        Ok(self.parser.parse_expr_with_alias()?)
570    }
571
572    /// Parses the entire SQL string into an expression.
573    ///
574    /// In contrast to [`DFParser::parse_expr`], this function will report an error if the input
575    /// contains any trailing, unparsed tokens.
576    pub fn parse_into_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
577        let expr = self.parse_expr()?;
578        self.expect_token("end of expression", &Token::EOF)?;
579        Ok(expr)
580    }
581
582    /// Helper method to parse a statement and handle errors consistently, especially for recursion limits
583    fn parse_and_handle_statement(&mut self) -> Result<Statement, DataFusionError> {
584        self.parser
585            .parse_statement()
586            .map(|stmt| Statement::Statement(Box::from(stmt)))
587            .map_err(|e| match e {
588                ParserError::RecursionLimitExceeded => DataFusionError::SQL(
589                    Box::new(ParserError::RecursionLimitExceeded),
590                    Some(format!(
591                        " (current limit: {})",
592                        self.options.recursion_limit
593                    )),
594                ),
595                other => DataFusionError::SQL(Box::new(other), None),
596            })
597    }
598
599    /// Parse a SQL `COPY TO` statement
600    pub fn parse_copy(&mut self) -> Result<Statement, DataFusionError> {
601        // parse as a query
602        let source = if self.parser.consume_token(&Token::LParen) {
603            let query = self.parser.parse_query()?;
604            self.parser.expect_token(&Token::RParen)?;
605            CopyToSource::Query(query)
606        } else {
607            // parse as table reference
608            let table_name = self.parser.parse_object_name(true)?;
609            CopyToSource::Relation(table_name)
610        };
611
612        #[derive(Default)]
613        struct Builder {
614            stored_as: Option<String>,
615            target: Option<String>,
616            partitioned_by: Option<Vec<String>>,
617            options: Option<Vec<(String, Value)>>,
618        }
619
620        let mut builder = Builder::default();
621
622        loop {
623            if let Some(keyword) = self.parser.parse_one_of_keywords(&[
624                Keyword::STORED,
625                Keyword::TO,
626                Keyword::PARTITIONED,
627                Keyword::OPTIONS,
628                Keyword::WITH,
629            ]) {
630                match keyword {
631                    Keyword::STORED => {
632                        self.parser.expect_keyword(Keyword::AS)?;
633                        ensure_not_set(&builder.stored_as, "STORED AS")?;
634                        builder.stored_as = Some(self.parse_file_format()?);
635                    }
636                    Keyword::TO => {
637                        ensure_not_set(&builder.target, "TO")?;
638                        builder.target = Some(self.parser.parse_literal_string()?);
639                    }
640                    Keyword::WITH => {
641                        self.parser.expect_keyword(Keyword::HEADER)?;
642                        self.parser.expect_keyword(Keyword::ROW)?;
643                        return parser_err!(
644                            "WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')"
645                        )?;
646                    }
647                    Keyword::PARTITIONED => {
648                        self.parser.expect_keyword(Keyword::BY)?;
649                        ensure_not_set(&builder.partitioned_by, "PARTITIONED BY")?;
650                        builder.partitioned_by = Some(self.parse_partitions()?);
651                    }
652                    Keyword::OPTIONS => {
653                        ensure_not_set(&builder.options, "OPTIONS")?;
654                        builder.options = Some(self.parse_value_options()?);
655                    }
656                    _ => {
657                        unreachable!()
658                    }
659                }
660            } else {
661                let token = self.parser.next_token();
662                if token == Token::EOF || token == Token::SemiColon {
663                    break;
664                } else {
665                    return self.expected("end of statement or ;", &token)?;
666                }
667            }
668        }
669
670        let Some(target) = builder.target else {
671            return parser_err!("Missing TO clause in COPY statement")?;
672        };
673
674        Ok(Statement::CopyTo(CopyToStatement {
675            source,
676            target,
677            partitioned_by: builder.partitioned_by.unwrap_or(vec![]),
678            stored_as: builder.stored_as,
679            options: builder.options.unwrap_or(vec![]),
680        }))
681    }
682
683    /// Parse the next token as a key name for an option list
684    ///
685    /// Note this is different than [`parse_literal_string`]
686    /// because it allows keywords as well as other non words
687    ///
688    /// [`parse_literal_string`]: sqlparser::parser::Parser::parse_literal_string
689    pub fn parse_option_key(&mut self) -> Result<String, DataFusionError> {
690        let next_token = self.parser.next_token();
691        match next_token.token {
692            Token::Word(Word { value, .. }) => {
693                let mut parts = vec![value];
694                while self.parser.consume_token(&Token::Period) {
695                    let next_token = self.parser.next_token();
696                    if let Token::Word(Word { value, .. }) = next_token.token {
697                        parts.push(value);
698                    } else {
699                        // Unquoted namespaced keys have to conform to the syntax
700                        // "<WORD>[\.<WORD>]*". If we have a key that breaks this
701                        // pattern, error out:
702                        return self.expected("key name", &next_token);
703                    }
704                }
705                Ok(parts.join("."))
706            }
707            Token::SingleQuotedString(s) => Ok(s),
708            Token::DoubleQuotedString(s) => Ok(s),
709            Token::EscapedStringLiteral(s) => Ok(s),
710            _ => self.expected("key name", &next_token),
711        }
712    }
713
714    /// Parse the next token as a value for an option list
715    ///
716    /// Note this is different than [`parse_value`] as it allows any
717    /// word or keyword in this location.
718    ///
719    /// [`parse_value`]: sqlparser::parser::Parser::parse_value
720    pub fn parse_option_value(&mut self) -> Result<Value, DataFusionError> {
721        let next_token = self.parser.next_token();
722        match next_token.token {
723            // e.g. things like "snappy" or "gzip" that may be keywords
724            Token::Word(word) => Ok(Value::SingleQuotedString(word.value)),
725            Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)),
726            Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)),
727            Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)),
728            Token::Number(n, l) => Ok(Value::Number(n, l)),
729            _ => self.expected("string or numeric value", &next_token),
730        }
731    }
732
733    /// Parse a SQL `EXPLAIN`
734    pub fn parse_explain(&mut self) -> Result<Statement, DataFusionError> {
735        let analyze = self.parser.parse_keyword(Keyword::ANALYZE);
736        let verbose = self.parser.parse_keyword(Keyword::VERBOSE);
737        let format = self.parse_explain_format()?;
738
739        let statement = self.parse_statement()?;
740
741        Ok(Statement::Explain(ExplainStatement {
742            statement: Box::new(statement),
743            analyze,
744            verbose,
745            format,
746        }))
747    }
748
749    /// Parse a SQL `RESET`
750    pub fn parse_reset(&mut self) -> Result<Statement, DataFusionError> {
751        let mut parts: Vec<String> = Vec::new();
752        let mut expecting_segment = true;
753
754        loop {
755            let next_token = self.parser.peek_token();
756            match &next_token.token {
757                Token::Word(word) => {
758                    self.parser.next_token();
759                    parts.push(word.value.clone());
760                    expecting_segment = false;
761                }
762                Token::SingleQuotedString(s)
763                | Token::DoubleQuotedString(s)
764                | Token::EscapedStringLiteral(s) => {
765                    self.parser.next_token();
766                    parts.push(s.clone());
767                    expecting_segment = false;
768                }
769                Token::Period => {
770                    self.parser.next_token();
771                    if expecting_segment || parts.is_empty() {
772                        return self.expected("configuration parameter", &next_token);
773                    }
774                    expecting_segment = true;
775                }
776                Token::EOF | Token::SemiColon => break,
777                _ => return self.expected("configuration parameter", &next_token),
778            }
779        }
780
781        if parts.is_empty() || expecting_segment {
782            return self.expected("configuration parameter", &self.parser.peek_token());
783        }
784
785        let idents: Vec<Ident> = parts.into_iter().map(Ident::new).collect();
786        let variable = ObjectName::from(idents);
787        Ok(Statement::Reset(ResetStatement::Variable(variable)))
788    }
789
790    pub fn parse_explain_format(&mut self) -> Result<Option<String>, DataFusionError> {
791        if !self.parser.parse_keyword(Keyword::FORMAT) {
792            return Ok(None);
793        }
794
795        let next_token = self.parser.next_token();
796        let format = match next_token.token {
797            Token::Word(w) => Ok(w.value),
798            Token::SingleQuotedString(w) => Ok(w),
799            Token::DoubleQuotedString(w) => Ok(w),
800            _ => self.expected("an explain format such as TREE", &next_token),
801        }?;
802        Ok(Some(format))
803    }
804
805    /// Parse a SQL `CREATE` statement handling `CREATE EXTERNAL TABLE`
806    pub fn parse_create(&mut self) -> Result<Statement, DataFusionError> {
807        // TODO: Change sql parser to take in `or_replace: bool` inside parse_create()
808        if self
809            .parser
810            .parse_keywords(&[Keyword::OR, Keyword::REPLACE, Keyword::EXTERNAL])
811        {
812            self.parse_create_external_table(false, true)
813        } else if self.parser.parse_keywords(&[
814            Keyword::OR,
815            Keyword::REPLACE,
816            Keyword::UNBOUNDED,
817            Keyword::EXTERNAL,
818        ]) {
819            self.parse_create_external_table(true, true)
820        } else if self.parser.parse_keyword(Keyword::EXTERNAL) {
821            self.parse_create_external_table(false, false)
822        } else if self
823            .parser
824            .parse_keywords(&[Keyword::UNBOUNDED, Keyword::EXTERNAL])
825        {
826            self.parse_create_external_table(true, false)
827        } else {
828            Ok(Statement::Statement(Box::from(self.parser.parse_create()?)))
829        }
830    }
831
832    fn parse_partitions(&mut self) -> Result<Vec<String>, DataFusionError> {
833        let mut partitions: Vec<String> = vec![];
834        if !self.parser.consume_token(&Token::LParen)
835            || self.parser.consume_token(&Token::RParen)
836        {
837            return Ok(partitions);
838        }
839
840        loop {
841            if let Token::Word(_) = self.parser.peek_token().token {
842                let identifier = self.parser.parse_identifier()?;
843                partitions.push(identifier.to_string());
844            } else {
845                return self.expected("partition name", &self.parser.peek_token());
846            }
847            let comma = self.parser.consume_token(&Token::Comma);
848            if self.parser.consume_token(&Token::RParen) {
849                // allow a trailing comma, even though it's not in standard
850                break;
851            } else if !comma {
852                return self.expected(
853                    "',' or ')' after partition definition",
854                    &self.parser.peek_token(),
855                );
856            }
857        }
858        Ok(partitions)
859    }
860
861    /// Parse the ordering clause of a `CREATE EXTERNAL TABLE` SQL statement
862    pub fn parse_order_by_exprs(&mut self) -> Result<Vec<OrderByExpr>, DataFusionError> {
863        let mut values = vec![];
864        self.parser.expect_token(&Token::LParen)?;
865        loop {
866            values.push(self.parse_order_by_expr()?);
867            if !self.parser.consume_token(&Token::Comma) {
868                self.parser.expect_token(&Token::RParen)?;
869                return Ok(values);
870            }
871        }
872    }
873
874    /// Parse an ORDER BY sub-expression optionally followed by ASC or DESC.
875    pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, DataFusionError> {
876        let expr = self.parser.parse_expr()?;
877
878        let asc = if self.parser.parse_keyword(Keyword::ASC) {
879            Some(true)
880        } else if self.parser.parse_keyword(Keyword::DESC) {
881            Some(false)
882        } else {
883            None
884        };
885
886        let nulls_first = if self
887            .parser
888            .parse_keywords(&[Keyword::NULLS, Keyword::FIRST])
889        {
890            Some(true)
891        } else if self.parser.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) {
892            Some(false)
893        } else {
894            None
895        };
896
897        Ok(OrderByExpr {
898            expr,
899            options: OrderByOptions { asc, nulls_first },
900            with_fill: None,
901        })
902    }
903
904    // This is a copy of the equivalent implementation in sqlparser.
905    fn parse_columns(
906        &mut self,
907    ) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), DataFusionError> {
908        let mut columns = vec![];
909        let mut constraints = vec![];
910        if !self.parser.consume_token(&Token::LParen)
911            || self.parser.consume_token(&Token::RParen)
912        {
913            return Ok((columns, constraints));
914        }
915
916        loop {
917            if let Some(constraint) = self.parser.parse_optional_table_constraint()? {
918                constraints.push(constraint);
919            } else if let Token::Word(_) = self.parser.peek_token().token {
920                let column_def = self.parse_column_def()?;
921                columns.push(column_def);
922            } else {
923                return self.expected(
924                    "column name or constraint definition",
925                    &self.parser.peek_token(),
926                );
927            }
928            let comma = self.parser.consume_token(&Token::Comma);
929            if self.parser.consume_token(&Token::RParen) {
930                // allow a trailing comma, even though it's not in standard
931                break;
932            } else if !comma {
933                return self.expected(
934                    "',' or ')' after column definition",
935                    &self.parser.peek_token(),
936                );
937            }
938        }
939
940        Ok((columns, constraints))
941    }
942
943    fn parse_column_def(&mut self) -> Result<ColumnDef, DataFusionError> {
944        let name = self.parser.parse_identifier()?;
945        let data_type = self.parser.parse_data_type()?;
946        let mut options = vec![];
947        loop {
948            if self.parser.parse_keyword(Keyword::CONSTRAINT) {
949                let name = Some(self.parser.parse_identifier()?);
950                if let Some(option) = self.parser.parse_optional_column_option()? {
951                    options.push(ColumnOptionDef { name, option });
952                } else {
953                    return self.expected(
954                        "constraint details after CONSTRAINT <name>",
955                        &self.parser.peek_token(),
956                    );
957                }
958            } else if let Some(option) = self.parser.parse_optional_column_option()? {
959                options.push(ColumnOptionDef { name: None, option });
960            } else {
961                break;
962            };
963        }
964        Ok(ColumnDef {
965            name,
966            data_type,
967            options,
968        })
969    }
970
971    fn parse_create_external_table(
972        &mut self,
973        unbounded: bool,
974        or_replace: bool,
975    ) -> Result<Statement, DataFusionError> {
976        let temporary = self
977            .parser
978            .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY])
979            .is_some();
980
981        self.parser.expect_keyword(Keyword::TABLE)?;
982        let if_not_exists =
983            self.parser
984                .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
985
986        if if_not_exists && or_replace {
987            return parser_err!("'IF NOT EXISTS' cannot coexist with 'REPLACE'");
988        }
989
990        let table_name = self.parser.parse_object_name(true)?;
991        let (mut columns, constraints) = self.parse_columns()?;
992
993        #[derive(Default)]
994        struct Builder {
995            file_type: Option<String>,
996            location: Option<String>,
997            table_partition_cols: Option<Vec<String>>,
998            order_exprs: Vec<LexOrdering>,
999            options: Option<Vec<(String, Value)>>,
1000        }
1001        let mut builder = Builder::default();
1002
1003        loop {
1004            if let Some(keyword) = self.parser.parse_one_of_keywords(&[
1005                Keyword::STORED,
1006                Keyword::LOCATION,
1007                Keyword::WITH,
1008                Keyword::DELIMITER,
1009                Keyword::COMPRESSION,
1010                Keyword::PARTITIONED,
1011                Keyword::OPTIONS,
1012            ]) {
1013                match keyword {
1014                    Keyword::STORED => {
1015                        self.parser.expect_keyword(Keyword::AS)?;
1016                        ensure_not_set(&builder.file_type, "STORED AS")?;
1017                        builder.file_type = Some(self.parse_file_format()?);
1018                    }
1019                    Keyword::LOCATION => {
1020                        ensure_not_set(&builder.location, "LOCATION")?;
1021                        builder.location = Some(self.parser.parse_literal_string()?);
1022                    }
1023                    Keyword::WITH => {
1024                        if self.parser.parse_keyword(Keyword::ORDER) {
1025                            builder.order_exprs.push(self.parse_order_by_exprs()?);
1026                        } else {
1027                            self.parser.expect_keyword(Keyword::HEADER)?;
1028                            self.parser.expect_keyword(Keyword::ROW)?;
1029                            return parser_err!(
1030                                "WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)"
1031                            )?;
1032                        }
1033                    }
1034                    Keyword::DELIMITER => {
1035                        return parser_err!(
1036                            "DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')"
1037                        )?;
1038                    }
1039                    Keyword::COMPRESSION => {
1040                        self.parser.expect_keyword(Keyword::TYPE)?;
1041                        return parser_err!(
1042                            "COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)"
1043                        )?;
1044                    }
1045                    Keyword::PARTITIONED => {
1046                        self.parser.expect_keyword(Keyword::BY)?;
1047                        ensure_not_set(&builder.table_partition_cols, "PARTITIONED BY")?;
1048                        // Expects either list of column names (col_name [, col_name]*)
1049                        // or list of column definitions (col_name datatype [, col_name datatype]* )
1050                        // use the token after the name to decide which parsing rule to use
1051                        // Note that mixing both names and definitions is not allowed
1052                        let peeked = self.parser.peek_nth_token(2);
1053                        if peeked == Token::Comma || peeked == Token::RParen {
1054                            // List of column names
1055                            builder.table_partition_cols = Some(self.parse_partitions()?)
1056                        } else {
1057                            // List of column defs
1058                            let (cols, cons) = self.parse_columns()?;
1059                            builder.table_partition_cols = Some(
1060                                cols.iter().map(|col| col.name.to_string()).collect(),
1061                            );
1062
1063                            columns.extend(cols);
1064
1065                            if !cons.is_empty() {
1066                                return sql_err!(ParserError::ParserError(
1067                                    "Constraints on Partition Columns are not supported"
1068                                        .to_string(),
1069                                ));
1070                            }
1071                        }
1072                    }
1073                    Keyword::OPTIONS => {
1074                        ensure_not_set(&builder.options, "OPTIONS")?;
1075                        builder.options = Some(self.parse_value_options()?);
1076                    }
1077                    _ => {
1078                        unreachable!()
1079                    }
1080                }
1081            } else {
1082                let token = self.parser.next_token();
1083                if token == Token::EOF || token == Token::SemiColon {
1084                    break;
1085                } else {
1086                    return self.expected("end of statement or ;", &token)?;
1087                }
1088            }
1089        }
1090
1091        // Validations: location and file_type are required
1092        if builder.file_type.is_none() {
1093            return sql_err!(ParserError::ParserError(
1094                "Missing STORED AS clause in CREATE EXTERNAL TABLE statement".into(),
1095            ));
1096        }
1097        if builder.location.is_none() {
1098            return sql_err!(ParserError::ParserError(
1099                "Missing LOCATION clause in CREATE EXTERNAL TABLE statement".into(),
1100            ));
1101        }
1102
1103        let create = CreateExternalTable {
1104            name: table_name,
1105            columns,
1106            file_type: builder.file_type.unwrap(),
1107            location: builder.location.unwrap(),
1108            table_partition_cols: builder.table_partition_cols.unwrap_or(vec![]),
1109            order_exprs: builder.order_exprs,
1110            if_not_exists,
1111            or_replace,
1112            temporary,
1113            unbounded,
1114            options: builder.options.unwrap_or(Vec::new()),
1115            constraints,
1116        };
1117        Ok(Statement::CreateExternalTable(create))
1118    }
1119
1120    /// Parses the set of valid formats
1121    fn parse_file_format(&mut self) -> Result<String, DataFusionError> {
1122        let token = self.parser.next_token();
1123        match &token.token {
1124            Token::Word(w) => parse_file_type(&w.value),
1125            _ => self.expected("one of ARROW, PARQUET, NDJSON, or CSV", &token),
1126        }
1127    }
1128
1129    /// Parses (key value) style options into a map of String --> [`Value`].
1130    ///
1131    /// This method supports keywords as key names as well as multiple
1132    /// value types such as Numbers as well as Strings.
1133    fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>, DataFusionError> {
1134        let mut options = vec![];
1135        self.parser.expect_token(&Token::LParen)?;
1136
1137        loop {
1138            let key = self.parse_option_key()?;
1139            let value = self.parse_option_value()?;
1140            options.push((key, value));
1141            let comma = self.parser.consume_token(&Token::Comma);
1142            if self.parser.consume_token(&Token::RParen) {
1143                // Allow a trailing comma, even though it's not in standard
1144                break;
1145            } else if !comma {
1146                return self.expected(
1147                    "',' or ')' after option definition",
1148                    &self.parser.peek_token(),
1149                );
1150            }
1151        }
1152        Ok(options)
1153    }
1154}
1155
1156#[cfg(test)]
1157mod tests {
1158    use super::*;
1159    use datafusion_common::assert_contains;
1160    use sqlparser::ast::Expr::Identifier;
1161    use sqlparser::ast::{
1162        BinaryOperator, DataType, ExactNumberInfo, Expr, Ident, ValueWithSpan,
1163    };
1164    use sqlparser::dialect::SnowflakeDialect;
1165    use sqlparser::tokenizer::Span;
1166
1167    fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), DataFusionError> {
1168        let statements = DFParser::parse_sql(sql)?;
1169        assert_eq!(
1170            statements.len(),
1171            1,
1172            "Expected to parse exactly one statement"
1173        );
1174        assert_eq!(statements[0], expected, "actual:\n{:#?}", statements[0]);
1175        Ok(())
1176    }
1177
1178    /// Parses sql and asserts that the expected error message was found
1179    fn expect_parse_error(sql: &str, expected_error: &str) {
1180        match DFParser::parse_sql(sql) {
1181            Ok(statements) => {
1182                panic!(
1183                    "Expected parse error for '{sql}', but was successful: {statements:?}"
1184                );
1185            }
1186            Err(e) => {
1187                let error_message = e.to_string();
1188                assert!(
1189                    error_message.contains(expected_error),
1190                    "Expected error '{expected_error}' not found in actual error '{error_message}'"
1191                );
1192            }
1193        }
1194    }
1195
1196    fn make_column_def(name: impl Into<String>, data_type: DataType) -> ColumnDef {
1197        ColumnDef {
1198            name: Ident {
1199                value: name.into(),
1200                quote_style: None,
1201                span: Span::empty(),
1202            },
1203            data_type,
1204            options: vec![],
1205        }
1206    }
1207
1208    #[test]
1209    fn create_external_table() -> Result<(), DataFusionError> {
1210        // positive case
1211        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
1212        let display = None;
1213        let name = ObjectName::from(vec![Ident::from("t")]);
1214        let expected = Statement::CreateExternalTable(CreateExternalTable {
1215            name: name.clone(),
1216            columns: vec![make_column_def("c1", DataType::Int(display))],
1217            file_type: "CSV".to_string(),
1218            location: "foo.csv".into(),
1219            table_partition_cols: vec![],
1220            order_exprs: vec![],
1221            if_not_exists: false,
1222            or_replace: false,
1223            temporary: false,
1224            unbounded: false,
1225            options: vec![],
1226            constraints: vec![],
1227        });
1228        expect_parse_ok(sql, expected)?;
1229
1230        // positive case: leading space
1231        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'     ";
1232        let expected = Statement::CreateExternalTable(CreateExternalTable {
1233            name: name.clone(),
1234            columns: vec![make_column_def("c1", DataType::Int(None))],
1235            file_type: "CSV".to_string(),
1236            location: "foo.csv".into(),
1237            table_partition_cols: vec![],
1238            order_exprs: vec![],
1239            if_not_exists: false,
1240            or_replace: false,
1241            temporary: false,
1242            unbounded: false,
1243            options: vec![],
1244            constraints: vec![],
1245        });
1246        expect_parse_ok(sql, expected)?;
1247
1248        // positive case: leading space + semicolon
1249        let sql =
1250            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'      ;";
1251        let expected = Statement::CreateExternalTable(CreateExternalTable {
1252            name: name.clone(),
1253            columns: vec![make_column_def("c1", DataType::Int(None))],
1254            file_type: "CSV".to_string(),
1255            location: "foo.csv".into(),
1256            table_partition_cols: vec![],
1257            order_exprs: vec![],
1258            if_not_exists: false,
1259            or_replace: false,
1260            temporary: false,
1261            unbounded: false,
1262            options: vec![],
1263            constraints: vec![],
1264        });
1265        expect_parse_ok(sql, expected)?;
1266
1267        // positive case with delimiter
1268        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
1269        let display = None;
1270        let expected = Statement::CreateExternalTable(CreateExternalTable {
1271            name: name.clone(),
1272            columns: vec![make_column_def("c1", DataType::Int(display))],
1273            file_type: "CSV".to_string(),
1274            location: "foo.csv".into(),
1275            table_partition_cols: vec![],
1276            order_exprs: vec![],
1277            if_not_exists: false,
1278            or_replace: false,
1279            temporary: false,
1280            unbounded: false,
1281            options: vec![(
1282                "format.delimiter".into(),
1283                Value::SingleQuotedString("|".into()),
1284            )],
1285            constraints: vec![],
1286        });
1287        expect_parse_ok(sql, expected)?;
1288
1289        // positive case: partitioned by
1290        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'";
1291        let display = None;
1292        let expected = Statement::CreateExternalTable(CreateExternalTable {
1293            name: name.clone(),
1294            columns: vec![make_column_def("c1", DataType::Int(display))],
1295            file_type: "CSV".to_string(),
1296            location: "foo.csv".into(),
1297            table_partition_cols: vec!["p1".to_string(), "p2".to_string()],
1298            order_exprs: vec![],
1299            if_not_exists: false,
1300            or_replace: false,
1301            temporary: false,
1302            unbounded: false,
1303            options: vec![],
1304            constraints: vec![],
1305        });
1306        expect_parse_ok(sql, expected)?;
1307
1308        // positive case: it is ok for sql stmt with `COMPRESSION TYPE GZIP` tokens
1309        let sqls =
1310            vec![
1311             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1312             ('format.compression' 'GZIP')", "GZIP"),
1313             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1314             ('format.compression' 'BZIP2')", "BZIP2"),
1315             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1316             ('format.compression' 'XZ')", "XZ"),
1317             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1318             ('format.compression' 'ZSTD')", "ZSTD"),
1319         ];
1320        for (sql, compression) in sqls {
1321            let expected = Statement::CreateExternalTable(CreateExternalTable {
1322                name: name.clone(),
1323                columns: vec![make_column_def("c1", DataType::Int(display))],
1324                file_type: "CSV".to_string(),
1325                location: "foo.csv".into(),
1326                table_partition_cols: vec![],
1327                order_exprs: vec![],
1328                if_not_exists: false,
1329                or_replace: false,
1330                temporary: false,
1331                unbounded: false,
1332                options: vec![(
1333                    "format.compression".into(),
1334                    Value::SingleQuotedString(compression.into()),
1335                )],
1336                constraints: vec![],
1337            });
1338            expect_parse_ok(sql, expected)?;
1339        }
1340
1341        // positive case: it is ok for parquet files not to have columns specified
1342        let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1343        let expected = Statement::CreateExternalTable(CreateExternalTable {
1344            name: name.clone(),
1345            columns: vec![],
1346            file_type: "PARQUET".to_string(),
1347            location: "foo.parquet".into(),
1348            table_partition_cols: vec![],
1349            order_exprs: vec![],
1350            if_not_exists: false,
1351            or_replace: false,
1352            temporary: false,
1353            unbounded: false,
1354            options: vec![],
1355            constraints: vec![],
1356        });
1357        expect_parse_ok(sql, expected)?;
1358
1359        // positive case: it is ok for parquet files to be other than upper case
1360        let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'";
1361        let expected = Statement::CreateExternalTable(CreateExternalTable {
1362            name: name.clone(),
1363            columns: vec![],
1364            file_type: "PARQUET".to_string(),
1365            location: "foo.parquet".into(),
1366            table_partition_cols: vec![],
1367            order_exprs: vec![],
1368            if_not_exists: false,
1369            or_replace: false,
1370            temporary: false,
1371            unbounded: false,
1372            options: vec![],
1373            constraints: vec![],
1374        });
1375        expect_parse_ok(sql, expected)?;
1376
1377        // positive case: it is ok for avro files not to have columns specified
1378        let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
1379        let expected = Statement::CreateExternalTable(CreateExternalTable {
1380            name: name.clone(),
1381            columns: vec![],
1382            file_type: "AVRO".to_string(),
1383            location: "foo.avro".into(),
1384            table_partition_cols: vec![],
1385            order_exprs: vec![],
1386            if_not_exists: false,
1387            or_replace: false,
1388            temporary: false,
1389            unbounded: false,
1390            options: vec![],
1391            constraints: vec![],
1392        });
1393        expect_parse_ok(sql, expected)?;
1394
1395        // positive case: it is ok for avro files not to have columns specified
1396        let sql = "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'";
1397        let expected = Statement::CreateExternalTable(CreateExternalTable {
1398            name: name.clone(),
1399            columns: vec![],
1400            file_type: "PARQUET".to_string(),
1401            location: "foo.parquet".into(),
1402            table_partition_cols: vec![],
1403            order_exprs: vec![],
1404            if_not_exists: true,
1405            or_replace: false,
1406            temporary: false,
1407            unbounded: false,
1408            options: vec![],
1409            constraints: vec![],
1410        });
1411        expect_parse_ok(sql, expected)?;
1412
1413        // positive case: or replace
1414        let sql =
1415            "CREATE OR REPLACE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1416        let expected = Statement::CreateExternalTable(CreateExternalTable {
1417            name: name.clone(),
1418            columns: vec![],
1419            file_type: "PARQUET".to_string(),
1420            location: "foo.parquet".into(),
1421            table_partition_cols: vec![],
1422            order_exprs: vec![],
1423            if_not_exists: false,
1424            or_replace: true,
1425            temporary: false,
1426            unbounded: false,
1427            options: vec![],
1428            constraints: vec![],
1429        });
1430        expect_parse_ok(sql, expected)?;
1431
1432        // positive case: column definition allowed in 'partition by' clause
1433        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'";
1434        let expected = Statement::CreateExternalTable(CreateExternalTable {
1435            name: name.clone(),
1436            columns: vec![
1437                make_column_def("c1", DataType::Int(None)),
1438                make_column_def("p1", DataType::Int(None)),
1439            ],
1440            file_type: "CSV".to_string(),
1441            location: "foo.csv".into(),
1442            table_partition_cols: vec!["p1".to_string()],
1443            order_exprs: vec![],
1444            if_not_exists: false,
1445            or_replace: false,
1446            temporary: false,
1447            unbounded: false,
1448            options: vec![],
1449            constraints: vec![],
1450        });
1451        expect_parse_ok(sql, expected)?;
1452
1453        // negative case: mixed column defs and column names in `PARTITIONED BY` clause
1454        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'";
1455        expect_parse_error(
1456            sql,
1457            "SQL error: ParserError(\"Expected: a data type name, found: ) at Line: 1, Column: 73\")",
1458        );
1459
1460        // negative case: mixed column defs and column names in `PARTITIONED BY` clause
1461        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'";
1462        expect_parse_error(
1463            sql,
1464            "SQL error: ParserError(\"Expected: ',' or ')' after partition definition, found: int at Line: 1, Column: 70\")",
1465        );
1466
1467        // positive case: additional options (one entry) can be specified
1468        let sql =
1469            "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'";
1470        let expected = Statement::CreateExternalTable(CreateExternalTable {
1471            name: name.clone(),
1472            columns: vec![],
1473            file_type: "X".to_string(),
1474            location: "blahblah".into(),
1475            table_partition_cols: vec![],
1476            order_exprs: vec![],
1477            if_not_exists: false,
1478            or_replace: false,
1479            temporary: false,
1480            unbounded: false,
1481            options: vec![("k1".into(), Value::SingleQuotedString("v1".into()))],
1482            constraints: vec![],
1483        });
1484        expect_parse_ok(sql, expected)?;
1485
1486        // positive case: additional options (multiple entries) can be specified
1487        let sql = "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'";
1488        let expected = Statement::CreateExternalTable(CreateExternalTable {
1489            name: name.clone(),
1490            columns: vec![],
1491            file_type: "X".to_string(),
1492            location: "blahblah".into(),
1493            table_partition_cols: vec![],
1494            order_exprs: vec![],
1495            if_not_exists: false,
1496            or_replace: false,
1497            temporary: false,
1498            unbounded: false,
1499            options: vec![
1500                ("k1".into(), Value::SingleQuotedString("v1".into())),
1501                ("k2".into(), Value::SingleQuotedString("v2".into())),
1502            ],
1503            constraints: vec![],
1504        });
1505        expect_parse_ok(sql, expected)?;
1506
1507        // Ordered Col
1508        let sqls = [
1509            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'",
1510            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'",
1511            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS LAST) LOCATION 'foo.csv'",
1512            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC) LOCATION 'foo.csv'",
1513            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC) LOCATION 'foo.csv'",
1514            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS FIRST) LOCATION 'foo.csv'",
1515            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS LAST) LOCATION 'foo.csv'",
1516            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS FIRST) LOCATION 'foo.csv'",
1517            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS LAST) LOCATION 'foo.csv'",
1518        ];
1519        let expected = vec![
1520            (None, None),
1521            (None, Some(true)),
1522            (None, Some(false)),
1523            (Some(true), None),
1524            (Some(false), None),
1525            (Some(false), Some(true)),
1526            (Some(false), Some(false)),
1527            (Some(true), Some(true)),
1528            (Some(true), Some(false)),
1529        ];
1530        for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) {
1531            let expected = Statement::CreateExternalTable(CreateExternalTable {
1532                name: name.clone(),
1533                columns: vec![make_column_def("c1", DataType::Int(None))],
1534                file_type: "CSV".to_string(),
1535                location: "foo.csv".into(),
1536                table_partition_cols: vec![],
1537                order_exprs: vec![vec![OrderByExpr {
1538                    expr: Identifier(Ident {
1539                        value: "c1".to_owned(),
1540                        quote_style: None,
1541                        span: Span::empty(),
1542                    }),
1543                    options: OrderByOptions { asc, nulls_first },
1544                    with_fill: None,
1545                }]],
1546                if_not_exists: false,
1547                or_replace: false,
1548                temporary: false,
1549                unbounded: false,
1550                options: vec![],
1551                constraints: vec![],
1552            });
1553            expect_parse_ok(sql, expected)?;
1554        }
1555
1556        // Ordered Col
1557        let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
1558        let display = None;
1559        let expected = Statement::CreateExternalTable(CreateExternalTable {
1560            name: name.clone(),
1561            columns: vec![
1562                make_column_def("c1", DataType::Int(display)),
1563                make_column_def("c2", DataType::Int(display)),
1564            ],
1565            file_type: "CSV".to_string(),
1566            location: "foo.csv".into(),
1567            table_partition_cols: vec![],
1568            order_exprs: vec![vec![
1569                OrderByExpr {
1570                    expr: Identifier(Ident {
1571                        value: "c1".to_owned(),
1572                        quote_style: None,
1573                        span: Span::empty(),
1574                    }),
1575                    options: OrderByOptions {
1576                        asc: Some(true),
1577                        nulls_first: None,
1578                    },
1579                    with_fill: None,
1580                },
1581                OrderByExpr {
1582                    expr: Identifier(Ident {
1583                        value: "c2".to_owned(),
1584                        quote_style: None,
1585                        span: Span::empty(),
1586                    }),
1587                    options: OrderByOptions {
1588                        asc: Some(false),
1589                        nulls_first: Some(true),
1590                    },
1591                    with_fill: None,
1592                },
1593            ]],
1594            if_not_exists: false,
1595            or_replace: false,
1596            temporary: false,
1597            unbounded: false,
1598            options: vec![],
1599            constraints: vec![],
1600        });
1601        expect_parse_ok(sql, expected)?;
1602
1603        // Ordered Binary op
1604        let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
1605        let display = None;
1606        let expected = Statement::CreateExternalTable(CreateExternalTable {
1607            name: name.clone(),
1608            columns: vec![
1609                make_column_def("c1", DataType::Int(display)),
1610                make_column_def("c2", DataType::Int(display)),
1611            ],
1612            file_type: "CSV".to_string(),
1613            location: "foo.csv".into(),
1614            table_partition_cols: vec![],
1615            order_exprs: vec![vec![OrderByExpr {
1616                expr: Expr::BinaryOp {
1617                    left: Box::new(Identifier(Ident {
1618                        value: "c1".to_owned(),
1619                        quote_style: None,
1620                        span: Span::empty(),
1621                    })),
1622                    op: BinaryOperator::Minus,
1623                    right: Box::new(Identifier(Ident {
1624                        value: "c2".to_owned(),
1625                        quote_style: None,
1626                        span: Span::empty(),
1627                    })),
1628                },
1629                options: OrderByOptions {
1630                    asc: Some(true),
1631                    nulls_first: None,
1632                },
1633                with_fill: None,
1634            }]],
1635            if_not_exists: false,
1636            or_replace: false,
1637            temporary: false,
1638            unbounded: false,
1639            options: vec![],
1640            constraints: vec![],
1641        });
1642        expect_parse_ok(sql, expected)?;
1643
1644        // Most complete CREATE EXTERNAL TABLE statement possible (using IF NOT EXISTS)
1645        let sql = "
1646            CREATE UNBOUNDED EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float)
1647            STORED AS PARQUET
1648            WITH ORDER (c1 - c2 ASC)
1649            PARTITIONED BY (c1)
1650            LOCATION 'foo.parquet'
1651            OPTIONS ('format.compression' 'zstd',
1652                     'format.delimiter' '*',
1653                     'ROW_GROUP_SIZE' '1024',
1654                     'TRUNCATE' 'NO',
1655                     'format.has_header' 'true')";
1656        let expected = Statement::CreateExternalTable(CreateExternalTable {
1657            name: name.clone(),
1658            columns: vec![
1659                make_column_def("c1", DataType::Int(None)),
1660                make_column_def("c2", DataType::Float(ExactNumberInfo::None)),
1661            ],
1662            file_type: "PARQUET".to_string(),
1663            location: "foo.parquet".into(),
1664            table_partition_cols: vec!["c1".into()],
1665            order_exprs: vec![vec![OrderByExpr {
1666                expr: Expr::BinaryOp {
1667                    left: Box::new(Identifier(Ident {
1668                        value: "c1".to_owned(),
1669                        quote_style: None,
1670                        span: Span::empty(),
1671                    })),
1672                    op: BinaryOperator::Minus,
1673                    right: Box::new(Identifier(Ident {
1674                        value: "c2".to_owned(),
1675                        quote_style: None,
1676                        span: Span::empty(),
1677                    })),
1678                },
1679                options: OrderByOptions {
1680                    asc: Some(true),
1681                    nulls_first: None,
1682                },
1683                with_fill: None,
1684            }]],
1685            if_not_exists: true,
1686            or_replace: false,
1687            temporary: false,
1688            unbounded: true,
1689            options: vec![
1690                (
1691                    "format.compression".into(),
1692                    Value::SingleQuotedString("zstd".into()),
1693                ),
1694                (
1695                    "format.delimiter".into(),
1696                    Value::SingleQuotedString("*".into()),
1697                ),
1698                (
1699                    "ROW_GROUP_SIZE".into(),
1700                    Value::SingleQuotedString("1024".into()),
1701                ),
1702                ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1703                (
1704                    "format.has_header".into(),
1705                    Value::SingleQuotedString("true".into()),
1706                ),
1707            ],
1708            constraints: vec![],
1709        });
1710        expect_parse_ok(sql, expected)?;
1711
1712        // Most complete CREATE EXTERNAL TABLE statement possible (using OR REPLACE)
1713        let sql = "
1714            CREATE OR REPLACE UNBOUNDED EXTERNAL TABLE t (c1 int, c2 float)
1715            STORED AS PARQUET
1716            WITH ORDER (c1 - c2 ASC)
1717            PARTITIONED BY (c1)
1718            LOCATION 'foo.parquet'
1719            OPTIONS ('format.compression' 'zstd',
1720                     'format.delimiter' '*',
1721                     'ROW_GROUP_SIZE' '1024',
1722                     'TRUNCATE' 'NO',
1723                     'format.has_header' 'true')";
1724        let expected = Statement::CreateExternalTable(CreateExternalTable {
1725            name: name.clone(),
1726            columns: vec![
1727                make_column_def("c1", DataType::Int(None)),
1728                make_column_def("c2", DataType::Float(ExactNumberInfo::None)),
1729            ],
1730            file_type: "PARQUET".to_string(),
1731            location: "foo.parquet".into(),
1732            table_partition_cols: vec!["c1".into()],
1733            order_exprs: vec![vec![OrderByExpr {
1734                expr: Expr::BinaryOp {
1735                    left: Box::new(Identifier(Ident {
1736                        value: "c1".to_owned(),
1737                        quote_style: None,
1738                        span: Span::empty(),
1739                    })),
1740                    op: BinaryOperator::Minus,
1741                    right: Box::new(Identifier(Ident {
1742                        value: "c2".to_owned(),
1743                        quote_style: None,
1744                        span: Span::empty(),
1745                    })),
1746                },
1747                options: OrderByOptions {
1748                    asc: Some(true),
1749                    nulls_first: None,
1750                },
1751                with_fill: None,
1752            }]],
1753            if_not_exists: false,
1754            or_replace: true,
1755            temporary: false,
1756            unbounded: true,
1757            options: vec![
1758                (
1759                    "format.compression".into(),
1760                    Value::SingleQuotedString("zstd".into()),
1761                ),
1762                (
1763                    "format.delimiter".into(),
1764                    Value::SingleQuotedString("*".into()),
1765                ),
1766                (
1767                    "ROW_GROUP_SIZE".into(),
1768                    Value::SingleQuotedString("1024".into()),
1769                ),
1770                ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1771                (
1772                    "format.has_header".into(),
1773                    Value::SingleQuotedString("true".into()),
1774                ),
1775            ],
1776            constraints: vec![],
1777        });
1778        expect_parse_ok(sql, expected)?;
1779
1780        // For error cases, see: `create_external_table.slt`
1781
1782        Ok(())
1783    }
1784
1785    #[test]
1786    fn copy_to_table_to_table() -> Result<(), DataFusionError> {
1787        // positive case
1788        let sql = "COPY foo TO bar STORED AS CSV";
1789        let expected = Statement::CopyTo(CopyToStatement {
1790            source: object_name("foo"),
1791            target: "bar".to_string(),
1792            partitioned_by: vec![],
1793            stored_as: Some("CSV".to_owned()),
1794            options: vec![],
1795        });
1796
1797        assert_eq!(verified_stmt(sql), expected);
1798        Ok(())
1799    }
1800
1801    #[test]
1802    fn skip_copy_into_snowflake() -> Result<(), DataFusionError> {
1803        let sql = "COPY INTO foo FROM @~/staged FILE_FORMAT = (FORMAT_NAME = 'mycsv');";
1804        let dialect = Box::new(SnowflakeDialect);
1805        let statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?;
1806
1807        assert_eq!(
1808            statements.len(),
1809            1,
1810            "Expected to parse exactly one statement"
1811        );
1812        if let Statement::CopyTo(_) = &statements[0] {
1813            panic!("Expected non COPY TO statement, but was successful: {statements:?}");
1814        }
1815        Ok(())
1816    }
1817
1818    #[test]
1819    fn explain_copy_to_table_to_table() -> Result<(), DataFusionError> {
1820        let cases = vec![
1821            ("EXPLAIN COPY foo TO bar STORED AS PARQUET", false, false),
1822            (
1823                "EXPLAIN ANALYZE COPY foo TO bar STORED AS PARQUET",
1824                true,
1825                false,
1826            ),
1827            (
1828                "EXPLAIN VERBOSE COPY foo TO bar STORED AS PARQUET",
1829                false,
1830                true,
1831            ),
1832            (
1833                "EXPLAIN ANALYZE VERBOSE COPY foo TO bar STORED AS PARQUET",
1834                true,
1835                true,
1836            ),
1837        ];
1838        for (sql, analyze, verbose) in cases {
1839            println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}");
1840
1841            let expected_copy = Statement::CopyTo(CopyToStatement {
1842                source: object_name("foo"),
1843                target: "bar".to_string(),
1844                partitioned_by: vec![],
1845                stored_as: Some("PARQUET".to_owned()),
1846                options: vec![],
1847            });
1848            let expected = Statement::Explain(ExplainStatement {
1849                analyze,
1850                verbose,
1851                format: None,
1852                statement: Box::new(expected_copy),
1853            });
1854            assert_eq!(verified_stmt(sql), expected);
1855        }
1856        Ok(())
1857    }
1858
1859    #[test]
1860    fn copy_to_query_to_table() -> Result<(), DataFusionError> {
1861        let statement = verified_stmt("SELECT 1");
1862
1863        // unwrap the various layers
1864        let statement = if let Statement::Statement(statement) = statement {
1865            *statement
1866        } else {
1867            panic!("Expected statement, got {statement:?}");
1868        };
1869
1870        let query = if let SQLStatement::Query(query) = statement {
1871            query
1872        } else {
1873            panic!("Expected query, got {statement:?}");
1874        };
1875
1876        let sql =
1877            "COPY (SELECT 1) TO bar STORED AS CSV OPTIONS ('format.has_header' 'true')";
1878        let expected = Statement::CopyTo(CopyToStatement {
1879            source: CopyToSource::Query(query),
1880            target: "bar".to_string(),
1881            partitioned_by: vec![],
1882            stored_as: Some("CSV".to_owned()),
1883            options: vec![(
1884                "format.has_header".into(),
1885                Value::SingleQuotedString("true".into()),
1886            )],
1887        });
1888        assert_eq!(verified_stmt(sql), expected);
1889        Ok(())
1890    }
1891
1892    #[test]
1893    fn copy_to_options() -> Result<(), DataFusionError> {
1894        let sql = "COPY foo TO bar STORED AS CSV OPTIONS ('row_group_size' '55')";
1895        let expected = Statement::CopyTo(CopyToStatement {
1896            source: object_name("foo"),
1897            target: "bar".to_string(),
1898            partitioned_by: vec![],
1899            stored_as: Some("CSV".to_owned()),
1900            options: vec![(
1901                "row_group_size".to_string(),
1902                Value::SingleQuotedString("55".to_string()),
1903            )],
1904        });
1905        assert_eq!(verified_stmt(sql), expected);
1906        Ok(())
1907    }
1908
1909    #[test]
1910    fn copy_to_partitioned_by() -> Result<(), DataFusionError> {
1911        let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')";
1912        let expected = Statement::CopyTo(CopyToStatement {
1913            source: object_name("foo"),
1914            target: "bar".to_string(),
1915            partitioned_by: vec!["a".to_string()],
1916            stored_as: Some("CSV".to_owned()),
1917            options: vec![(
1918                "row_group_size".to_string(),
1919                Value::SingleQuotedString("55".to_string()),
1920            )],
1921        });
1922        assert_eq!(verified_stmt(sql), expected);
1923        Ok(())
1924    }
1925
1926    #[test]
1927    fn copy_to_multi_options() -> Result<(), DataFusionError> {
1928        // order of options is preserved
1929        let sql = "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy, 'execution.keep_partition_by_columns' true)";
1930
1931        let expected_options = vec![
1932            (
1933                "format.row_group_size".to_string(),
1934                Value::Number("55".to_string(), false),
1935            ),
1936            (
1937                "format.compression".to_string(),
1938                Value::SingleQuotedString("snappy".to_string()),
1939            ),
1940            (
1941                "execution.keep_partition_by_columns".to_string(),
1942                Value::SingleQuotedString("true".to_string()),
1943            ),
1944        ];
1945
1946        let mut statements = DFParser::parse_sql(sql).unwrap();
1947        assert_eq!(statements.len(), 1);
1948        let only_statement = statements.pop_front().unwrap();
1949
1950        let options = if let Statement::CopyTo(copy_to) = only_statement {
1951            copy_to.options
1952        } else {
1953            panic!("Expected copy");
1954        };
1955
1956        assert_eq!(options, expected_options);
1957
1958        Ok(())
1959    }
1960
1961    // For error cases, see: `copy.slt`
1962
1963    fn object_name(name: &str) -> CopyToSource {
1964        CopyToSource::Relation(ObjectName::from(vec![Ident::new(name)]))
1965    }
1966
1967    // Based on  sqlparser-rs
1968    // https://github.com/sqlparser-rs/sqlparser-rs/blob/ae3b5844c839072c235965fe0d1bddc473dced87/src/test_utils.rs#L104-L116
1969
1970    /// Ensures that `sql` parses as a single [Statement]
1971    ///
1972    /// If `canonical` is non empty,this function additionally asserts
1973    /// that:
1974    ///
1975    /// 1. parsing `sql` results in the same [`Statement`] as parsing
1976    ///    `canonical`.
1977    ///
1978    /// 2. re-serializing the result of parsing `sql` produces the same
1979    ///    `canonical` sql string
1980    fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement {
1981        let mut statements = DFParser::parse_sql(sql).unwrap();
1982        assert_eq!(statements.len(), 1);
1983
1984        if sql != canonical {
1985            assert_eq!(DFParser::parse_sql(canonical).unwrap(), statements);
1986        }
1987
1988        let only_statement = statements.pop_front().unwrap();
1989        assert_eq!(
1990            canonical.to_uppercase(),
1991            only_statement.to_string().to_uppercase()
1992        );
1993        only_statement
1994    }
1995
1996    /// Ensures that `sql` parses as a single [Statement], and that
1997    /// re-serializing the parse result produces the same `sql`
1998    /// string (is not modified after a serialization round-trip).
1999    fn verified_stmt(sql: &str) -> Statement {
2000        one_statement_parses_to(sql, sql)
2001    }
2002
2003    #[test]
2004    /// Checks the recursion limit works for sql queries
2005    /// Recursion can happen easily with binary exprs (i.e, AND or OR)
2006    fn test_recursion_limit() {
2007        let sql = "SELECT 1 OR 2";
2008
2009        // Expect parse to succeed
2010        DFParserBuilder::new(sql)
2011            .build()
2012            .unwrap()
2013            .parse_statements()
2014            .unwrap();
2015
2016        let err = DFParserBuilder::new(sql)
2017            .with_recursion_limit(1)
2018            .build()
2019            .unwrap()
2020            .parse_statements()
2021            .unwrap_err();
2022
2023        assert_contains!(
2024            err.to_string(),
2025            "SQL error: RecursionLimitExceeded (current limit: 1)"
2026        );
2027    }
2028
2029    fn expect_parse_expr_ok(sql: &str, expected: ExprWithAlias) {
2030        let expr = DFParser::parse_sql_into_expr(sql).unwrap();
2031        assert_eq!(expr, expected, "actual:\n{expr:#?}");
2032    }
2033
2034    /// Parses sql and asserts that the expected error message was found
2035    fn expect_parse_expr_error(sql: &str, expected_error: &str) {
2036        match DFParser::parse_sql_into_expr(sql) {
2037            Ok(expr) => {
2038                panic!("Expected parse error for '{sql}', but was successful: {expr:#?}");
2039            }
2040            Err(e) => {
2041                let error_message = e.to_string();
2042                assert!(
2043                    error_message.contains(expected_error),
2044                    "Expected error '{expected_error}' not found in actual error '{error_message}'"
2045                );
2046            }
2047        }
2048    }
2049
2050    #[test]
2051    fn literal() {
2052        expect_parse_expr_ok(
2053            "1234",
2054            ExprWithAlias {
2055                expr: Expr::Value(ValueWithSpan::from(Value::Number(
2056                    "1234".to_string(),
2057                    false,
2058                ))),
2059                alias: None,
2060            },
2061        )
2062    }
2063
2064    #[test]
2065    fn literal_with_alias() {
2066        expect_parse_expr_ok(
2067            "1234 as foo",
2068            ExprWithAlias {
2069                expr: Expr::Value(ValueWithSpan::from(Value::Number(
2070                    "1234".to_string(),
2071                    false,
2072                ))),
2073                alias: Some(Ident::from("foo")),
2074            },
2075        )
2076    }
2077
2078    #[test]
2079    fn literal_with_alias_and_trailing_tokens() {
2080        expect_parse_expr_error(
2081            "1234 as foo.bar",
2082            "Expected: end of expression, found: .",
2083        )
2084    }
2085
2086    #[test]
2087    fn literal_with_alias_and_trailing_whitespace() {
2088        expect_parse_expr_ok(
2089            "1234 as foo   ",
2090            ExprWithAlias {
2091                expr: Expr::Value(ValueWithSpan::from(Value::Number(
2092                    "1234".to_string(),
2093                    false,
2094                ))),
2095                alias: Some(Ident::from("foo")),
2096            },
2097        )
2098    }
2099
2100    #[test]
2101    fn literal_with_alias_and_trailing_whitespace_and_token() {
2102        expect_parse_expr_error(
2103            "1234 as foo    bar",
2104            "Expected: end of expression, found: bar",
2105        )
2106    }
2107}