datafusion_sql/
parser.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`DFParser`]: DataFusion SQL Parser based on [`sqlparser`]
19//!
20//! This parser implements DataFusion specific statements such as
21//! `CREATE EXTERNAL TABLE`
22
23use datafusion_common::config::SqlParserOptions;
24use datafusion_common::DataFusionError;
25use datafusion_common::{sql_err, Diagnostic, Span};
26use sqlparser::ast::{ExprWithAlias, OrderByOptions};
27use sqlparser::tokenizer::TokenWithSpan;
28use sqlparser::{
29    ast::{
30        ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query,
31        Statement as SQLStatement, TableConstraint, Value,
32    },
33    dialect::{keywords::Keyword, Dialect, GenericDialect},
34    parser::{Parser, ParserError},
35    tokenizer::{Token, Tokenizer, Word},
36};
37use std::collections::VecDeque;
38use std::fmt;
39
40// Use `Parser::expected` instead, if possible
41macro_rules! parser_err {
42    ($MSG:expr $(; diagnostic = $DIAG:expr)?) => {{
43
44        let err = DataFusionError::from(ParserError::ParserError($MSG.to_string()));
45        $(
46            let err = err.with_diagnostic($DIAG);
47        )?
48        Err(err)
49    }};
50}
51
52fn parse_file_type(s: &str) -> Result<String, DataFusionError> {
53    Ok(s.to_uppercase())
54}
55
56/// DataFusion specific `EXPLAIN`
57///
58/// Syntax:
59/// ```sql
60/// EXPLAIN <ANALYZE> <VERBOSE> [FORMAT format] statement
61///```
62#[derive(Debug, Clone, PartialEq, Eq)]
63pub struct ExplainStatement {
64    /// `EXPLAIN ANALYZE ..`
65    pub analyze: bool,
66    /// `EXPLAIN .. VERBOSE ..`
67    pub verbose: bool,
68    /// `EXPLAIN .. FORMAT `
69    pub format: Option<String>,
70    /// The statement to analyze. Note this is a DataFusion [`Statement`] (not a
71    /// [`sqlparser::ast::Statement`] so that we can use `EXPLAIN`, `COPY`, and other
72    /// DataFusion specific statements
73    pub statement: Box<Statement>,
74}
75
76impl fmt::Display for ExplainStatement {
77    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78        let Self {
79            analyze,
80            verbose,
81            format,
82            statement,
83        } = self;
84
85        write!(f, "EXPLAIN ")?;
86        if *analyze {
87            write!(f, "ANALYZE ")?;
88        }
89        if *verbose {
90            write!(f, "VERBOSE ")?;
91        }
92        if let Some(format) = format.as_ref() {
93            write!(f, "FORMAT {format} ")?;
94        }
95
96        write!(f, "{statement}")
97    }
98}
99
100/// DataFusion extension DDL for `COPY`
101///
102/// # Syntax:
103///
104/// ```text
105/// COPY <table_name | (<query>)>
106/// TO
107/// <destination_url>
108/// (key_value_list)
109/// ```
110///
111/// # Examples
112///
113/// ```sql
114/// COPY lineitem  TO 'lineitem'
115/// STORED AS PARQUET (
116///   partitions 16,
117///   row_group_limit_rows 100000,
118///   row_group_limit_bytes 200000
119/// )
120///
121/// COPY (SELECT l_orderkey from lineitem) to 'lineitem.parquet';
122/// ```
123#[derive(Debug, Clone, PartialEq, Eq)]
124pub struct CopyToStatement {
125    /// From where the data comes from
126    pub source: CopyToSource,
127    /// The URL to where the data is heading
128    pub target: String,
129    /// Partition keys
130    pub partitioned_by: Vec<String>,
131    /// File type (Parquet, NDJSON, CSV etc.)
132    pub stored_as: Option<String>,
133    /// Target specific options
134    pub options: Vec<(String, Value)>,
135}
136
137impl fmt::Display for CopyToStatement {
138    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139        let Self {
140            source,
141            target,
142            partitioned_by,
143            stored_as,
144            options,
145            ..
146        } = self;
147
148        write!(f, "COPY {source} TO {target}")?;
149        if let Some(file_type) = stored_as {
150            write!(f, " STORED AS {file_type}")?;
151        }
152        if !partitioned_by.is_empty() {
153            write!(f, " PARTITIONED BY ({})", partitioned_by.join(", "))?;
154        }
155
156        if !options.is_empty() {
157            let opts: Vec<_> =
158                options.iter().map(|(k, v)| format!("'{k}' {v}")).collect();
159            write!(f, " OPTIONS ({})", opts.join(", "))?;
160        }
161
162        Ok(())
163    }
164}
165
166#[derive(Debug, Clone, PartialEq, Eq)]
167pub enum CopyToSource {
168    /// `COPY <table> TO ...`
169    Relation(ObjectName),
170    /// COPY (...query...) TO ...
171    Query(Box<Query>),
172}
173
174impl fmt::Display for CopyToSource {
175    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
176        match self {
177            CopyToSource::Relation(r) => write!(f, "{r}"),
178            CopyToSource::Query(q) => write!(f, "({q})"),
179        }
180    }
181}
182
183/// This type defines a lexicographical ordering.
184pub(crate) type LexOrdering = Vec<OrderByExpr>;
185
186/// DataFusion extension DDL for `CREATE EXTERNAL TABLE`
187///
188/// Syntax:
189///
190/// ```text
191/// CREATE EXTERNAL TABLE
192/// [ IF NOT EXISTS ]
193/// <TABLE_NAME>[ (<column_definition>) ]
194/// STORED AS <file_type>
195/// [ PARTITIONED BY (<column_definition list> | <column list>) ]
196/// [ WITH ORDER (<ordered column list>)
197/// [ OPTIONS (<key_value_list>) ]
198/// LOCATION <literal>
199///
200/// <column_definition> := (<column_name> <data_type>, ...)
201///
202/// <column_list> := (<column_name>, ...)
203///
204/// <ordered_column_list> := (<column_name> <sort_clause>, ...)
205///
206/// <key_value_list> := (<literal> <literal, <literal> <literal>, ...)
207/// ```
208#[derive(Debug, Clone, PartialEq, Eq)]
209pub struct CreateExternalTable {
210    /// Table name
211    pub name: ObjectName,
212    /// Optional schema
213    pub columns: Vec<ColumnDef>,
214    /// File type (Parquet, NDJSON, CSV, etc)
215    pub file_type: String,
216    /// Path to file
217    pub location: String,
218    /// Partition Columns
219    pub table_partition_cols: Vec<String>,
220    /// Ordered expressions
221    pub order_exprs: Vec<LexOrdering>,
222    /// Option to not error if table already exists
223    pub if_not_exists: bool,
224    /// Whether the table is a temporary table
225    pub temporary: bool,
226    /// Infinite streams?
227    pub unbounded: bool,
228    /// Table(provider) specific options
229    pub options: Vec<(String, Value)>,
230    /// A table-level constraint
231    pub constraints: Vec<TableConstraint>,
232}
233
234impl fmt::Display for CreateExternalTable {
235    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
236        write!(f, "CREATE EXTERNAL TABLE ")?;
237        if self.if_not_exists {
238            write!(f, "IF NOT EXISTS ")?;
239        }
240        write!(f, "{} ", self.name)?;
241        write!(f, "STORED AS {} ", self.file_type)?;
242        write!(f, "LOCATION {} ", self.location)
243    }
244}
245
246/// DataFusion SQL Statement.
247///
248/// This can either be a [`Statement`] from [`sqlparser`] from a
249/// standard SQL dialect, or a DataFusion extension such as `CREATE
250/// EXTERNAL TABLE`. See [`DFParser`] for more information.
251///
252/// [`Statement`]: sqlparser::ast::Statement
253#[derive(Debug, Clone, PartialEq, Eq)]
254pub enum Statement {
255    /// ANSI SQL AST node (from sqlparser-rs)
256    Statement(Box<SQLStatement>),
257    /// Extension: `CREATE EXTERNAL TABLE`
258    CreateExternalTable(CreateExternalTable),
259    /// Extension: `COPY TO`
260    CopyTo(CopyToStatement),
261    /// EXPLAIN for extensions
262    Explain(ExplainStatement),
263}
264
265impl fmt::Display for Statement {
266    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
267        match self {
268            Statement::Statement(stmt) => write!(f, "{stmt}"),
269            Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"),
270            Statement::CopyTo(stmt) => write!(f, "{stmt}"),
271            Statement::Explain(stmt) => write!(f, "{stmt}"),
272        }
273    }
274}
275
276fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), DataFusionError> {
277    if field.is_some() {
278        parser_err!(format!("{name} specified more than once",))?
279    }
280    Ok(())
281}
282
283/// DataFusion SQL Parser based on [`sqlparser`]
284///
285/// Parses DataFusion's SQL dialect, often delegating to [`sqlparser`]'s [`Parser`].
286///
287/// DataFusion mostly follows existing SQL dialects via
288/// `sqlparser`. However, certain statements such as `COPY` and
289/// `CREATE EXTERNAL TABLE` have special syntax in DataFusion. See
290/// [`Statement`] for a list of this special syntax
291pub struct DFParser<'a> {
292    pub parser: Parser<'a>,
293    options: SqlParserOptions,
294}
295
296/// Same as `sqlparser`
297const DEFAULT_RECURSION_LIMIT: usize = 50;
298const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
299
300/// Builder for [`DFParser`]
301///
302/// # Example: Create and Parse SQL statements
303/// ```
304/// # use datafusion_sql::parser::DFParserBuilder;
305/// # use datafusion_common::Result;
306/// # fn test() -> Result<()> {
307/// let mut parser = DFParserBuilder::new("SELECT * FROM foo; SELECT 1 + 2")
308///   .build()?;
309/// // parse the SQL into DFStatements
310/// let statements = parser.parse_statements()?;
311/// assert_eq!(statements.len(), 2);
312/// # Ok(())
313/// # }
314/// ```
315///
316/// # Example: Create and Parse expression with a different dialect
317/// ```
318/// # use datafusion_sql::parser::DFParserBuilder;
319/// # use datafusion_common::Result;
320/// # use datafusion_sql::sqlparser::dialect::MySqlDialect;
321/// # use datafusion_sql::sqlparser::ast::Expr;
322/// # fn test() -> Result<()> {
323/// let dialect = MySqlDialect{}; // Parse using MySQL dialect
324/// let mut parser = DFParserBuilder::new("1 + 2")
325///   .with_dialect(&dialect)
326///   .build()?;
327/// // parse 1+2 into an sqlparser::ast::Expr
328/// let res = parser.parse_expr()?;
329/// assert!(matches!(res.expr, Expr::BinaryOp {..}));
330/// # Ok(())
331/// # }
332/// ```
333pub struct DFParserBuilder<'a> {
334    /// The SQL string to parse
335    sql: &'a str,
336    /// The Dialect to use (defaults to [`GenericDialect`]
337    dialect: &'a dyn Dialect,
338    /// The recursion limit while parsing
339    recursion_limit: usize,
340}
341
342impl<'a> DFParserBuilder<'a> {
343    /// Create a new parser builder for the specified tokens using the
344    /// [`GenericDialect`].
345    pub fn new(sql: &'a str) -> Self {
346        Self {
347            sql,
348            dialect: &DEFAULT_DIALECT,
349            recursion_limit: DEFAULT_RECURSION_LIMIT,
350        }
351    }
352
353    /// Adjust the parser builder's dialect. Defaults to [`GenericDialect`]
354    pub fn with_dialect(mut self, dialect: &'a dyn Dialect) -> Self {
355        self.dialect = dialect;
356        self
357    }
358
359    /// Adjust the recursion limit of sql parsing.  Defaults to 50
360    pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
361        self.recursion_limit = recursion_limit;
362        self
363    }
364
365    pub fn build(self) -> Result<DFParser<'a>, DataFusionError> {
366        let mut tokenizer = Tokenizer::new(self.dialect, self.sql);
367        // Convert TokenizerError -> ParserError
368        let tokens = tokenizer
369            .tokenize_with_location()
370            .map_err(ParserError::from)?;
371
372        Ok(DFParser {
373            parser: Parser::new(self.dialect)
374                .with_tokens_with_locations(tokens)
375                .with_recursion_limit(self.recursion_limit),
376            options: SqlParserOptions {
377                recursion_limit: self.recursion_limit,
378                ..Default::default()
379            },
380        })
381    }
382}
383
384impl<'a> DFParser<'a> {
385    #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
386    pub fn new(sql: &'a str) -> Result<Self, DataFusionError> {
387        DFParserBuilder::new(sql).build()
388    }
389
390    #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
391    pub fn new_with_dialect(
392        sql: &'a str,
393        dialect: &'a dyn Dialect,
394    ) -> Result<Self, DataFusionError> {
395        DFParserBuilder::new(sql).with_dialect(dialect).build()
396    }
397
398    /// Parse a sql string into one or [`Statement`]s using the
399    /// [`GenericDialect`].
400    pub fn parse_sql(sql: &'a str) -> Result<VecDeque<Statement>, DataFusionError> {
401        let mut parser = DFParserBuilder::new(sql).build()?;
402
403        parser.parse_statements()
404    }
405
406    /// Parse a SQL string and produce one or more [`Statement`]s with
407    /// with the specified dialect.
408    pub fn parse_sql_with_dialect(
409        sql: &str,
410        dialect: &dyn Dialect,
411    ) -> Result<VecDeque<Statement>, DataFusionError> {
412        let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
413        parser.parse_statements()
414    }
415
416    pub fn parse_sql_into_expr(sql: &str) -> Result<ExprWithAlias, DataFusionError> {
417        DFParserBuilder::new(sql).build()?.parse_into_expr()
418    }
419
420    pub fn parse_sql_into_expr_with_dialect(
421        sql: &str,
422        dialect: &dyn Dialect,
423    ) -> Result<ExprWithAlias, DataFusionError> {
424        DFParserBuilder::new(sql)
425            .with_dialect(dialect)
426            .build()?
427            .parse_into_expr()
428    }
429
430    /// Parse a sql string into one or [`Statement`]s
431    pub fn parse_statements(&mut self) -> Result<VecDeque<Statement>, DataFusionError> {
432        let mut stmts = VecDeque::new();
433        let mut expecting_statement_delimiter = false;
434        loop {
435            // ignore empty statements (between successive statement delimiters)
436            while self.parser.consume_token(&Token::SemiColon) {
437                expecting_statement_delimiter = false;
438            }
439
440            if self.parser.peek_token() == Token::EOF {
441                break;
442            }
443            if expecting_statement_delimiter {
444                return self.expected("end of statement", self.parser.peek_token());
445            }
446
447            let statement = self.parse_statement()?;
448            stmts.push_back(statement);
449            expecting_statement_delimiter = true;
450        }
451        Ok(stmts)
452    }
453
454    /// Report an unexpected token
455    fn expected<T>(
456        &self,
457        expected: &str,
458        found: TokenWithSpan,
459    ) -> Result<T, DataFusionError> {
460        let sql_parser_span = found.span;
461        let span = Span::try_from_sqlparser_span(sql_parser_span);
462        let diagnostic = Diagnostic::new_error(
463            format!("Expected: {expected}, found: {found}{}", found.span.start),
464            span,
465        );
466        parser_err!(
467            format!("Expected: {expected}, found: {found}{}", found.span.start);
468            diagnostic=
469            diagnostic
470        )
471    }
472
473    fn expect_token(
474        &mut self,
475        expected: &str,
476        token: Token,
477    ) -> Result<(), DataFusionError> {
478        let next_token = self.parser.peek_token_ref();
479        if next_token.token != token {
480            self.expected(expected, next_token.clone())
481        } else {
482            Ok(())
483        }
484    }
485
486    /// Parse a new expression
487    pub fn parse_statement(&mut self) -> Result<Statement, DataFusionError> {
488        match self.parser.peek_token().token {
489            Token::Word(w) => {
490                match w.keyword {
491                    Keyword::CREATE => {
492                        self.parser.next_token(); // CREATE
493                        self.parse_create()
494                    }
495                    Keyword::COPY => {
496                        if let Token::Word(w) = self.parser.peek_nth_token(1).token {
497                            // use native parser for COPY INTO
498                            if w.keyword == Keyword::INTO {
499                                return self.parse_and_handle_statement();
500                            }
501                        }
502                        self.parser.next_token(); // COPY
503                        self.parse_copy()
504                    }
505                    Keyword::EXPLAIN => {
506                        self.parser.next_token(); // EXPLAIN
507                        self.parse_explain()
508                    }
509                    _ => {
510                        // use sqlparser-rs parser
511                        self.parse_and_handle_statement()
512                    }
513                }
514            }
515            _ => {
516                // use the native parser
517                self.parse_and_handle_statement()
518            }
519        }
520    }
521
522    pub fn parse_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
523        if let Token::Word(w) = self.parser.peek_token().token {
524            match w.keyword {
525                Keyword::CREATE | Keyword::COPY | Keyword::EXPLAIN => {
526                    return parser_err!("Unsupported command in expression")?;
527                }
528                _ => {}
529            }
530        }
531
532        Ok(self.parser.parse_expr_with_alias()?)
533    }
534
535    /// Parses the entire SQL string into an expression.
536    ///
537    /// In contrast to [`DFParser::parse_expr`], this function will report an error if the input
538    /// contains any trailing, unparsed tokens.
539    pub fn parse_into_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> {
540        let expr = self.parse_expr()?;
541        self.expect_token("end of expression", Token::EOF)?;
542        Ok(expr)
543    }
544
545    /// Helper method to parse a statement and handle errors consistently, especially for recursion limits
546    fn parse_and_handle_statement(&mut self) -> Result<Statement, DataFusionError> {
547        self.parser
548            .parse_statement()
549            .map(|stmt| Statement::Statement(Box::from(stmt)))
550            .map_err(|e| match e {
551                ParserError::RecursionLimitExceeded => DataFusionError::SQL(
552                    Box::new(ParserError::RecursionLimitExceeded),
553                    Some(format!(
554                        " (current limit: {})",
555                        self.options.recursion_limit
556                    )),
557                ),
558                other => DataFusionError::SQL(Box::new(other), None),
559            })
560    }
561
562    /// Parse a SQL `COPY TO` statement
563    pub fn parse_copy(&mut self) -> Result<Statement, DataFusionError> {
564        // parse as a query
565        let source = if self.parser.consume_token(&Token::LParen) {
566            let query = self.parser.parse_query()?;
567            self.parser.expect_token(&Token::RParen)?;
568            CopyToSource::Query(query)
569        } else {
570            // parse as table reference
571            let table_name = self.parser.parse_object_name(true)?;
572            CopyToSource::Relation(table_name)
573        };
574
575        #[derive(Default)]
576        struct Builder {
577            stored_as: Option<String>,
578            target: Option<String>,
579            partitioned_by: Option<Vec<String>>,
580            options: Option<Vec<(String, Value)>>,
581        }
582
583        let mut builder = Builder::default();
584
585        loop {
586            if let Some(keyword) = self.parser.parse_one_of_keywords(&[
587                Keyword::STORED,
588                Keyword::TO,
589                Keyword::PARTITIONED,
590                Keyword::OPTIONS,
591                Keyword::WITH,
592            ]) {
593                match keyword {
594                    Keyword::STORED => {
595                        self.parser.expect_keyword(Keyword::AS)?;
596                        ensure_not_set(&builder.stored_as, "STORED AS")?;
597                        builder.stored_as = Some(self.parse_file_format()?);
598                    }
599                    Keyword::TO => {
600                        ensure_not_set(&builder.target, "TO")?;
601                        builder.target = Some(self.parser.parse_literal_string()?);
602                    }
603                    Keyword::WITH => {
604                        self.parser.expect_keyword(Keyword::HEADER)?;
605                        self.parser.expect_keyword(Keyword::ROW)?;
606                        return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')")?;
607                    }
608                    Keyword::PARTITIONED => {
609                        self.parser.expect_keyword(Keyword::BY)?;
610                        ensure_not_set(&builder.partitioned_by, "PARTITIONED BY")?;
611                        builder.partitioned_by = Some(self.parse_partitions()?);
612                    }
613                    Keyword::OPTIONS => {
614                        ensure_not_set(&builder.options, "OPTIONS")?;
615                        builder.options = Some(self.parse_value_options()?);
616                    }
617                    _ => {
618                        unreachable!()
619                    }
620                }
621            } else {
622                let token = self.parser.next_token();
623                if token == Token::EOF || token == Token::SemiColon {
624                    break;
625                } else {
626                    return self.expected("end of statement or ;", token)?;
627                }
628            }
629        }
630
631        let Some(target) = builder.target else {
632            return parser_err!("Missing TO clause in COPY statement")?;
633        };
634
635        Ok(Statement::CopyTo(CopyToStatement {
636            source,
637            target,
638            partitioned_by: builder.partitioned_by.unwrap_or(vec![]),
639            stored_as: builder.stored_as,
640            options: builder.options.unwrap_or(vec![]),
641        }))
642    }
643
644    /// Parse the next token as a key name for an option list
645    ///
646    /// Note this is different than [`parse_literal_string`]
647    /// because it allows keywords as well as other non words
648    ///
649    /// [`parse_literal_string`]: sqlparser::parser::Parser::parse_literal_string
650    pub fn parse_option_key(&mut self) -> Result<String, DataFusionError> {
651        let next_token = self.parser.next_token();
652        match next_token.token {
653            Token::Word(Word { value, .. }) => {
654                let mut parts = vec![value];
655                while self.parser.consume_token(&Token::Period) {
656                    let next_token = self.parser.next_token();
657                    if let Token::Word(Word { value, .. }) = next_token.token {
658                        parts.push(value);
659                    } else {
660                        // Unquoted namespaced keys have to conform to the syntax
661                        // "<WORD>[\.<WORD>]*". If we have a key that breaks this
662                        // pattern, error out:
663                        return self.expected("key name", next_token);
664                    }
665                }
666                Ok(parts.join("."))
667            }
668            Token::SingleQuotedString(s) => Ok(s),
669            Token::DoubleQuotedString(s) => Ok(s),
670            Token::EscapedStringLiteral(s) => Ok(s),
671            _ => self.expected("key name", next_token),
672        }
673    }
674
675    /// Parse the next token as a value for an option list
676    ///
677    /// Note this is different than [`parse_value`] as it allows any
678    /// word or keyword in this location.
679    ///
680    /// [`parse_value`]: sqlparser::parser::Parser::parse_value
681    pub fn parse_option_value(&mut self) -> Result<Value, DataFusionError> {
682        let next_token = self.parser.next_token();
683        match next_token.token {
684            // e.g. things like "snappy" or "gzip" that may be keywords
685            Token::Word(word) => Ok(Value::SingleQuotedString(word.value)),
686            Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)),
687            Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)),
688            Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)),
689            Token::Number(n, l) => Ok(Value::Number(n, l)),
690            _ => self.expected("string or numeric value", next_token),
691        }
692    }
693
694    /// Parse a SQL `EXPLAIN`
695    pub fn parse_explain(&mut self) -> Result<Statement, DataFusionError> {
696        let analyze = self.parser.parse_keyword(Keyword::ANALYZE);
697        let verbose = self.parser.parse_keyword(Keyword::VERBOSE);
698        let format = self.parse_explain_format()?;
699
700        let statement = self.parse_statement()?;
701
702        Ok(Statement::Explain(ExplainStatement {
703            statement: Box::new(statement),
704            analyze,
705            verbose,
706            format,
707        }))
708    }
709
710    pub fn parse_explain_format(&mut self) -> Result<Option<String>, DataFusionError> {
711        if !self.parser.parse_keyword(Keyword::FORMAT) {
712            return Ok(None);
713        }
714
715        let next_token = self.parser.next_token();
716        let format = match next_token.token {
717            Token::Word(w) => Ok(w.value),
718            Token::SingleQuotedString(w) => Ok(w),
719            Token::DoubleQuotedString(w) => Ok(w),
720            _ => self.expected("an explain format such as TREE", next_token),
721        }?;
722        Ok(Some(format))
723    }
724
725    /// Parse a SQL `CREATE` statement handling `CREATE EXTERNAL TABLE`
726    pub fn parse_create(&mut self) -> Result<Statement, DataFusionError> {
727        if self.parser.parse_keyword(Keyword::EXTERNAL) {
728            self.parse_create_external_table(false)
729        } else if self.parser.parse_keyword(Keyword::UNBOUNDED) {
730            self.parser.expect_keyword(Keyword::EXTERNAL)?;
731            self.parse_create_external_table(true)
732        } else {
733            Ok(Statement::Statement(Box::from(self.parser.parse_create()?)))
734        }
735    }
736
737    fn parse_partitions(&mut self) -> Result<Vec<String>, DataFusionError> {
738        let mut partitions: Vec<String> = vec![];
739        if !self.parser.consume_token(&Token::LParen)
740            || self.parser.consume_token(&Token::RParen)
741        {
742            return Ok(partitions);
743        }
744
745        loop {
746            if let Token::Word(_) = self.parser.peek_token().token {
747                let identifier = self.parser.parse_identifier()?;
748                partitions.push(identifier.to_string());
749            } else {
750                return self.expected("partition name", self.parser.peek_token());
751            }
752            let comma = self.parser.consume_token(&Token::Comma);
753            if self.parser.consume_token(&Token::RParen) {
754                // allow a trailing comma, even though it's not in standard
755                break;
756            } else if !comma {
757                return self.expected(
758                    "',' or ')' after partition definition",
759                    self.parser.peek_token(),
760                );
761            }
762        }
763        Ok(partitions)
764    }
765
766    /// Parse the ordering clause of a `CREATE EXTERNAL TABLE` SQL statement
767    pub fn parse_order_by_exprs(&mut self) -> Result<Vec<OrderByExpr>, DataFusionError> {
768        let mut values = vec![];
769        self.parser.expect_token(&Token::LParen)?;
770        loop {
771            values.push(self.parse_order_by_expr()?);
772            if !self.parser.consume_token(&Token::Comma) {
773                self.parser.expect_token(&Token::RParen)?;
774                return Ok(values);
775            }
776        }
777    }
778
779    /// Parse an ORDER BY sub-expression optionally followed by ASC or DESC.
780    pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, DataFusionError> {
781        let expr = self.parser.parse_expr()?;
782
783        let asc = if self.parser.parse_keyword(Keyword::ASC) {
784            Some(true)
785        } else if self.parser.parse_keyword(Keyword::DESC) {
786            Some(false)
787        } else {
788            None
789        };
790
791        let nulls_first = if self
792            .parser
793            .parse_keywords(&[Keyword::NULLS, Keyword::FIRST])
794        {
795            Some(true)
796        } else if self.parser.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) {
797            Some(false)
798        } else {
799            None
800        };
801
802        Ok(OrderByExpr {
803            expr,
804            options: OrderByOptions { asc, nulls_first },
805            with_fill: None,
806        })
807    }
808
809    // This is a copy of the equivalent implementation in sqlparser.
810    fn parse_columns(
811        &mut self,
812    ) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), DataFusionError> {
813        let mut columns = vec![];
814        let mut constraints = vec![];
815        if !self.parser.consume_token(&Token::LParen)
816            || self.parser.consume_token(&Token::RParen)
817        {
818            return Ok((columns, constraints));
819        }
820
821        loop {
822            if let Some(constraint) = self.parser.parse_optional_table_constraint()? {
823                constraints.push(constraint);
824            } else if let Token::Word(_) = self.parser.peek_token().token {
825                let column_def = self.parse_column_def()?;
826                columns.push(column_def);
827            } else {
828                return self.expected(
829                    "column name or constraint definition",
830                    self.parser.peek_token(),
831                );
832            }
833            let comma = self.parser.consume_token(&Token::Comma);
834            if self.parser.consume_token(&Token::RParen) {
835                // allow a trailing comma, even though it's not in standard
836                break;
837            } else if !comma {
838                return self.expected(
839                    "',' or ')' after column definition",
840                    self.parser.peek_token(),
841                );
842            }
843        }
844
845        Ok((columns, constraints))
846    }
847
848    fn parse_column_def(&mut self) -> Result<ColumnDef, DataFusionError> {
849        let name = self.parser.parse_identifier()?;
850        let data_type = self.parser.parse_data_type()?;
851        let mut options = vec![];
852        loop {
853            if self.parser.parse_keyword(Keyword::CONSTRAINT) {
854                let name = Some(self.parser.parse_identifier()?);
855                if let Some(option) = self.parser.parse_optional_column_option()? {
856                    options.push(ColumnOptionDef { name, option });
857                } else {
858                    return self.expected(
859                        "constraint details after CONSTRAINT <name>",
860                        self.parser.peek_token(),
861                    );
862                }
863            } else if let Some(option) = self.parser.parse_optional_column_option()? {
864                options.push(ColumnOptionDef { name: None, option });
865            } else {
866                break;
867            };
868        }
869        Ok(ColumnDef {
870            name,
871            data_type,
872            options,
873        })
874    }
875
876    fn parse_create_external_table(
877        &mut self,
878        unbounded: bool,
879    ) -> Result<Statement, DataFusionError> {
880        let temporary = self
881            .parser
882            .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY])
883            .is_some();
884        self.parser.expect_keyword(Keyword::TABLE)?;
885        let if_not_exists =
886            self.parser
887                .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
888        let table_name = self.parser.parse_object_name(true)?;
889        let (mut columns, constraints) = self.parse_columns()?;
890
891        #[derive(Default)]
892        struct Builder {
893            file_type: Option<String>,
894            location: Option<String>,
895            table_partition_cols: Option<Vec<String>>,
896            order_exprs: Vec<LexOrdering>,
897            options: Option<Vec<(String, Value)>>,
898        }
899        let mut builder = Builder::default();
900
901        loop {
902            if let Some(keyword) = self.parser.parse_one_of_keywords(&[
903                Keyword::STORED,
904                Keyword::LOCATION,
905                Keyword::WITH,
906                Keyword::DELIMITER,
907                Keyword::COMPRESSION,
908                Keyword::PARTITIONED,
909                Keyword::OPTIONS,
910            ]) {
911                match keyword {
912                    Keyword::STORED => {
913                        self.parser.expect_keyword(Keyword::AS)?;
914                        ensure_not_set(&builder.file_type, "STORED AS")?;
915                        builder.file_type = Some(self.parse_file_format()?);
916                    }
917                    Keyword::LOCATION => {
918                        ensure_not_set(&builder.location, "LOCATION")?;
919                        builder.location = Some(self.parser.parse_literal_string()?);
920                    }
921                    Keyword::WITH => {
922                        if self.parser.parse_keyword(Keyword::ORDER) {
923                            builder.order_exprs.push(self.parse_order_by_exprs()?);
924                        } else {
925                            self.parser.expect_keyword(Keyword::HEADER)?;
926                            self.parser.expect_keyword(Keyword::ROW)?;
927                            return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)")?;
928                        }
929                    }
930                    Keyword::DELIMITER => {
931                        return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')")?;
932                    }
933                    Keyword::COMPRESSION => {
934                        self.parser.expect_keyword(Keyword::TYPE)?;
935                        return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)")?;
936                    }
937                    Keyword::PARTITIONED => {
938                        self.parser.expect_keyword(Keyword::BY)?;
939                        ensure_not_set(&builder.table_partition_cols, "PARTITIONED BY")?;
940                        // Expects either list of column names (col_name [, col_name]*)
941                        // or list of column definitions (col_name datatype [, col_name datatype]* )
942                        // use the token after the name to decide which parsing rule to use
943                        // Note that mixing both names and definitions is not allowed
944                        let peeked = self.parser.peek_nth_token(2);
945                        if peeked == Token::Comma || peeked == Token::RParen {
946                            // List of column names
947                            builder.table_partition_cols = Some(self.parse_partitions()?)
948                        } else {
949                            // List of column defs
950                            let (cols, cons) = self.parse_columns()?;
951                            builder.table_partition_cols = Some(
952                                cols.iter().map(|col| col.name.to_string()).collect(),
953                            );
954
955                            columns.extend(cols);
956
957                            if !cons.is_empty() {
958                                return sql_err!(ParserError::ParserError(
959                                    "Constraints on Partition Columns are not supported"
960                                        .to_string(),
961                                ));
962                            }
963                        }
964                    }
965                    Keyword::OPTIONS => {
966                        ensure_not_set(&builder.options, "OPTIONS")?;
967                        builder.options = Some(self.parse_value_options()?);
968                    }
969                    _ => {
970                        unreachable!()
971                    }
972                }
973            } else {
974                let token = self.parser.next_token();
975                if token == Token::EOF || token == Token::SemiColon {
976                    break;
977                } else {
978                    return self.expected("end of statement or ;", token)?;
979                }
980            }
981        }
982
983        // Validations: location and file_type are required
984        if builder.file_type.is_none() {
985            return sql_err!(ParserError::ParserError(
986                "Missing STORED AS clause in CREATE EXTERNAL TABLE statement".into(),
987            ));
988        }
989        if builder.location.is_none() {
990            return sql_err!(ParserError::ParserError(
991                "Missing LOCATION clause in CREATE EXTERNAL TABLE statement".into(),
992            ));
993        }
994
995        let create = CreateExternalTable {
996            name: table_name,
997            columns,
998            file_type: builder.file_type.unwrap(),
999            location: builder.location.unwrap(),
1000            table_partition_cols: builder.table_partition_cols.unwrap_or(vec![]),
1001            order_exprs: builder.order_exprs,
1002            if_not_exists,
1003            temporary,
1004            unbounded,
1005            options: builder.options.unwrap_or(Vec::new()),
1006            constraints,
1007        };
1008        Ok(Statement::CreateExternalTable(create))
1009    }
1010
1011    /// Parses the set of valid formats
1012    fn parse_file_format(&mut self) -> Result<String, DataFusionError> {
1013        let token = self.parser.next_token();
1014        match &token.token {
1015            Token::Word(w) => parse_file_type(&w.value),
1016            _ => self.expected("one of ARROW, PARQUET, NDJSON, or CSV", token),
1017        }
1018    }
1019
1020    /// Parses (key value) style options into a map of String --> [`Value`].
1021    ///
1022    /// This method supports keywords as key names as well as multiple
1023    /// value types such as Numbers as well as Strings.
1024    fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>, DataFusionError> {
1025        let mut options = vec![];
1026        self.parser.expect_token(&Token::LParen)?;
1027
1028        loop {
1029            let key = self.parse_option_key()?;
1030            let value = self.parse_option_value()?;
1031            options.push((key, value));
1032            let comma = self.parser.consume_token(&Token::Comma);
1033            if self.parser.consume_token(&Token::RParen) {
1034                // Allow a trailing comma, even though it's not in standard
1035                break;
1036            } else if !comma {
1037                return self.expected(
1038                    "',' or ')' after option definition",
1039                    self.parser.peek_token(),
1040                );
1041            }
1042        }
1043        Ok(options)
1044    }
1045}
1046
1047#[cfg(test)]
1048mod tests {
1049    use super::*;
1050    use datafusion_common::assert_contains;
1051    use sqlparser::ast::Expr::Identifier;
1052    use sqlparser::ast::{BinaryOperator, DataType, Expr, Ident, ValueWithSpan};
1053    use sqlparser::dialect::SnowflakeDialect;
1054    use sqlparser::tokenizer::Span;
1055
1056    fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), DataFusionError> {
1057        let statements = DFParser::parse_sql(sql)?;
1058        assert_eq!(
1059            statements.len(),
1060            1,
1061            "Expected to parse exactly one statement"
1062        );
1063        assert_eq!(statements[0], expected, "actual:\n{:#?}", statements[0]);
1064        Ok(())
1065    }
1066
1067    /// Parses sql and asserts that the expected error message was found
1068    fn expect_parse_error(sql: &str, expected_error: &str) {
1069        match DFParser::parse_sql(sql) {
1070            Ok(statements) => {
1071                panic!(
1072                    "Expected parse error for '{sql}', but was successful: {statements:?}"
1073                );
1074            }
1075            Err(e) => {
1076                let error_message = e.to_string();
1077                assert!(
1078                    error_message.contains(expected_error),
1079                    "Expected error '{expected_error}' not found in actual error '{error_message}'"
1080                );
1081            }
1082        }
1083    }
1084
1085    fn make_column_def(name: impl Into<String>, data_type: DataType) -> ColumnDef {
1086        ColumnDef {
1087            name: Ident {
1088                value: name.into(),
1089                quote_style: None,
1090                span: Span::empty(),
1091            },
1092            data_type,
1093            options: vec![],
1094        }
1095    }
1096
1097    #[test]
1098    fn create_external_table() -> Result<(), DataFusionError> {
1099        // positive case
1100        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
1101        let display = None;
1102        let name = ObjectName::from(vec![Ident::from("t")]);
1103        let expected = Statement::CreateExternalTable(CreateExternalTable {
1104            name: name.clone(),
1105            columns: vec![make_column_def("c1", DataType::Int(display))],
1106            file_type: "CSV".to_string(),
1107            location: "foo.csv".into(),
1108            table_partition_cols: vec![],
1109            order_exprs: vec![],
1110            if_not_exists: false,
1111            temporary: false,
1112            unbounded: false,
1113            options: vec![],
1114            constraints: vec![],
1115        });
1116        expect_parse_ok(sql, expected)?;
1117
1118        // positive case: leading space
1119        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'     ";
1120        let expected = Statement::CreateExternalTable(CreateExternalTable {
1121            name: name.clone(),
1122            columns: vec![make_column_def("c1", DataType::Int(None))],
1123            file_type: "CSV".to_string(),
1124            location: "foo.csv".into(),
1125            table_partition_cols: vec![],
1126            order_exprs: vec![],
1127            if_not_exists: false,
1128            temporary: false,
1129            unbounded: false,
1130            options: vec![],
1131            constraints: vec![],
1132        });
1133        expect_parse_ok(sql, expected)?;
1134
1135        // positive case: leading space + semicolon
1136        let sql =
1137            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'      ;";
1138        let expected = Statement::CreateExternalTable(CreateExternalTable {
1139            name: name.clone(),
1140            columns: vec![make_column_def("c1", DataType::Int(None))],
1141            file_type: "CSV".to_string(),
1142            location: "foo.csv".into(),
1143            table_partition_cols: vec![],
1144            order_exprs: vec![],
1145            if_not_exists: false,
1146            temporary: false,
1147            unbounded: false,
1148            options: vec![],
1149            constraints: vec![],
1150        });
1151        expect_parse_ok(sql, expected)?;
1152
1153        // positive case with delimiter
1154        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
1155        let display = None;
1156        let expected = Statement::CreateExternalTable(CreateExternalTable {
1157            name: name.clone(),
1158            columns: vec![make_column_def("c1", DataType::Int(display))],
1159            file_type: "CSV".to_string(),
1160            location: "foo.csv".into(),
1161            table_partition_cols: vec![],
1162            order_exprs: vec![],
1163            if_not_exists: false,
1164            temporary: false,
1165            unbounded: false,
1166            options: vec![(
1167                "format.delimiter".into(),
1168                Value::SingleQuotedString("|".into()),
1169            )],
1170            constraints: vec![],
1171        });
1172        expect_parse_ok(sql, expected)?;
1173
1174        // positive case: partitioned by
1175        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'";
1176        let display = None;
1177        let expected = Statement::CreateExternalTable(CreateExternalTable {
1178            name: name.clone(),
1179            columns: vec![make_column_def("c1", DataType::Int(display))],
1180            file_type: "CSV".to_string(),
1181            location: "foo.csv".into(),
1182            table_partition_cols: vec!["p1".to_string(), "p2".to_string()],
1183            order_exprs: vec![],
1184            if_not_exists: false,
1185            temporary: false,
1186            unbounded: false,
1187            options: vec![],
1188            constraints: vec![],
1189        });
1190        expect_parse_ok(sql, expected)?;
1191
1192        // positive case: it is ok for sql stmt with `COMPRESSION TYPE GZIP` tokens
1193        let sqls =
1194            vec![
1195             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1196             ('format.compression' 'GZIP')", "GZIP"),
1197             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1198             ('format.compression' 'BZIP2')", "BZIP2"),
1199             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1200             ('format.compression' 'XZ')", "XZ"),
1201             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1202             ('format.compression' 'ZSTD')", "ZSTD"),
1203         ];
1204        for (sql, compression) in sqls {
1205            let expected = Statement::CreateExternalTable(CreateExternalTable {
1206                name: name.clone(),
1207                columns: vec![make_column_def("c1", DataType::Int(display))],
1208                file_type: "CSV".to_string(),
1209                location: "foo.csv".into(),
1210                table_partition_cols: vec![],
1211                order_exprs: vec![],
1212                if_not_exists: false,
1213                temporary: false,
1214                unbounded: false,
1215                options: vec![(
1216                    "format.compression".into(),
1217                    Value::SingleQuotedString(compression.into()),
1218                )],
1219                constraints: vec![],
1220            });
1221            expect_parse_ok(sql, expected)?;
1222        }
1223
1224        // positive case: it is ok for parquet files not to have columns specified
1225        let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1226        let expected = Statement::CreateExternalTable(CreateExternalTable {
1227            name: name.clone(),
1228            columns: vec![],
1229            file_type: "PARQUET".to_string(),
1230            location: "foo.parquet".into(),
1231            table_partition_cols: vec![],
1232            order_exprs: vec![],
1233            if_not_exists: false,
1234            temporary: false,
1235            unbounded: false,
1236            options: vec![],
1237            constraints: vec![],
1238        });
1239        expect_parse_ok(sql, expected)?;
1240
1241        // positive case: it is ok for parquet files to be other than upper case
1242        let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'";
1243        let expected = Statement::CreateExternalTable(CreateExternalTable {
1244            name: name.clone(),
1245            columns: vec![],
1246            file_type: "PARQUET".to_string(),
1247            location: "foo.parquet".into(),
1248            table_partition_cols: vec![],
1249            order_exprs: vec![],
1250            if_not_exists: false,
1251            temporary: false,
1252            unbounded: false,
1253            options: vec![],
1254            constraints: vec![],
1255        });
1256        expect_parse_ok(sql, expected)?;
1257
1258        // positive case: it is ok for avro files not to have columns specified
1259        let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
1260        let expected = Statement::CreateExternalTable(CreateExternalTable {
1261            name: name.clone(),
1262            columns: vec![],
1263            file_type: "AVRO".to_string(),
1264            location: "foo.avro".into(),
1265            table_partition_cols: vec![],
1266            order_exprs: vec![],
1267            if_not_exists: false,
1268            temporary: false,
1269            unbounded: false,
1270            options: vec![],
1271            constraints: vec![],
1272        });
1273        expect_parse_ok(sql, expected)?;
1274
1275        // positive case: it is ok for avro files not to have columns specified
1276        let sql =
1277            "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'";
1278        let expected = Statement::CreateExternalTable(CreateExternalTable {
1279            name: name.clone(),
1280            columns: vec![],
1281            file_type: "PARQUET".to_string(),
1282            location: "foo.parquet".into(),
1283            table_partition_cols: vec![],
1284            order_exprs: vec![],
1285            if_not_exists: true,
1286            temporary: false,
1287            unbounded: false,
1288            options: vec![],
1289            constraints: vec![],
1290        });
1291        expect_parse_ok(sql, expected)?;
1292
1293        // positive case: column definition allowed in 'partition by' clause
1294        let sql =
1295            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'";
1296        let expected = Statement::CreateExternalTable(CreateExternalTable {
1297            name: name.clone(),
1298            columns: vec![
1299                make_column_def("c1", DataType::Int(None)),
1300                make_column_def("p1", DataType::Int(None)),
1301            ],
1302            file_type: "CSV".to_string(),
1303            location: "foo.csv".into(),
1304            table_partition_cols: vec!["p1".to_string()],
1305            order_exprs: vec![],
1306            if_not_exists: false,
1307            temporary: false,
1308            unbounded: false,
1309            options: vec![],
1310            constraints: vec![],
1311        });
1312        expect_parse_ok(sql, expected)?;
1313
1314        // negative case: mixed column defs and column names in `PARTITIONED BY` clause
1315        let sql =
1316            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'";
1317        expect_parse_error(
1318            sql,
1319            "SQL error: ParserError(\"Expected: a data type name, found: ) at Line: 1, Column: 73\")",
1320        );
1321
1322        // negative case: mixed column defs and column names in `PARTITIONED BY` clause
1323        let sql =
1324            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'";
1325        expect_parse_error(sql, "SQL error: ParserError(\"Expected: ',' or ')' after partition definition, found: int at Line: 1, Column: 70\")");
1326
1327        // positive case: additional options (one entry) can be specified
1328        let sql =
1329            "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'";
1330        let expected = Statement::CreateExternalTable(CreateExternalTable {
1331            name: name.clone(),
1332            columns: vec![],
1333            file_type: "X".to_string(),
1334            location: "blahblah".into(),
1335            table_partition_cols: vec![],
1336            order_exprs: vec![],
1337            if_not_exists: false,
1338            temporary: false,
1339            unbounded: false,
1340            options: vec![("k1".into(), Value::SingleQuotedString("v1".into()))],
1341            constraints: vec![],
1342        });
1343        expect_parse_ok(sql, expected)?;
1344
1345        // positive case: additional options (multiple entries) can be specified
1346        let sql =
1347            "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'";
1348        let expected = Statement::CreateExternalTable(CreateExternalTable {
1349            name: name.clone(),
1350            columns: vec![],
1351            file_type: "X".to_string(),
1352            location: "blahblah".into(),
1353            table_partition_cols: vec![],
1354            order_exprs: vec![],
1355            if_not_exists: false,
1356            temporary: false,
1357            unbounded: false,
1358            options: vec![
1359                ("k1".into(), Value::SingleQuotedString("v1".into())),
1360                ("k2".into(), Value::SingleQuotedString("v2".into())),
1361            ],
1362            constraints: vec![],
1363        });
1364        expect_parse_ok(sql, expected)?;
1365
1366        // Ordered Col
1367        let sqls = ["CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'",
1368                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'",
1369                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS LAST) LOCATION 'foo.csv'",
1370                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC) LOCATION 'foo.csv'",
1371                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC) LOCATION 'foo.csv'",
1372                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS FIRST) LOCATION 'foo.csv'",
1373                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS LAST) LOCATION 'foo.csv'",
1374                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS FIRST) LOCATION 'foo.csv'",
1375                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS LAST) LOCATION 'foo.csv'"];
1376        let expected = vec![
1377            (None, None),
1378            (None, Some(true)),
1379            (None, Some(false)),
1380            (Some(true), None),
1381            (Some(false), None),
1382            (Some(false), Some(true)),
1383            (Some(false), Some(false)),
1384            (Some(true), Some(true)),
1385            (Some(true), Some(false)),
1386        ];
1387        for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) {
1388            let expected = Statement::CreateExternalTable(CreateExternalTable {
1389                name: name.clone(),
1390                columns: vec![make_column_def("c1", DataType::Int(None))],
1391                file_type: "CSV".to_string(),
1392                location: "foo.csv".into(),
1393                table_partition_cols: vec![],
1394                order_exprs: vec![vec![OrderByExpr {
1395                    expr: Identifier(Ident {
1396                        value: "c1".to_owned(),
1397                        quote_style: None,
1398                        span: Span::empty(),
1399                    }),
1400                    options: OrderByOptions { asc, nulls_first },
1401                    with_fill: None,
1402                }]],
1403                if_not_exists: false,
1404                temporary: false,
1405                unbounded: false,
1406                options: vec![],
1407                constraints: vec![],
1408            });
1409            expect_parse_ok(sql, expected)?;
1410        }
1411
1412        // Ordered Col
1413        let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
1414        let display = None;
1415        let expected = Statement::CreateExternalTable(CreateExternalTable {
1416            name: name.clone(),
1417            columns: vec![
1418                make_column_def("c1", DataType::Int(display)),
1419                make_column_def("c2", DataType::Int(display)),
1420            ],
1421            file_type: "CSV".to_string(),
1422            location: "foo.csv".into(),
1423            table_partition_cols: vec![],
1424            order_exprs: vec![vec![
1425                OrderByExpr {
1426                    expr: Identifier(Ident {
1427                        value: "c1".to_owned(),
1428                        quote_style: None,
1429                        span: Span::empty(),
1430                    }),
1431                    options: OrderByOptions {
1432                        asc: Some(true),
1433                        nulls_first: None,
1434                    },
1435                    with_fill: None,
1436                },
1437                OrderByExpr {
1438                    expr: Identifier(Ident {
1439                        value: "c2".to_owned(),
1440                        quote_style: None,
1441                        span: Span::empty(),
1442                    }),
1443                    options: OrderByOptions {
1444                        asc: Some(false),
1445                        nulls_first: Some(true),
1446                    },
1447                    with_fill: None,
1448                },
1449            ]],
1450            if_not_exists: false,
1451            temporary: false,
1452            unbounded: false,
1453            options: vec![],
1454            constraints: vec![],
1455        });
1456        expect_parse_ok(sql, expected)?;
1457
1458        // Ordered Binary op
1459        let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
1460        let display = None;
1461        let expected = Statement::CreateExternalTable(CreateExternalTable {
1462            name: name.clone(),
1463            columns: vec![
1464                make_column_def("c1", DataType::Int(display)),
1465                make_column_def("c2", DataType::Int(display)),
1466            ],
1467            file_type: "CSV".to_string(),
1468            location: "foo.csv".into(),
1469            table_partition_cols: vec![],
1470            order_exprs: vec![vec![OrderByExpr {
1471                expr: Expr::BinaryOp {
1472                    left: Box::new(Identifier(Ident {
1473                        value: "c1".to_owned(),
1474                        quote_style: None,
1475                        span: Span::empty(),
1476                    })),
1477                    op: BinaryOperator::Minus,
1478                    right: Box::new(Identifier(Ident {
1479                        value: "c2".to_owned(),
1480                        quote_style: None,
1481                        span: Span::empty(),
1482                    })),
1483                },
1484                options: OrderByOptions {
1485                    asc: Some(true),
1486                    nulls_first: None,
1487                },
1488                with_fill: None,
1489            }]],
1490            if_not_exists: false,
1491            temporary: false,
1492            unbounded: false,
1493            options: vec![],
1494            constraints: vec![],
1495        });
1496        expect_parse_ok(sql, expected)?;
1497
1498        // Most complete CREATE EXTERNAL TABLE statement possible
1499        let sql = "
1500            CREATE UNBOUNDED EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float)
1501            STORED AS PARQUET
1502            WITH ORDER (c1 - c2 ASC)
1503            PARTITIONED BY (c1)
1504            LOCATION 'foo.parquet'
1505            OPTIONS ('format.compression' 'zstd',
1506                     'format.delimiter' '*',
1507                     'ROW_GROUP_SIZE' '1024',
1508                     'TRUNCATE' 'NO',
1509                     'format.has_header' 'true')";
1510        let expected = Statement::CreateExternalTable(CreateExternalTable {
1511            name: name.clone(),
1512            columns: vec![
1513                make_column_def("c1", DataType::Int(None)),
1514                make_column_def("c2", DataType::Float(None)),
1515            ],
1516            file_type: "PARQUET".to_string(),
1517            location: "foo.parquet".into(),
1518            table_partition_cols: vec!["c1".into()],
1519            order_exprs: vec![vec![OrderByExpr {
1520                expr: Expr::BinaryOp {
1521                    left: Box::new(Identifier(Ident {
1522                        value: "c1".to_owned(),
1523                        quote_style: None,
1524                        span: Span::empty(),
1525                    })),
1526                    op: BinaryOperator::Minus,
1527                    right: Box::new(Identifier(Ident {
1528                        value: "c2".to_owned(),
1529                        quote_style: None,
1530                        span: Span::empty(),
1531                    })),
1532                },
1533                options: OrderByOptions {
1534                    asc: Some(true),
1535                    nulls_first: None,
1536                },
1537                with_fill: None,
1538            }]],
1539            if_not_exists: true,
1540            temporary: false,
1541            unbounded: true,
1542            options: vec![
1543                (
1544                    "format.compression".into(),
1545                    Value::SingleQuotedString("zstd".into()),
1546                ),
1547                (
1548                    "format.delimiter".into(),
1549                    Value::SingleQuotedString("*".into()),
1550                ),
1551                (
1552                    "ROW_GROUP_SIZE".into(),
1553                    Value::SingleQuotedString("1024".into()),
1554                ),
1555                ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1556                (
1557                    "format.has_header".into(),
1558                    Value::SingleQuotedString("true".into()),
1559                ),
1560            ],
1561            constraints: vec![],
1562        });
1563        expect_parse_ok(sql, expected)?;
1564
1565        // For error cases, see: `create_external_table.slt`
1566
1567        Ok(())
1568    }
1569
1570    #[test]
1571    fn copy_to_table_to_table() -> Result<(), DataFusionError> {
1572        // positive case
1573        let sql = "COPY foo TO bar STORED AS CSV";
1574        let expected = Statement::CopyTo(CopyToStatement {
1575            source: object_name("foo"),
1576            target: "bar".to_string(),
1577            partitioned_by: vec![],
1578            stored_as: Some("CSV".to_owned()),
1579            options: vec![],
1580        });
1581
1582        assert_eq!(verified_stmt(sql), expected);
1583        Ok(())
1584    }
1585
1586    #[test]
1587    fn skip_copy_into_snowflake() -> Result<(), DataFusionError> {
1588        let sql = "COPY INTO foo FROM @~/staged FILE_FORMAT = (FORMAT_NAME = 'mycsv');";
1589        let dialect = Box::new(SnowflakeDialect);
1590        let statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?;
1591
1592        assert_eq!(
1593            statements.len(),
1594            1,
1595            "Expected to parse exactly one statement"
1596        );
1597        if let Statement::CopyTo(_) = &statements[0] {
1598            panic!("Expected non COPY TO statement, but was successful: {statements:?}");
1599        }
1600        Ok(())
1601    }
1602
1603    #[test]
1604    fn explain_copy_to_table_to_table() -> Result<(), DataFusionError> {
1605        let cases = vec![
1606            ("EXPLAIN COPY foo TO bar STORED AS PARQUET", false, false),
1607            (
1608                "EXPLAIN ANALYZE COPY foo TO bar STORED AS PARQUET",
1609                true,
1610                false,
1611            ),
1612            (
1613                "EXPLAIN VERBOSE COPY foo TO bar STORED AS PARQUET",
1614                false,
1615                true,
1616            ),
1617            (
1618                "EXPLAIN ANALYZE VERBOSE COPY foo TO bar STORED AS PARQUET",
1619                true,
1620                true,
1621            ),
1622        ];
1623        for (sql, analyze, verbose) in cases {
1624            println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}");
1625
1626            let expected_copy = Statement::CopyTo(CopyToStatement {
1627                source: object_name("foo"),
1628                target: "bar".to_string(),
1629                partitioned_by: vec![],
1630                stored_as: Some("PARQUET".to_owned()),
1631                options: vec![],
1632            });
1633            let expected = Statement::Explain(ExplainStatement {
1634                analyze,
1635                verbose,
1636                format: None,
1637                statement: Box::new(expected_copy),
1638            });
1639            assert_eq!(verified_stmt(sql), expected);
1640        }
1641        Ok(())
1642    }
1643
1644    #[test]
1645    fn copy_to_query_to_table() -> Result<(), DataFusionError> {
1646        let statement = verified_stmt("SELECT 1");
1647
1648        // unwrap the various layers
1649        let statement = if let Statement::Statement(statement) = statement {
1650            *statement
1651        } else {
1652            panic!("Expected statement, got {statement:?}");
1653        };
1654
1655        let query = if let SQLStatement::Query(query) = statement {
1656            query
1657        } else {
1658            panic!("Expected query, got {statement:?}");
1659        };
1660
1661        let sql =
1662            "COPY (SELECT 1) TO bar STORED AS CSV OPTIONS ('format.has_header' 'true')";
1663        let expected = Statement::CopyTo(CopyToStatement {
1664            source: CopyToSource::Query(query),
1665            target: "bar".to_string(),
1666            partitioned_by: vec![],
1667            stored_as: Some("CSV".to_owned()),
1668            options: vec![(
1669                "format.has_header".into(),
1670                Value::SingleQuotedString("true".into()),
1671            )],
1672        });
1673        assert_eq!(verified_stmt(sql), expected);
1674        Ok(())
1675    }
1676
1677    #[test]
1678    fn copy_to_options() -> Result<(), DataFusionError> {
1679        let sql = "COPY foo TO bar STORED AS CSV OPTIONS ('row_group_size' '55')";
1680        let expected = Statement::CopyTo(CopyToStatement {
1681            source: object_name("foo"),
1682            target: "bar".to_string(),
1683            partitioned_by: vec![],
1684            stored_as: Some("CSV".to_owned()),
1685            options: vec![(
1686                "row_group_size".to_string(),
1687                Value::SingleQuotedString("55".to_string()),
1688            )],
1689        });
1690        assert_eq!(verified_stmt(sql), expected);
1691        Ok(())
1692    }
1693
1694    #[test]
1695    fn copy_to_partitioned_by() -> Result<(), DataFusionError> {
1696        let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')";
1697        let expected = Statement::CopyTo(CopyToStatement {
1698            source: object_name("foo"),
1699            target: "bar".to_string(),
1700            partitioned_by: vec!["a".to_string()],
1701            stored_as: Some("CSV".to_owned()),
1702            options: vec![(
1703                "row_group_size".to_string(),
1704                Value::SingleQuotedString("55".to_string()),
1705            )],
1706        });
1707        assert_eq!(verified_stmt(sql), expected);
1708        Ok(())
1709    }
1710
1711    #[test]
1712    fn copy_to_multi_options() -> Result<(), DataFusionError> {
1713        // order of options is preserved
1714        let sql =
1715            "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy, 'execution.keep_partition_by_columns' true)";
1716
1717        let expected_options = vec![
1718            (
1719                "format.row_group_size".to_string(),
1720                Value::Number("55".to_string(), false),
1721            ),
1722            (
1723                "format.compression".to_string(),
1724                Value::SingleQuotedString("snappy".to_string()),
1725            ),
1726            (
1727                "execution.keep_partition_by_columns".to_string(),
1728                Value::SingleQuotedString("true".to_string()),
1729            ),
1730        ];
1731
1732        let mut statements = DFParser::parse_sql(sql).unwrap();
1733        assert_eq!(statements.len(), 1);
1734        let only_statement = statements.pop_front().unwrap();
1735
1736        let options = if let Statement::CopyTo(copy_to) = only_statement {
1737            copy_to.options
1738        } else {
1739            panic!("Expected copy");
1740        };
1741
1742        assert_eq!(options, expected_options);
1743
1744        Ok(())
1745    }
1746
1747    // For error cases, see: `copy.slt`
1748
1749    fn object_name(name: &str) -> CopyToSource {
1750        CopyToSource::Relation(ObjectName::from(vec![Ident::new(name)]))
1751    }
1752
1753    // Based on  sqlparser-rs
1754    // https://github.com/sqlparser-rs/sqlparser-rs/blob/ae3b5844c839072c235965fe0d1bddc473dced87/src/test_utils.rs#L104-L116
1755
1756    /// Ensures that `sql` parses as a single [Statement]
1757    ///
1758    /// If `canonical` is non empty,this function additionally asserts
1759    /// that:
1760    ///
1761    /// 1. parsing `sql` results in the same [`Statement`] as parsing
1762    ///    `canonical`.
1763    ///
1764    /// 2. re-serializing the result of parsing `sql` produces the same
1765    ///    `canonical` sql string
1766    fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement {
1767        let mut statements = DFParser::parse_sql(sql).unwrap();
1768        assert_eq!(statements.len(), 1);
1769
1770        if sql != canonical {
1771            assert_eq!(DFParser::parse_sql(canonical).unwrap(), statements);
1772        }
1773
1774        let only_statement = statements.pop_front().unwrap();
1775        assert_eq!(
1776            canonical.to_uppercase(),
1777            only_statement.to_string().to_uppercase()
1778        );
1779        only_statement
1780    }
1781
1782    /// Ensures that `sql` parses as a single [Statement], and that
1783    /// re-serializing the parse result produces the same `sql`
1784    /// string (is not modified after a serialization round-trip).
1785    fn verified_stmt(sql: &str) -> Statement {
1786        one_statement_parses_to(sql, sql)
1787    }
1788
1789    #[test]
1790    /// Checks the recursion limit works for sql queries
1791    /// Recursion can happen easily with binary exprs (i.e, AND or OR)
1792    fn test_recursion_limit() {
1793        let sql = "SELECT 1 OR 2";
1794
1795        // Expect parse to succeed
1796        DFParserBuilder::new(sql)
1797            .build()
1798            .unwrap()
1799            .parse_statements()
1800            .unwrap();
1801
1802        let err = DFParserBuilder::new(sql)
1803            .with_recursion_limit(1)
1804            .build()
1805            .unwrap()
1806            .parse_statements()
1807            .unwrap_err();
1808
1809        assert_contains!(
1810            err.to_string(),
1811            "SQL error: RecursionLimitExceeded (current limit: 1)"
1812        );
1813    }
1814
1815    fn expect_parse_expr_ok(sql: &str, expected: ExprWithAlias) {
1816        let expr = DFParser::parse_sql_into_expr(sql).unwrap();
1817        assert_eq!(expr, expected, "actual:\n{expr:#?}");
1818    }
1819
1820    /// Parses sql and asserts that the expected error message was found
1821    fn expect_parse_expr_error(sql: &str, expected_error: &str) {
1822        match DFParser::parse_sql_into_expr(sql) {
1823            Ok(expr) => {
1824                panic!("Expected parse error for '{sql}', but was successful: {expr:#?}");
1825            }
1826            Err(e) => {
1827                let error_message = e.to_string();
1828                assert!(
1829                    error_message.contains(expected_error),
1830                    "Expected error '{expected_error}' not found in actual error '{error_message}'"
1831                );
1832            }
1833        }
1834    }
1835
1836    #[test]
1837    fn literal() {
1838        expect_parse_expr_ok(
1839            "1234",
1840            ExprWithAlias {
1841                expr: Expr::Value(ValueWithSpan::from(Value::Number(
1842                    "1234".to_string(),
1843                    false,
1844                ))),
1845                alias: None,
1846            },
1847        )
1848    }
1849
1850    #[test]
1851    fn literal_with_alias() {
1852        expect_parse_expr_ok(
1853            "1234 as foo",
1854            ExprWithAlias {
1855                expr: Expr::Value(ValueWithSpan::from(Value::Number(
1856                    "1234".to_string(),
1857                    false,
1858                ))),
1859                alias: Some(Ident::from("foo")),
1860            },
1861        )
1862    }
1863
1864    #[test]
1865    fn literal_with_alias_and_trailing_tokens() {
1866        expect_parse_expr_error(
1867            "1234 as foo.bar",
1868            "Expected: end of expression, found: .",
1869        )
1870    }
1871
1872    #[test]
1873    fn literal_with_alias_and_trailing_whitespace() {
1874        expect_parse_expr_ok(
1875            "1234 as foo   ",
1876            ExprWithAlias {
1877                expr: Expr::Value(ValueWithSpan::from(Value::Number(
1878                    "1234".to_string(),
1879                    false,
1880                ))),
1881                alias: Some(Ident::from("foo")),
1882            },
1883        )
1884    }
1885
1886    #[test]
1887    fn literal_with_alias_and_trailing_whitespace_and_token() {
1888        expect_parse_expr_error(
1889            "1234 as foo    bar",
1890            "Expected: end of expression, found: bar",
1891        )
1892    }
1893}