datafusion_sql/
parser.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`DFParser`]: DataFusion SQL Parser based on [`sqlparser`]
19//!
20//! This parser implements DataFusion specific statements such as
21//! `CREATE EXTERNAL TABLE`
22
23use std::collections::VecDeque;
24use std::fmt;
25
26use sqlparser::ast::{ExprWithAlias, OrderByOptions};
27use sqlparser::tokenizer::TokenWithSpan;
28use sqlparser::{
29    ast::{
30        ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query,
31        Statement as SQLStatement, TableConstraint, Value,
32    },
33    dialect::{keywords::Keyword, Dialect, GenericDialect},
34    parser::{Parser, ParserError},
35    tokenizer::{Token, Tokenizer, Word},
36};
37
38// Use `Parser::expected` instead, if possible
39macro_rules! parser_err {
40    ($MSG:expr) => {
41        Err(ParserError::ParserError($MSG.to_string()))
42    };
43}
44
45fn parse_file_type(s: &str) -> Result<String, ParserError> {
46    Ok(s.to_uppercase())
47}
48
49/// DataFusion specific `EXPLAIN`
50///
51/// Syntax:
52/// ```sql
53/// EXPLAIN <ANALYZE> <VERBOSE> [FORMAT format] statement
54///```
55#[derive(Debug, Clone, PartialEq, Eq)]
56pub struct ExplainStatement {
57    /// `EXPLAIN ANALYZE ..`
58    pub analyze: bool,
59    /// `EXPLAIN .. VERBOSE ..`
60    pub verbose: bool,
61    /// `EXPLAIN .. FORMAT `
62    pub format: Option<String>,
63    /// The statement to analyze. Note this is a DataFusion [`Statement`] (not a
64    /// [`sqlparser::ast::Statement`] so that we can use `EXPLAIN`, `COPY`, and other
65    /// DataFusion specific statements
66    pub statement: Box<Statement>,
67}
68
69impl fmt::Display for ExplainStatement {
70    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
71        let Self {
72            analyze,
73            verbose,
74            format,
75            statement,
76        } = self;
77
78        write!(f, "EXPLAIN ")?;
79        if *analyze {
80            write!(f, "ANALYZE ")?;
81        }
82        if *verbose {
83            write!(f, "VERBOSE ")?;
84        }
85        if let Some(format) = format.as_ref() {
86            write!(f, "FORMAT {format} ")?;
87        }
88
89        write!(f, "{statement}")
90    }
91}
92
93/// DataFusion extension DDL for `COPY`
94///
95/// # Syntax:
96///
97/// ```text
98/// COPY <table_name | (<query>)>
99/// TO
100/// <destination_url>
101/// (key_value_list)
102/// ```
103///
104/// # Examples
105///
106/// ```sql
107/// COPY lineitem  TO 'lineitem'
108/// STORED AS PARQUET (
109///   partitions 16,
110///   row_group_limit_rows 100000,
111///   row_group_limit_bytes 200000
112/// )
113///
114/// COPY (SELECT l_orderkey from lineitem) to 'lineitem.parquet';
115/// ```
116#[derive(Debug, Clone, PartialEq, Eq)]
117pub struct CopyToStatement {
118    /// From where the data comes from
119    pub source: CopyToSource,
120    /// The URL to where the data is heading
121    pub target: String,
122    /// Partition keys
123    pub partitioned_by: Vec<String>,
124    /// File type (Parquet, NDJSON, CSV etc.)
125    pub stored_as: Option<String>,
126    /// Target specific options
127    pub options: Vec<(String, Value)>,
128}
129
130impl fmt::Display for CopyToStatement {
131    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
132        let Self {
133            source,
134            target,
135            partitioned_by,
136            stored_as,
137            options,
138            ..
139        } = self;
140
141        write!(f, "COPY {source} TO {target}")?;
142        if let Some(file_type) = stored_as {
143            write!(f, " STORED AS {}", file_type)?;
144        }
145        if !partitioned_by.is_empty() {
146            write!(f, " PARTITIONED BY ({})", partitioned_by.join(", "))?;
147        }
148
149        if !options.is_empty() {
150            let opts: Vec<_> =
151                options.iter().map(|(k, v)| format!("'{k}' {v}")).collect();
152            write!(f, " OPTIONS ({})", opts.join(", "))?;
153        }
154
155        Ok(())
156    }
157}
158
159#[derive(Debug, Clone, PartialEq, Eq)]
160pub enum CopyToSource {
161    /// `COPY <table> TO ...`
162    Relation(ObjectName),
163    /// COPY (...query...) TO ...
164    Query(Box<Query>),
165}
166
167impl fmt::Display for CopyToSource {
168    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
169        match self {
170            CopyToSource::Relation(r) => write!(f, "{r}"),
171            CopyToSource::Query(q) => write!(f, "({q})"),
172        }
173    }
174}
175
176/// This type defines a lexicographical ordering.
177pub(crate) type LexOrdering = Vec<OrderByExpr>;
178
179/// DataFusion extension DDL for `CREATE EXTERNAL TABLE`
180///
181/// Syntax:
182///
183/// ```text
184/// CREATE EXTERNAL TABLE
185/// [ IF NOT EXISTS ]
186/// <TABLE_NAME>[ (<column_definition>) ]
187/// STORED AS <file_type>
188/// [ PARTITIONED BY (<column_definition list> | <column list>) ]
189/// [ WITH ORDER (<ordered column list>)
190/// [ OPTIONS (<key_value_list>) ]
191/// LOCATION <literal>
192///
193/// <column_definition> := (<column_name> <data_type>, ...)
194///
195/// <column_list> := (<column_name>, ...)
196///
197/// <ordered_column_list> := (<column_name> <sort_clause>, ...)
198///
199/// <key_value_list> := (<literal> <literal, <literal> <literal>, ...)
200/// ```
201#[derive(Debug, Clone, PartialEq, Eq)]
202pub struct CreateExternalTable {
203    /// Table name
204    pub name: ObjectName,
205    /// Optional schema
206    pub columns: Vec<ColumnDef>,
207    /// File type (Parquet, NDJSON, CSV, etc)
208    pub file_type: String,
209    /// Path to file
210    pub location: String,
211    /// Partition Columns
212    pub table_partition_cols: Vec<String>,
213    /// Ordered expressions
214    pub order_exprs: Vec<LexOrdering>,
215    /// Option to not error if table already exists
216    pub if_not_exists: bool,
217    /// Whether the table is a temporary table
218    pub temporary: bool,
219    /// Infinite streams?
220    pub unbounded: bool,
221    /// Table(provider) specific options
222    pub options: Vec<(String, Value)>,
223    /// A table-level constraint
224    pub constraints: Vec<TableConstraint>,
225}
226
227impl fmt::Display for CreateExternalTable {
228    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
229        write!(f, "CREATE EXTERNAL TABLE ")?;
230        if self.if_not_exists {
231            write!(f, "IF NOT EXISTS ")?;
232        }
233        write!(f, "{} ", self.name)?;
234        write!(f, "STORED AS {} ", self.file_type)?;
235        write!(f, "LOCATION {} ", self.location)
236    }
237}
238
239/// DataFusion SQL Statement.
240///
241/// This can either be a [`Statement`] from [`sqlparser`] from a
242/// standard SQL dialect, or a DataFusion extension such as `CREATE
243/// EXTERNAL TABLE`. See [`DFParser`] for more information.
244///
245/// [`Statement`]: sqlparser::ast::Statement
246#[derive(Debug, Clone, PartialEq, Eq)]
247pub enum Statement {
248    /// ANSI SQL AST node (from sqlparser-rs)
249    Statement(Box<SQLStatement>),
250    /// Extension: `CREATE EXTERNAL TABLE`
251    CreateExternalTable(CreateExternalTable),
252    /// Extension: `COPY TO`
253    CopyTo(CopyToStatement),
254    /// EXPLAIN for extensions
255    Explain(ExplainStatement),
256}
257
258impl fmt::Display for Statement {
259    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
260        match self {
261            Statement::Statement(stmt) => write!(f, "{stmt}"),
262            Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"),
263            Statement::CopyTo(stmt) => write!(f, "{stmt}"),
264            Statement::Explain(stmt) => write!(f, "{stmt}"),
265        }
266    }
267}
268
269fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), ParserError> {
270    if field.is_some() {
271        return Err(ParserError::ParserError(format!(
272            "{name} specified more than once",
273        )));
274    }
275    Ok(())
276}
277
278/// DataFusion SQL Parser based on [`sqlparser`]
279///
280/// Parses DataFusion's SQL dialect, often delegating to [`sqlparser`]'s [`Parser`].
281///
282/// DataFusion mostly follows existing SQL dialects via
283/// `sqlparser`. However, certain statements such as `COPY` and
284/// `CREATE EXTERNAL TABLE` have special syntax in DataFusion. See
285/// [`Statement`] for a list of this special syntax
286pub struct DFParser<'a> {
287    pub parser: Parser<'a>,
288}
289
290/// Same as `sqlparser`
291const DEFAULT_RECURSION_LIMIT: usize = 50;
292const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
293
294/// Builder for [`DFParser`]
295///
296/// # Example: Create and Parse SQL statements
297/// ```
298/// # use datafusion_sql::parser::DFParserBuilder;
299/// # use datafusion_common::Result;
300/// # fn test() -> Result<()> {
301/// let mut parser = DFParserBuilder::new("SELECT * FROM foo; SELECT 1 + 2")
302///   .build()?;
303/// // parse the SQL into DFStatements
304/// let statements = parser.parse_statements()?;
305/// assert_eq!(statements.len(), 2);
306/// # Ok(())
307/// # }
308/// ```
309///
310/// # Example: Create and Parse expression with a different dialect
311/// ```
312/// # use datafusion_sql::parser::DFParserBuilder;
313/// # use datafusion_common::Result;
314/// # use datafusion_sql::sqlparser::dialect::MySqlDialect;
315/// # use datafusion_sql::sqlparser::ast::Expr;
316/// # fn test() -> Result<()> {
317/// let dialect = MySqlDialect{}; // Parse using MySQL dialect
318/// let mut parser = DFParserBuilder::new("1 + 2")
319///   .with_dialect(&dialect)
320///   .build()?;
321/// // parse 1+2 into an sqlparser::ast::Expr
322/// let res = parser.parse_expr()?;
323/// assert!(matches!(res.expr, Expr::BinaryOp {..}));
324/// # Ok(())
325/// # }
326/// ```
327pub struct DFParserBuilder<'a> {
328    /// The SQL string to parse
329    sql: &'a str,
330    /// The Dialect to use (defaults to [`GenericDialect`]
331    dialect: &'a dyn Dialect,
332    /// The recursion limit while parsing
333    recursion_limit: usize,
334}
335
336impl<'a> DFParserBuilder<'a> {
337    /// Create a new parser builder for the specified tokens using the
338    /// [`GenericDialect`].
339    pub fn new(sql: &'a str) -> Self {
340        Self {
341            sql,
342            dialect: &DEFAULT_DIALECT,
343            recursion_limit: DEFAULT_RECURSION_LIMIT,
344        }
345    }
346
347    /// Adjust the parser builder's dialect. Defaults to [`GenericDialect`]
348    pub fn with_dialect(mut self, dialect: &'a dyn Dialect) -> Self {
349        self.dialect = dialect;
350        self
351    }
352
353    /// Adjust the recursion limit of sql parsing.  Defaults to 50
354    pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
355        self.recursion_limit = recursion_limit;
356        self
357    }
358
359    pub fn build(self) -> Result<DFParser<'a>, ParserError> {
360        let mut tokenizer = Tokenizer::new(self.dialect, self.sql);
361        let tokens = tokenizer.tokenize_with_location()?;
362
363        Ok(DFParser {
364            parser: Parser::new(self.dialect)
365                .with_tokens_with_locations(tokens)
366                .with_recursion_limit(self.recursion_limit),
367        })
368    }
369}
370
371impl<'a> DFParser<'a> {
372    #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
373    pub fn new(sql: &'a str) -> Result<Self, ParserError> {
374        DFParserBuilder::new(sql).build()
375    }
376
377    #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
378    pub fn new_with_dialect(
379        sql: &'a str,
380        dialect: &'a dyn Dialect,
381    ) -> Result<Self, ParserError> {
382        DFParserBuilder::new(sql).with_dialect(dialect).build()
383    }
384
385    /// Parse a sql string into one or [`Statement`]s using the
386    /// [`GenericDialect`].
387    pub fn parse_sql(sql: &'a str) -> Result<VecDeque<Statement>, ParserError> {
388        let mut parser = DFParserBuilder::new(sql).build()?;
389
390        parser.parse_statements()
391    }
392
393    /// Parse a SQL string and produce one or more [`Statement`]s with
394    /// with the specified dialect.
395    pub fn parse_sql_with_dialect(
396        sql: &str,
397        dialect: &dyn Dialect,
398    ) -> Result<VecDeque<Statement>, ParserError> {
399        let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
400        parser.parse_statements()
401    }
402
403    pub fn parse_sql_into_expr_with_dialect(
404        sql: &str,
405        dialect: &dyn Dialect,
406    ) -> Result<ExprWithAlias, ParserError> {
407        let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
408
409        parser.parse_expr()
410    }
411
412    /// Parse a sql string into one or [`Statement`]s
413    pub fn parse_statements(&mut self) -> Result<VecDeque<Statement>, ParserError> {
414        let mut stmts = VecDeque::new();
415        let mut expecting_statement_delimiter = false;
416        loop {
417            // ignore empty statements (between successive statement delimiters)
418            while self.parser.consume_token(&Token::SemiColon) {
419                expecting_statement_delimiter = false;
420            }
421
422            if self.parser.peek_token() == Token::EOF {
423                break;
424            }
425            if expecting_statement_delimiter {
426                return self.expected("end of statement", self.parser.peek_token());
427            }
428
429            let statement = self.parse_statement()?;
430            stmts.push_back(statement);
431            expecting_statement_delimiter = true;
432        }
433        Ok(stmts)
434    }
435
436    /// Report an unexpected token
437    fn expected<T>(
438        &self,
439        expected: &str,
440        found: TokenWithSpan,
441    ) -> Result<T, ParserError> {
442        parser_err!(format!("Expected {expected}, found: {found}"))
443    }
444
445    /// Parse a new expression
446    pub fn parse_statement(&mut self) -> Result<Statement, ParserError> {
447        match self.parser.peek_token().token {
448            Token::Word(w) => {
449                match w.keyword {
450                    Keyword::CREATE => {
451                        self.parser.next_token(); // CREATE
452                        self.parse_create()
453                    }
454                    Keyword::COPY => {
455                        if let Token::Word(w) = self.parser.peek_nth_token(1).token {
456                            // use native parser for COPY INTO
457                            if w.keyword == Keyword::INTO {
458                                return Ok(Statement::Statement(Box::from(
459                                    self.parser.parse_statement()?,
460                                )));
461                            }
462                        }
463                        self.parser.next_token(); // COPY
464                        self.parse_copy()
465                    }
466                    Keyword::EXPLAIN => {
467                        self.parser.next_token(); // EXPLAIN
468                        self.parse_explain()
469                    }
470                    _ => {
471                        // use sqlparser-rs parser
472                        Ok(Statement::Statement(Box::from(
473                            self.parser.parse_statement()?,
474                        )))
475                    }
476                }
477            }
478            _ => {
479                // use the native parser
480                Ok(Statement::Statement(Box::from(
481                    self.parser.parse_statement()?,
482                )))
483            }
484        }
485    }
486
487    pub fn parse_expr(&mut self) -> Result<ExprWithAlias, ParserError> {
488        if let Token::Word(w) = self.parser.peek_token().token {
489            match w.keyword {
490                Keyword::CREATE | Keyword::COPY | Keyword::EXPLAIN => {
491                    return parser_err!("Unsupported command in expression");
492                }
493                _ => {}
494            }
495        }
496
497        self.parser.parse_expr_with_alias()
498    }
499
500    /// Parse a SQL `COPY TO` statement
501    pub fn parse_copy(&mut self) -> Result<Statement, ParserError> {
502        // parse as a query
503        let source = if self.parser.consume_token(&Token::LParen) {
504            let query = self.parser.parse_query()?;
505            self.parser.expect_token(&Token::RParen)?;
506            CopyToSource::Query(query)
507        } else {
508            // parse as table reference
509            let table_name = self.parser.parse_object_name(true)?;
510            CopyToSource::Relation(table_name)
511        };
512
513        #[derive(Default)]
514        struct Builder {
515            stored_as: Option<String>,
516            target: Option<String>,
517            partitioned_by: Option<Vec<String>>,
518            options: Option<Vec<(String, Value)>>,
519        }
520
521        let mut builder = Builder::default();
522
523        loop {
524            if let Some(keyword) = self.parser.parse_one_of_keywords(&[
525                Keyword::STORED,
526                Keyword::TO,
527                Keyword::PARTITIONED,
528                Keyword::OPTIONS,
529                Keyword::WITH,
530            ]) {
531                match keyword {
532                    Keyword::STORED => {
533                        self.parser.expect_keyword(Keyword::AS)?;
534                        ensure_not_set(&builder.stored_as, "STORED AS")?;
535                        builder.stored_as = Some(self.parse_file_format()?);
536                    }
537                    Keyword::TO => {
538                        ensure_not_set(&builder.target, "TO")?;
539                        builder.target = Some(self.parser.parse_literal_string()?);
540                    }
541                    Keyword::WITH => {
542                        self.parser.expect_keyword(Keyword::HEADER)?;
543                        self.parser.expect_keyword(Keyword::ROW)?;
544                        return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')");
545                    }
546                    Keyword::PARTITIONED => {
547                        self.parser.expect_keyword(Keyword::BY)?;
548                        ensure_not_set(&builder.partitioned_by, "PARTITIONED BY")?;
549                        builder.partitioned_by = Some(self.parse_partitions()?);
550                    }
551                    Keyword::OPTIONS => {
552                        ensure_not_set(&builder.options, "OPTIONS")?;
553                        builder.options = Some(self.parse_value_options()?);
554                    }
555                    _ => {
556                        unreachable!()
557                    }
558                }
559            } else {
560                let token = self.parser.next_token();
561                if token == Token::EOF || token == Token::SemiColon {
562                    break;
563                } else {
564                    return Err(ParserError::ParserError(format!(
565                        "Unexpected token {token}"
566                    )));
567                }
568            }
569        }
570
571        let Some(target) = builder.target else {
572            return Err(ParserError::ParserError(
573                "Missing TO clause in COPY statement".into(),
574            ));
575        };
576
577        Ok(Statement::CopyTo(CopyToStatement {
578            source,
579            target,
580            partitioned_by: builder.partitioned_by.unwrap_or(vec![]),
581            stored_as: builder.stored_as,
582            options: builder.options.unwrap_or(vec![]),
583        }))
584    }
585
586    /// Parse the next token as a key name for an option list
587    ///
588    /// Note this is different than [`parse_literal_string`]
589    /// because it allows keywords as well as other non words
590    ///
591    /// [`parse_literal_string`]: sqlparser::parser::Parser::parse_literal_string
592    pub fn parse_option_key(&mut self) -> Result<String, ParserError> {
593        let next_token = self.parser.next_token();
594        match next_token.token {
595            Token::Word(Word { value, .. }) => {
596                let mut parts = vec![value];
597                while self.parser.consume_token(&Token::Period) {
598                    let next_token = self.parser.next_token();
599                    if let Token::Word(Word { value, .. }) = next_token.token {
600                        parts.push(value);
601                    } else {
602                        // Unquoted namespaced keys have to conform to the syntax
603                        // "<WORD>[\.<WORD>]*". If we have a key that breaks this
604                        // pattern, error out:
605                        return self.parser.expected("key name", next_token);
606                    }
607                }
608                Ok(parts.join("."))
609            }
610            Token::SingleQuotedString(s) => Ok(s),
611            Token::DoubleQuotedString(s) => Ok(s),
612            Token::EscapedStringLiteral(s) => Ok(s),
613            _ => self.parser.expected("key name", next_token),
614        }
615    }
616
617    /// Parse the next token as a value for an option list
618    ///
619    /// Note this is different than [`parse_value`] as it allows any
620    /// word or keyword in this location.
621    ///
622    /// [`parse_value`]: sqlparser::parser::Parser::parse_value
623    pub fn parse_option_value(&mut self) -> Result<Value, ParserError> {
624        let next_token = self.parser.next_token();
625        match next_token.token {
626            // e.g. things like "snappy" or "gzip" that may be keywords
627            Token::Word(word) => Ok(Value::SingleQuotedString(word.value)),
628            Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)),
629            Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)),
630            Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)),
631            Token::Number(n, l) => Ok(Value::Number(n, l)),
632            _ => self.parser.expected("string or numeric value", next_token),
633        }
634    }
635
636    /// Parse a SQL `EXPLAIN`
637    pub fn parse_explain(&mut self) -> Result<Statement, ParserError> {
638        let analyze = self.parser.parse_keyword(Keyword::ANALYZE);
639        let verbose = self.parser.parse_keyword(Keyword::VERBOSE);
640        let format = self.parse_explain_format()?;
641
642        let statement = self.parse_statement()?;
643
644        Ok(Statement::Explain(ExplainStatement {
645            statement: Box::new(statement),
646            analyze,
647            verbose,
648            format,
649        }))
650    }
651
652    pub fn parse_explain_format(&mut self) -> Result<Option<String>, ParserError> {
653        if !self.parser.parse_keyword(Keyword::FORMAT) {
654            return Ok(None);
655        }
656
657        let next_token = self.parser.next_token();
658        let format = match next_token.token {
659            Token::Word(w) => Ok(w.value),
660            Token::SingleQuotedString(w) => Ok(w),
661            Token::DoubleQuotedString(w) => Ok(w),
662            _ => self
663                .parser
664                .expected("an explain format such as TREE", next_token),
665        }?;
666        Ok(Some(format))
667    }
668
669    /// Parse a SQL `CREATE` statement handling `CREATE EXTERNAL TABLE`
670    pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
671        if self.parser.parse_keyword(Keyword::EXTERNAL) {
672            self.parse_create_external_table(false)
673        } else if self.parser.parse_keyword(Keyword::UNBOUNDED) {
674            self.parser.expect_keyword(Keyword::EXTERNAL)?;
675            self.parse_create_external_table(true)
676        } else {
677            Ok(Statement::Statement(Box::from(self.parser.parse_create()?)))
678        }
679    }
680
681    fn parse_partitions(&mut self) -> Result<Vec<String>, ParserError> {
682        let mut partitions: Vec<String> = vec![];
683        if !self.parser.consume_token(&Token::LParen)
684            || self.parser.consume_token(&Token::RParen)
685        {
686            return Ok(partitions);
687        }
688
689        loop {
690            if let Token::Word(_) = self.parser.peek_token().token {
691                let identifier = self.parser.parse_identifier()?;
692                partitions.push(identifier.to_string());
693            } else {
694                return self.expected("partition name", self.parser.peek_token());
695            }
696            let comma = self.parser.consume_token(&Token::Comma);
697            if self.parser.consume_token(&Token::RParen) {
698                // allow a trailing comma, even though it's not in standard
699                break;
700            } else if !comma {
701                return self.expected(
702                    "',' or ')' after partition definition",
703                    self.parser.peek_token(),
704                );
705            }
706        }
707        Ok(partitions)
708    }
709
710    /// Parse the ordering clause of a `CREATE EXTERNAL TABLE` SQL statement
711    pub fn parse_order_by_exprs(&mut self) -> Result<Vec<OrderByExpr>, ParserError> {
712        let mut values = vec![];
713        self.parser.expect_token(&Token::LParen)?;
714        loop {
715            values.push(self.parse_order_by_expr()?);
716            if !self.parser.consume_token(&Token::Comma) {
717                self.parser.expect_token(&Token::RParen)?;
718                return Ok(values);
719            }
720        }
721    }
722
723    /// Parse an ORDER BY sub-expression optionally followed by ASC or DESC.
724    pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, ParserError> {
725        let expr = self.parser.parse_expr()?;
726
727        let asc = if self.parser.parse_keyword(Keyword::ASC) {
728            Some(true)
729        } else if self.parser.parse_keyword(Keyword::DESC) {
730            Some(false)
731        } else {
732            None
733        };
734
735        let nulls_first = if self
736            .parser
737            .parse_keywords(&[Keyword::NULLS, Keyword::FIRST])
738        {
739            Some(true)
740        } else if self.parser.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) {
741            Some(false)
742        } else {
743            None
744        };
745
746        Ok(OrderByExpr {
747            expr,
748            options: OrderByOptions { asc, nulls_first },
749            with_fill: None,
750        })
751    }
752
753    // This is a copy of the equivalent implementation in sqlparser.
754    fn parse_columns(
755        &mut self,
756    ) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), ParserError> {
757        let mut columns = vec![];
758        let mut constraints = vec![];
759        if !self.parser.consume_token(&Token::LParen)
760            || self.parser.consume_token(&Token::RParen)
761        {
762            return Ok((columns, constraints));
763        }
764
765        loop {
766            if let Some(constraint) = self.parser.parse_optional_table_constraint()? {
767                constraints.push(constraint);
768            } else if let Token::Word(_) = self.parser.peek_token().token {
769                let column_def = self.parse_column_def()?;
770                columns.push(column_def);
771            } else {
772                return self.expected(
773                    "column name or constraint definition",
774                    self.parser.peek_token(),
775                );
776            }
777            let comma = self.parser.consume_token(&Token::Comma);
778            if self.parser.consume_token(&Token::RParen) {
779                // allow a trailing comma, even though it's not in standard
780                break;
781            } else if !comma {
782                return self.expected(
783                    "',' or ')' after column definition",
784                    self.parser.peek_token(),
785                );
786            }
787        }
788
789        Ok((columns, constraints))
790    }
791
792    fn parse_column_def(&mut self) -> Result<ColumnDef, ParserError> {
793        let name = self.parser.parse_identifier()?;
794        let data_type = self.parser.parse_data_type()?;
795        let mut options = vec![];
796        loop {
797            if self.parser.parse_keyword(Keyword::CONSTRAINT) {
798                let name = Some(self.parser.parse_identifier()?);
799                if let Some(option) = self.parser.parse_optional_column_option()? {
800                    options.push(ColumnOptionDef { name, option });
801                } else {
802                    return self.expected(
803                        "constraint details after CONSTRAINT <name>",
804                        self.parser.peek_token(),
805                    );
806                }
807            } else if let Some(option) = self.parser.parse_optional_column_option()? {
808                options.push(ColumnOptionDef { name: None, option });
809            } else {
810                break;
811            };
812        }
813        Ok(ColumnDef {
814            name,
815            data_type,
816            options,
817        })
818    }
819
820    fn parse_create_external_table(
821        &mut self,
822        unbounded: bool,
823    ) -> Result<Statement, ParserError> {
824        let temporary = self
825            .parser
826            .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY])
827            .is_some();
828        self.parser.expect_keyword(Keyword::TABLE)?;
829        let if_not_exists =
830            self.parser
831                .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
832        let table_name = self.parser.parse_object_name(true)?;
833        let (mut columns, constraints) = self.parse_columns()?;
834
835        #[derive(Default)]
836        struct Builder {
837            file_type: Option<String>,
838            location: Option<String>,
839            table_partition_cols: Option<Vec<String>>,
840            order_exprs: Vec<LexOrdering>,
841            options: Option<Vec<(String, Value)>>,
842        }
843        let mut builder = Builder::default();
844
845        loop {
846            if let Some(keyword) = self.parser.parse_one_of_keywords(&[
847                Keyword::STORED,
848                Keyword::LOCATION,
849                Keyword::WITH,
850                Keyword::DELIMITER,
851                Keyword::COMPRESSION,
852                Keyword::PARTITIONED,
853                Keyword::OPTIONS,
854            ]) {
855                match keyword {
856                    Keyword::STORED => {
857                        self.parser.expect_keyword(Keyword::AS)?;
858                        ensure_not_set(&builder.file_type, "STORED AS")?;
859                        builder.file_type = Some(self.parse_file_format()?);
860                    }
861                    Keyword::LOCATION => {
862                        ensure_not_set(&builder.location, "LOCATION")?;
863                        builder.location = Some(self.parser.parse_literal_string()?);
864                    }
865                    Keyword::WITH => {
866                        if self.parser.parse_keyword(Keyword::ORDER) {
867                            builder.order_exprs.push(self.parse_order_by_exprs()?);
868                        } else {
869                            self.parser.expect_keyword(Keyword::HEADER)?;
870                            self.parser.expect_keyword(Keyword::ROW)?;
871                            return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)");
872                        }
873                    }
874                    Keyword::DELIMITER => {
875                        return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')");
876                    }
877                    Keyword::COMPRESSION => {
878                        self.parser.expect_keyword(Keyword::TYPE)?;
879                        return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)");
880                    }
881                    Keyword::PARTITIONED => {
882                        self.parser.expect_keyword(Keyword::BY)?;
883                        ensure_not_set(&builder.table_partition_cols, "PARTITIONED BY")?;
884                        // Expects either list of column names (col_name [, col_name]*)
885                        // or list of column definitions (col_name datatype [, col_name datatype]* )
886                        // use the token after the name to decide which parsing rule to use
887                        // Note that mixing both names and definitions is not allowed
888                        let peeked = self.parser.peek_nth_token(2);
889                        if peeked == Token::Comma || peeked == Token::RParen {
890                            // List of column names
891                            builder.table_partition_cols = Some(self.parse_partitions()?)
892                        } else {
893                            // List of column defs
894                            let (cols, cons) = self.parse_columns()?;
895                            builder.table_partition_cols = Some(
896                                cols.iter().map(|col| col.name.to_string()).collect(),
897                            );
898
899                            columns.extend(cols);
900
901                            if !cons.is_empty() {
902                                return Err(ParserError::ParserError(
903                                    "Constraints on Partition Columns are not supported"
904                                        .to_string(),
905                                ));
906                            }
907                        }
908                    }
909                    Keyword::OPTIONS => {
910                        ensure_not_set(&builder.options, "OPTIONS")?;
911                        builder.options = Some(self.parse_value_options()?);
912                    }
913                    _ => {
914                        unreachable!()
915                    }
916                }
917            } else {
918                let token = self.parser.next_token();
919                if token == Token::EOF || token == Token::SemiColon {
920                    break;
921                } else {
922                    return Err(ParserError::ParserError(format!(
923                        "Unexpected token {token}"
924                    )));
925                }
926            }
927        }
928
929        // Validations: location and file_type are required
930        if builder.file_type.is_none() {
931            return Err(ParserError::ParserError(
932                "Missing STORED AS clause in CREATE EXTERNAL TABLE statement".into(),
933            ));
934        }
935        if builder.location.is_none() {
936            return Err(ParserError::ParserError(
937                "Missing LOCATION clause in CREATE EXTERNAL TABLE statement".into(),
938            ));
939        }
940
941        let create = CreateExternalTable {
942            name: table_name,
943            columns,
944            file_type: builder.file_type.unwrap(),
945            location: builder.location.unwrap(),
946            table_partition_cols: builder.table_partition_cols.unwrap_or(vec![]),
947            order_exprs: builder.order_exprs,
948            if_not_exists,
949            temporary,
950            unbounded,
951            options: builder.options.unwrap_or(Vec::new()),
952            constraints,
953        };
954        Ok(Statement::CreateExternalTable(create))
955    }
956
957    /// Parses the set of valid formats
958    fn parse_file_format(&mut self) -> Result<String, ParserError> {
959        let token = self.parser.next_token();
960        match &token.token {
961            Token::Word(w) => parse_file_type(&w.value),
962            _ => self.expected("one of ARROW, PARQUET, NDJSON, or CSV", token),
963        }
964    }
965
966    /// Parses (key value) style options into a map of String --> [`Value`].
967    ///
968    /// This method supports keywords as key names as well as multiple
969    /// value types such as Numbers as well as Strings.
970    fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>, ParserError> {
971        let mut options = vec![];
972        self.parser.expect_token(&Token::LParen)?;
973
974        loop {
975            let key = self.parse_option_key()?;
976            let value = self.parse_option_value()?;
977            options.push((key, value));
978            let comma = self.parser.consume_token(&Token::Comma);
979            if self.parser.consume_token(&Token::RParen) {
980                // Allow a trailing comma, even though it's not in standard
981                break;
982            } else if !comma {
983                return self.expected(
984                    "',' or ')' after option definition",
985                    self.parser.peek_token(),
986                );
987            }
988        }
989        Ok(options)
990    }
991}
992
993#[cfg(test)]
994mod tests {
995    use super::*;
996    use datafusion_common::assert_contains;
997    use sqlparser::ast::Expr::Identifier;
998    use sqlparser::ast::{BinaryOperator, DataType, Expr, Ident};
999    use sqlparser::dialect::SnowflakeDialect;
1000    use sqlparser::tokenizer::Span;
1001
1002    fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), ParserError> {
1003        let statements = DFParser::parse_sql(sql)?;
1004        assert_eq!(
1005            statements.len(),
1006            1,
1007            "Expected to parse exactly one statement"
1008        );
1009        assert_eq!(statements[0], expected, "actual:\n{:#?}", statements[0]);
1010        Ok(())
1011    }
1012
1013    /// Parses sql and asserts that the expected error message was found
1014    fn expect_parse_error(sql: &str, expected_error: &str) {
1015        match DFParser::parse_sql(sql) {
1016            Ok(statements) => {
1017                panic!(
1018                    "Expected parse error for '{sql}', but was successful: {statements:?}"
1019                );
1020            }
1021            Err(e) => {
1022                let error_message = e.to_string();
1023                assert!(
1024                    error_message.contains(expected_error),
1025                    "Expected error '{expected_error}' not found in actual error '{error_message}'"
1026                );
1027            }
1028        }
1029    }
1030
1031    fn make_column_def(name: impl Into<String>, data_type: DataType) -> ColumnDef {
1032        ColumnDef {
1033            name: Ident {
1034                value: name.into(),
1035                quote_style: None,
1036                span: Span::empty(),
1037            },
1038            data_type,
1039            options: vec![],
1040        }
1041    }
1042
1043    #[test]
1044    fn create_external_table() -> Result<(), ParserError> {
1045        // positive case
1046        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
1047        let display = None;
1048        let name = ObjectName::from(vec![Ident::from("t")]);
1049        let expected = Statement::CreateExternalTable(CreateExternalTable {
1050            name: name.clone(),
1051            columns: vec![make_column_def("c1", DataType::Int(display))],
1052            file_type: "CSV".to_string(),
1053            location: "foo.csv".into(),
1054            table_partition_cols: vec![],
1055            order_exprs: vec![],
1056            if_not_exists: false,
1057            temporary: false,
1058            unbounded: false,
1059            options: vec![],
1060            constraints: vec![],
1061        });
1062        expect_parse_ok(sql, expected)?;
1063
1064        // positive case: leading space
1065        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'     ";
1066        let expected = Statement::CreateExternalTable(CreateExternalTable {
1067            name: name.clone(),
1068            columns: vec![make_column_def("c1", DataType::Int(None))],
1069            file_type: "CSV".to_string(),
1070            location: "foo.csv".into(),
1071            table_partition_cols: vec![],
1072            order_exprs: vec![],
1073            if_not_exists: false,
1074            temporary: false,
1075            unbounded: false,
1076            options: vec![],
1077            constraints: vec![],
1078        });
1079        expect_parse_ok(sql, expected)?;
1080
1081        // positive case: leading space + semicolon
1082        let sql =
1083            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'      ;";
1084        let expected = Statement::CreateExternalTable(CreateExternalTable {
1085            name: name.clone(),
1086            columns: vec![make_column_def("c1", DataType::Int(None))],
1087            file_type: "CSV".to_string(),
1088            location: "foo.csv".into(),
1089            table_partition_cols: vec![],
1090            order_exprs: vec![],
1091            if_not_exists: false,
1092            temporary: false,
1093            unbounded: false,
1094            options: vec![],
1095            constraints: vec![],
1096        });
1097        expect_parse_ok(sql, expected)?;
1098
1099        // positive case with delimiter
1100        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
1101        let display = None;
1102        let expected = Statement::CreateExternalTable(CreateExternalTable {
1103            name: name.clone(),
1104            columns: vec![make_column_def("c1", DataType::Int(display))],
1105            file_type: "CSV".to_string(),
1106            location: "foo.csv".into(),
1107            table_partition_cols: vec![],
1108            order_exprs: vec![],
1109            if_not_exists: false,
1110            temporary: false,
1111            unbounded: false,
1112            options: vec![(
1113                "format.delimiter".into(),
1114                Value::SingleQuotedString("|".into()),
1115            )],
1116            constraints: vec![],
1117        });
1118        expect_parse_ok(sql, expected)?;
1119
1120        // positive case: partitioned by
1121        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'";
1122        let display = None;
1123        let expected = Statement::CreateExternalTable(CreateExternalTable {
1124            name: name.clone(),
1125            columns: vec![make_column_def("c1", DataType::Int(display))],
1126            file_type: "CSV".to_string(),
1127            location: "foo.csv".into(),
1128            table_partition_cols: vec!["p1".to_string(), "p2".to_string()],
1129            order_exprs: vec![],
1130            if_not_exists: false,
1131            temporary: false,
1132            unbounded: false,
1133            options: vec![],
1134            constraints: vec![],
1135        });
1136        expect_parse_ok(sql, expected)?;
1137
1138        // positive case: it is ok for sql stmt with `COMPRESSION TYPE GZIP` tokens
1139        let sqls =
1140            vec![
1141             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1142             ('format.compression' 'GZIP')", "GZIP"),
1143             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1144             ('format.compression' 'BZIP2')", "BZIP2"),
1145             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1146             ('format.compression' 'XZ')", "XZ"),
1147             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1148             ('format.compression' 'ZSTD')", "ZSTD"),
1149         ];
1150        for (sql, compression) in sqls {
1151            let expected = Statement::CreateExternalTable(CreateExternalTable {
1152                name: name.clone(),
1153                columns: vec![make_column_def("c1", DataType::Int(display))],
1154                file_type: "CSV".to_string(),
1155                location: "foo.csv".into(),
1156                table_partition_cols: vec![],
1157                order_exprs: vec![],
1158                if_not_exists: false,
1159                temporary: false,
1160                unbounded: false,
1161                options: vec![(
1162                    "format.compression".into(),
1163                    Value::SingleQuotedString(compression.into()),
1164                )],
1165                constraints: vec![],
1166            });
1167            expect_parse_ok(sql, expected)?;
1168        }
1169
1170        // positive case: it is ok for parquet files not to have columns specified
1171        let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1172        let expected = Statement::CreateExternalTable(CreateExternalTable {
1173            name: name.clone(),
1174            columns: vec![],
1175            file_type: "PARQUET".to_string(),
1176            location: "foo.parquet".into(),
1177            table_partition_cols: vec![],
1178            order_exprs: vec![],
1179            if_not_exists: false,
1180            temporary: false,
1181            unbounded: false,
1182            options: vec![],
1183            constraints: vec![],
1184        });
1185        expect_parse_ok(sql, expected)?;
1186
1187        // positive case: it is ok for parquet files to be other than upper case
1188        let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'";
1189        let expected = Statement::CreateExternalTable(CreateExternalTable {
1190            name: name.clone(),
1191            columns: vec![],
1192            file_type: "PARQUET".to_string(),
1193            location: "foo.parquet".into(),
1194            table_partition_cols: vec![],
1195            order_exprs: vec![],
1196            if_not_exists: false,
1197            temporary: false,
1198            unbounded: false,
1199            options: vec![],
1200            constraints: vec![],
1201        });
1202        expect_parse_ok(sql, expected)?;
1203
1204        // positive case: it is ok for avro files not to have columns specified
1205        let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
1206        let expected = Statement::CreateExternalTable(CreateExternalTable {
1207            name: name.clone(),
1208            columns: vec![],
1209            file_type: "AVRO".to_string(),
1210            location: "foo.avro".into(),
1211            table_partition_cols: vec![],
1212            order_exprs: vec![],
1213            if_not_exists: false,
1214            temporary: false,
1215            unbounded: false,
1216            options: vec![],
1217            constraints: vec![],
1218        });
1219        expect_parse_ok(sql, expected)?;
1220
1221        // positive case: it is ok for avro files not to have columns specified
1222        let sql =
1223            "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'";
1224        let expected = Statement::CreateExternalTable(CreateExternalTable {
1225            name: name.clone(),
1226            columns: vec![],
1227            file_type: "PARQUET".to_string(),
1228            location: "foo.parquet".into(),
1229            table_partition_cols: vec![],
1230            order_exprs: vec![],
1231            if_not_exists: true,
1232            temporary: false,
1233            unbounded: false,
1234            options: vec![],
1235            constraints: vec![],
1236        });
1237        expect_parse_ok(sql, expected)?;
1238
1239        // positive case: column definition allowed in 'partition by' clause
1240        let sql =
1241            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'";
1242        let expected = Statement::CreateExternalTable(CreateExternalTable {
1243            name: name.clone(),
1244            columns: vec![
1245                make_column_def("c1", DataType::Int(None)),
1246                make_column_def("p1", DataType::Int(None)),
1247            ],
1248            file_type: "CSV".to_string(),
1249            location: "foo.csv".into(),
1250            table_partition_cols: vec!["p1".to_string()],
1251            order_exprs: vec![],
1252            if_not_exists: false,
1253            temporary: false,
1254            unbounded: false,
1255            options: vec![],
1256            constraints: vec![],
1257        });
1258        expect_parse_ok(sql, expected)?;
1259
1260        // negative case: mixed column defs and column names in `PARTITIONED BY` clause
1261        let sql =
1262            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'";
1263        expect_parse_error(
1264            sql,
1265            "sql parser error: Expected: a data type name, found: )",
1266        );
1267
1268        // negative case: mixed column defs and column names in `PARTITIONED BY` clause
1269        let sql =
1270            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'";
1271        expect_parse_error(sql, "sql parser error: Expected ',' or ')' after partition definition, found: int");
1272
1273        // positive case: additional options (one entry) can be specified
1274        let sql =
1275            "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'";
1276        let expected = Statement::CreateExternalTable(CreateExternalTable {
1277            name: name.clone(),
1278            columns: vec![],
1279            file_type: "X".to_string(),
1280            location: "blahblah".into(),
1281            table_partition_cols: vec![],
1282            order_exprs: vec![],
1283            if_not_exists: false,
1284            temporary: false,
1285            unbounded: false,
1286            options: vec![("k1".into(), Value::SingleQuotedString("v1".into()))],
1287            constraints: vec![],
1288        });
1289        expect_parse_ok(sql, expected)?;
1290
1291        // positive case: additional options (multiple entries) can be specified
1292        let sql =
1293            "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'";
1294        let expected = Statement::CreateExternalTable(CreateExternalTable {
1295            name: name.clone(),
1296            columns: vec![],
1297            file_type: "X".to_string(),
1298            location: "blahblah".into(),
1299            table_partition_cols: vec![],
1300            order_exprs: vec![],
1301            if_not_exists: false,
1302            temporary: false,
1303            unbounded: false,
1304            options: vec![
1305                ("k1".into(), Value::SingleQuotedString("v1".into())),
1306                ("k2".into(), Value::SingleQuotedString("v2".into())),
1307            ],
1308            constraints: vec![],
1309        });
1310        expect_parse_ok(sql, expected)?;
1311
1312        // Ordered Col
1313        let sqls = ["CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'",
1314                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'",
1315                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS LAST) LOCATION 'foo.csv'",
1316                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC) LOCATION 'foo.csv'",
1317                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC) LOCATION 'foo.csv'",
1318                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS FIRST) LOCATION 'foo.csv'",
1319                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS LAST) LOCATION 'foo.csv'",
1320                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS FIRST) LOCATION 'foo.csv'",
1321                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS LAST) LOCATION 'foo.csv'"];
1322        let expected = vec![
1323            (None, None),
1324            (None, Some(true)),
1325            (None, Some(false)),
1326            (Some(true), None),
1327            (Some(false), None),
1328            (Some(false), Some(true)),
1329            (Some(false), Some(false)),
1330            (Some(true), Some(true)),
1331            (Some(true), Some(false)),
1332        ];
1333        for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) {
1334            let expected = Statement::CreateExternalTable(CreateExternalTable {
1335                name: name.clone(),
1336                columns: vec![make_column_def("c1", DataType::Int(None))],
1337                file_type: "CSV".to_string(),
1338                location: "foo.csv".into(),
1339                table_partition_cols: vec![],
1340                order_exprs: vec![vec![OrderByExpr {
1341                    expr: Identifier(Ident {
1342                        value: "c1".to_owned(),
1343                        quote_style: None,
1344                        span: Span::empty(),
1345                    }),
1346                    options: OrderByOptions { asc, nulls_first },
1347                    with_fill: None,
1348                }]],
1349                if_not_exists: false,
1350                temporary: false,
1351                unbounded: false,
1352                options: vec![],
1353                constraints: vec![],
1354            });
1355            expect_parse_ok(sql, expected)?;
1356        }
1357
1358        // Ordered Col
1359        let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
1360        let display = None;
1361        let expected = Statement::CreateExternalTable(CreateExternalTable {
1362            name: name.clone(),
1363            columns: vec![
1364                make_column_def("c1", DataType::Int(display)),
1365                make_column_def("c2", DataType::Int(display)),
1366            ],
1367            file_type: "CSV".to_string(),
1368            location: "foo.csv".into(),
1369            table_partition_cols: vec![],
1370            order_exprs: vec![vec![
1371                OrderByExpr {
1372                    expr: Identifier(Ident {
1373                        value: "c1".to_owned(),
1374                        quote_style: None,
1375                        span: Span::empty(),
1376                    }),
1377                    options: OrderByOptions {
1378                        asc: Some(true),
1379                        nulls_first: None,
1380                    },
1381                    with_fill: None,
1382                },
1383                OrderByExpr {
1384                    expr: Identifier(Ident {
1385                        value: "c2".to_owned(),
1386                        quote_style: None,
1387                        span: Span::empty(),
1388                    }),
1389                    options: OrderByOptions {
1390                        asc: Some(false),
1391                        nulls_first: Some(true),
1392                    },
1393                    with_fill: None,
1394                },
1395            ]],
1396            if_not_exists: false,
1397            temporary: false,
1398            unbounded: false,
1399            options: vec![],
1400            constraints: vec![],
1401        });
1402        expect_parse_ok(sql, expected)?;
1403
1404        // Ordered Binary op
1405        let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
1406        let display = None;
1407        let expected = Statement::CreateExternalTable(CreateExternalTable {
1408            name: name.clone(),
1409            columns: vec![
1410                make_column_def("c1", DataType::Int(display)),
1411                make_column_def("c2", DataType::Int(display)),
1412            ],
1413            file_type: "CSV".to_string(),
1414            location: "foo.csv".into(),
1415            table_partition_cols: vec![],
1416            order_exprs: vec![vec![OrderByExpr {
1417                expr: Expr::BinaryOp {
1418                    left: Box::new(Identifier(Ident {
1419                        value: "c1".to_owned(),
1420                        quote_style: None,
1421                        span: Span::empty(),
1422                    })),
1423                    op: BinaryOperator::Minus,
1424                    right: Box::new(Identifier(Ident {
1425                        value: "c2".to_owned(),
1426                        quote_style: None,
1427                        span: Span::empty(),
1428                    })),
1429                },
1430                options: OrderByOptions {
1431                    asc: Some(true),
1432                    nulls_first: None,
1433                },
1434                with_fill: None,
1435            }]],
1436            if_not_exists: false,
1437            temporary: false,
1438            unbounded: false,
1439            options: vec![],
1440            constraints: vec![],
1441        });
1442        expect_parse_ok(sql, expected)?;
1443
1444        // Most complete CREATE EXTERNAL TABLE statement possible
1445        let sql = "
1446            CREATE UNBOUNDED EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float)
1447            STORED AS PARQUET
1448            WITH ORDER (c1 - c2 ASC)
1449            PARTITIONED BY (c1)
1450            LOCATION 'foo.parquet'
1451            OPTIONS ('format.compression' 'zstd',
1452                     'format.delimiter' '*',
1453                     'ROW_GROUP_SIZE' '1024',
1454                     'TRUNCATE' 'NO',
1455                     'format.has_header' 'true')";
1456        let expected = Statement::CreateExternalTable(CreateExternalTable {
1457            name: name.clone(),
1458            columns: vec![
1459                make_column_def("c1", DataType::Int(None)),
1460                make_column_def("c2", DataType::Float(None)),
1461            ],
1462            file_type: "PARQUET".to_string(),
1463            location: "foo.parquet".into(),
1464            table_partition_cols: vec!["c1".into()],
1465            order_exprs: vec![vec![OrderByExpr {
1466                expr: Expr::BinaryOp {
1467                    left: Box::new(Identifier(Ident {
1468                        value: "c1".to_owned(),
1469                        quote_style: None,
1470                        span: Span::empty(),
1471                    })),
1472                    op: BinaryOperator::Minus,
1473                    right: Box::new(Identifier(Ident {
1474                        value: "c2".to_owned(),
1475                        quote_style: None,
1476                        span: Span::empty(),
1477                    })),
1478                },
1479                options: OrderByOptions {
1480                    asc: Some(true),
1481                    nulls_first: None,
1482                },
1483                with_fill: None,
1484            }]],
1485            if_not_exists: true,
1486            temporary: false,
1487            unbounded: true,
1488            options: vec![
1489                (
1490                    "format.compression".into(),
1491                    Value::SingleQuotedString("zstd".into()),
1492                ),
1493                (
1494                    "format.delimiter".into(),
1495                    Value::SingleQuotedString("*".into()),
1496                ),
1497                (
1498                    "ROW_GROUP_SIZE".into(),
1499                    Value::SingleQuotedString("1024".into()),
1500                ),
1501                ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1502                (
1503                    "format.has_header".into(),
1504                    Value::SingleQuotedString("true".into()),
1505                ),
1506            ],
1507            constraints: vec![],
1508        });
1509        expect_parse_ok(sql, expected)?;
1510
1511        // For error cases, see: `create_external_table.slt`
1512
1513        Ok(())
1514    }
1515
1516    #[test]
1517    fn copy_to_table_to_table() -> Result<(), ParserError> {
1518        // positive case
1519        let sql = "COPY foo TO bar STORED AS CSV";
1520        let expected = Statement::CopyTo(CopyToStatement {
1521            source: object_name("foo"),
1522            target: "bar".to_string(),
1523            partitioned_by: vec![],
1524            stored_as: Some("CSV".to_owned()),
1525            options: vec![],
1526        });
1527
1528        assert_eq!(verified_stmt(sql), expected);
1529        Ok(())
1530    }
1531
1532    #[test]
1533    fn skip_copy_into_snowflake() -> Result<(), ParserError> {
1534        let sql = "COPY INTO foo FROM @~/staged FILE_FORMAT = (FORMAT_NAME = 'mycsv');";
1535        let dialect = Box::new(SnowflakeDialect);
1536        let statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?;
1537
1538        assert_eq!(
1539            statements.len(),
1540            1,
1541            "Expected to parse exactly one statement"
1542        );
1543        if let Statement::CopyTo(_) = &statements[0] {
1544            panic!("Expected non COPY TO statement, but was successful: {statements:?}");
1545        }
1546        Ok(())
1547    }
1548
1549    #[test]
1550    fn explain_copy_to_table_to_table() -> Result<(), ParserError> {
1551        let cases = vec![
1552            ("EXPLAIN COPY foo TO bar STORED AS PARQUET", false, false),
1553            (
1554                "EXPLAIN ANALYZE COPY foo TO bar STORED AS PARQUET",
1555                true,
1556                false,
1557            ),
1558            (
1559                "EXPLAIN VERBOSE COPY foo TO bar STORED AS PARQUET",
1560                false,
1561                true,
1562            ),
1563            (
1564                "EXPLAIN ANALYZE VERBOSE COPY foo TO bar STORED AS PARQUET",
1565                true,
1566                true,
1567            ),
1568        ];
1569        for (sql, analyze, verbose) in cases {
1570            println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}");
1571
1572            let expected_copy = Statement::CopyTo(CopyToStatement {
1573                source: object_name("foo"),
1574                target: "bar".to_string(),
1575                partitioned_by: vec![],
1576                stored_as: Some("PARQUET".to_owned()),
1577                options: vec![],
1578            });
1579            let expected = Statement::Explain(ExplainStatement {
1580                analyze,
1581                verbose,
1582                format: None,
1583                statement: Box::new(expected_copy),
1584            });
1585            assert_eq!(verified_stmt(sql), expected);
1586        }
1587        Ok(())
1588    }
1589
1590    #[test]
1591    fn copy_to_query_to_table() -> Result<(), ParserError> {
1592        let statement = verified_stmt("SELECT 1");
1593
1594        // unwrap the various layers
1595        let statement = if let Statement::Statement(statement) = statement {
1596            *statement
1597        } else {
1598            panic!("Expected statement, got {statement:?}");
1599        };
1600
1601        let query = if let SQLStatement::Query(query) = statement {
1602            query
1603        } else {
1604            panic!("Expected query, got {statement:?}");
1605        };
1606
1607        let sql =
1608            "COPY (SELECT 1) TO bar STORED AS CSV OPTIONS ('format.has_header' 'true')";
1609        let expected = Statement::CopyTo(CopyToStatement {
1610            source: CopyToSource::Query(query),
1611            target: "bar".to_string(),
1612            partitioned_by: vec![],
1613            stored_as: Some("CSV".to_owned()),
1614            options: vec![(
1615                "format.has_header".into(),
1616                Value::SingleQuotedString("true".into()),
1617            )],
1618        });
1619        assert_eq!(verified_stmt(sql), expected);
1620        Ok(())
1621    }
1622
1623    #[test]
1624    fn copy_to_options() -> Result<(), ParserError> {
1625        let sql = "COPY foo TO bar STORED AS CSV OPTIONS ('row_group_size' '55')";
1626        let expected = Statement::CopyTo(CopyToStatement {
1627            source: object_name("foo"),
1628            target: "bar".to_string(),
1629            partitioned_by: vec![],
1630            stored_as: Some("CSV".to_owned()),
1631            options: vec![(
1632                "row_group_size".to_string(),
1633                Value::SingleQuotedString("55".to_string()),
1634            )],
1635        });
1636        assert_eq!(verified_stmt(sql), expected);
1637        Ok(())
1638    }
1639
1640    #[test]
1641    fn copy_to_partitioned_by() -> Result<(), ParserError> {
1642        let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')";
1643        let expected = Statement::CopyTo(CopyToStatement {
1644            source: object_name("foo"),
1645            target: "bar".to_string(),
1646            partitioned_by: vec!["a".to_string()],
1647            stored_as: Some("CSV".to_owned()),
1648            options: vec![(
1649                "row_group_size".to_string(),
1650                Value::SingleQuotedString("55".to_string()),
1651            )],
1652        });
1653        assert_eq!(verified_stmt(sql), expected);
1654        Ok(())
1655    }
1656
1657    #[test]
1658    fn copy_to_multi_options() -> Result<(), ParserError> {
1659        // order of options is preserved
1660        let sql =
1661            "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy, 'execution.keep_partition_by_columns' true)";
1662
1663        let expected_options = vec![
1664            (
1665                "format.row_group_size".to_string(),
1666                Value::Number("55".to_string(), false),
1667            ),
1668            (
1669                "format.compression".to_string(),
1670                Value::SingleQuotedString("snappy".to_string()),
1671            ),
1672            (
1673                "execution.keep_partition_by_columns".to_string(),
1674                Value::SingleQuotedString("true".to_string()),
1675            ),
1676        ];
1677
1678        let mut statements = DFParser::parse_sql(sql).unwrap();
1679        assert_eq!(statements.len(), 1);
1680        let only_statement = statements.pop_front().unwrap();
1681
1682        let options = if let Statement::CopyTo(copy_to) = only_statement {
1683            copy_to.options
1684        } else {
1685            panic!("Expected copy");
1686        };
1687
1688        assert_eq!(options, expected_options);
1689
1690        Ok(())
1691    }
1692
1693    // For error cases, see: `copy.slt`
1694
1695    fn object_name(name: &str) -> CopyToSource {
1696        CopyToSource::Relation(ObjectName::from(vec![Ident::new(name)]))
1697    }
1698
1699    // Based on  sqlparser-rs
1700    // https://github.com/sqlparser-rs/sqlparser-rs/blob/ae3b5844c839072c235965fe0d1bddc473dced87/src/test_utils.rs#L104-L116
1701
1702    /// Ensures that `sql` parses as a single [Statement]
1703    ///
1704    /// If `canonical` is non empty,this function additionally asserts
1705    /// that:
1706    ///
1707    /// 1. parsing `sql` results in the same [`Statement`] as parsing
1708    ///    `canonical`.
1709    ///
1710    /// 2. re-serializing the result of parsing `sql` produces the same
1711    ///    `canonical` sql string
1712    fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement {
1713        let mut statements = DFParser::parse_sql(sql).unwrap();
1714        assert_eq!(statements.len(), 1);
1715
1716        if sql != canonical {
1717            assert_eq!(DFParser::parse_sql(canonical).unwrap(), statements);
1718        }
1719
1720        let only_statement = statements.pop_front().unwrap();
1721        assert_eq!(
1722            canonical.to_uppercase(),
1723            only_statement.to_string().to_uppercase()
1724        );
1725        only_statement
1726    }
1727
1728    /// Ensures that `sql` parses as a single [Statement], and that
1729    /// re-serializing the parse result produces the same `sql`
1730    /// string (is not modified after a serialization round-trip).
1731    fn verified_stmt(sql: &str) -> Statement {
1732        one_statement_parses_to(sql, sql)
1733    }
1734
1735    #[test]
1736    /// Checks the recursion limit works for sql queries
1737    /// Recursion can happen easily with binary exprs (i.e, AND or OR)
1738    fn test_recursion_limit() {
1739        let sql = "SELECT 1 OR 2";
1740
1741        // Expect parse to succeed
1742        DFParserBuilder::new(sql)
1743            .build()
1744            .unwrap()
1745            .parse_statements()
1746            .unwrap();
1747
1748        let err = DFParserBuilder::new(sql)
1749            .with_recursion_limit(1)
1750            .build()
1751            .unwrap()
1752            .parse_statements()
1753            .unwrap_err();
1754
1755        assert_contains!(
1756            err.to_string(),
1757            "sql parser error: recursion limit exceeded"
1758        );
1759    }
1760}