datafusion_sql/
parser.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`DFParser`]: DataFusion SQL Parser based on [`sqlparser`]
19
20use std::collections::VecDeque;
21use std::fmt;
22
23use sqlparser::ast::ExprWithAlias;
24use sqlparser::tokenizer::TokenWithSpan;
25use sqlparser::{
26    ast::{
27        ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query,
28        Statement as SQLStatement, TableConstraint, Value,
29    },
30    dialect::{keywords::Keyword, Dialect, GenericDialect},
31    parser::{Parser, ParserError},
32    tokenizer::{Token, Tokenizer, Word},
33};
34
35// Use `Parser::expected` instead, if possible
36macro_rules! parser_err {
37    ($MSG:expr) => {
38        Err(ParserError::ParserError($MSG.to_string()))
39    };
40}
41
42fn parse_file_type(s: &str) -> Result<String, ParserError> {
43    Ok(s.to_uppercase())
44}
45
46/// DataFusion specific EXPLAIN (needed so we can EXPLAIN datafusion
47/// specific COPY and other statements)
48#[derive(Debug, Clone, PartialEq, Eq)]
49pub struct ExplainStatement {
50    pub analyze: bool,
51    pub verbose: bool,
52    pub statement: Box<Statement>,
53}
54
55impl fmt::Display for ExplainStatement {
56    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
57        let Self {
58            analyze,
59            verbose,
60            statement,
61        } = self;
62
63        write!(f, "EXPLAIN ")?;
64        if *analyze {
65            write!(f, "ANALYZE ")?;
66        }
67        if *verbose {
68            write!(f, "VERBOSE ")?;
69        }
70
71        write!(f, "{statement}")
72    }
73}
74
75/// DataFusion extension DDL for `COPY`
76///
77/// # Syntax:
78///
79/// ```text
80/// COPY <table_name | (<query>)>
81/// TO
82/// <destination_url>
83/// (key_value_list)
84/// ```
85///
86/// # Examples
87///
88/// ```sql
89/// COPY lineitem  TO 'lineitem'
90/// STORED AS PARQUET (
91///   partitions 16,
92///   row_group_limit_rows 100000,
93///   row_group_limit_bytes 200000
94/// )
95///
96/// COPY (SELECT l_orderkey from lineitem) to 'lineitem.parquet';
97/// ```
98#[derive(Debug, Clone, PartialEq, Eq)]
99pub struct CopyToStatement {
100    /// From where the data comes from
101    pub source: CopyToSource,
102    /// The URL to where the data is heading
103    pub target: String,
104    /// Partition keys
105    pub partitioned_by: Vec<String>,
106    /// File type (Parquet, NDJSON, CSV etc.)
107    pub stored_as: Option<String>,
108    /// Target specific options
109    pub options: Vec<(String, Value)>,
110}
111
112impl fmt::Display for CopyToStatement {
113    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
114        let Self {
115            source,
116            target,
117            partitioned_by,
118            stored_as,
119            options,
120            ..
121        } = self;
122
123        write!(f, "COPY {source} TO {target}")?;
124        if let Some(file_type) = stored_as {
125            write!(f, " STORED AS {}", file_type)?;
126        }
127        if !partitioned_by.is_empty() {
128            write!(f, " PARTITIONED BY ({})", partitioned_by.join(", "))?;
129        }
130
131        if !options.is_empty() {
132            let opts: Vec<_> =
133                options.iter().map(|(k, v)| format!("'{k}' {v}")).collect();
134            write!(f, " OPTIONS ({})", opts.join(", "))?;
135        }
136
137        Ok(())
138    }
139}
140
141#[derive(Debug, Clone, PartialEq, Eq)]
142pub enum CopyToSource {
143    /// `COPY <table> TO ...`
144    Relation(ObjectName),
145    /// COPY (...query...) TO ...
146    Query(Box<Query>),
147}
148
149impl fmt::Display for CopyToSource {
150    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
151        match self {
152            CopyToSource::Relation(r) => write!(f, "{r}"),
153            CopyToSource::Query(q) => write!(f, "({q})"),
154        }
155    }
156}
157
158/// This type defines a lexicographical ordering.
159pub(crate) type LexOrdering = Vec<OrderByExpr>;
160
161/// DataFusion extension DDL for `CREATE EXTERNAL TABLE`
162///
163/// Syntax:
164///
165/// ```text
166/// CREATE EXTERNAL TABLE
167/// [ IF NOT EXISTS ]
168/// <TABLE_NAME>[ (<column_definition>) ]
169/// STORED AS <file_type>
170/// [ PARTITIONED BY (<column_definition list> | <column list>) ]
171/// [ WITH ORDER (<ordered column list>)
172/// [ OPTIONS (<key_value_list>) ]
173/// LOCATION <literal>
174///
175/// <column_definition> := (<column_name> <data_type>, ...)
176///
177/// <column_list> := (<column_name>, ...)
178///
179/// <ordered_column_list> := (<column_name> <sort_clause>, ...)
180///
181/// <key_value_list> := (<literal> <literal, <literal> <literal>, ...)
182/// ```
183#[derive(Debug, Clone, PartialEq, Eq)]
184pub struct CreateExternalTable {
185    /// Table name
186    pub name: ObjectName,
187    /// Optional schema
188    pub columns: Vec<ColumnDef>,
189    /// File type (Parquet, NDJSON, CSV, etc)
190    pub file_type: String,
191    /// Path to file
192    pub location: String,
193    /// Partition Columns
194    pub table_partition_cols: Vec<String>,
195    /// Ordered expressions
196    pub order_exprs: Vec<LexOrdering>,
197    /// Option to not error if table already exists
198    pub if_not_exists: bool,
199    /// Whether the table is a temporary table
200    pub temporary: bool,
201    /// Infinite streams?
202    pub unbounded: bool,
203    /// Table(provider) specific options
204    pub options: Vec<(String, Value)>,
205    /// A table-level constraint
206    pub constraints: Vec<TableConstraint>,
207}
208
209impl fmt::Display for CreateExternalTable {
210    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
211        write!(f, "CREATE EXTERNAL TABLE ")?;
212        if self.if_not_exists {
213            write!(f, "IF NOT EXISTS ")?;
214        }
215        write!(f, "{} ", self.name)?;
216        write!(f, "STORED AS {} ", self.file_type)?;
217        write!(f, "LOCATION {} ", self.location)
218    }
219}
220
221/// DataFusion SQL Statement.
222///
223/// This can either be a [`Statement`] from [`sqlparser`] from a
224/// standard SQL dialect, or a DataFusion extension such as `CREATE
225/// EXTERNAL TABLE`. See [`DFParser`] for more information.
226///
227/// [`Statement`]: sqlparser::ast::Statement
228#[derive(Debug, Clone, PartialEq, Eq)]
229pub enum Statement {
230    /// ANSI SQL AST node (from sqlparser-rs)
231    Statement(Box<SQLStatement>),
232    /// Extension: `CREATE EXTERNAL TABLE`
233    CreateExternalTable(CreateExternalTable),
234    /// Extension: `COPY TO`
235    CopyTo(CopyToStatement),
236    /// EXPLAIN for extensions
237    Explain(ExplainStatement),
238}
239
240impl fmt::Display for Statement {
241    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
242        match self {
243            Statement::Statement(stmt) => write!(f, "{stmt}"),
244            Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"),
245            Statement::CopyTo(stmt) => write!(f, "{stmt}"),
246            Statement::Explain(stmt) => write!(f, "{stmt}"),
247        }
248    }
249}
250
251fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), ParserError> {
252    if field.is_some() {
253        return Err(ParserError::ParserError(format!(
254            "{name} specified more than once",
255        )));
256    }
257    Ok(())
258}
259
260/// DataFusion SQL Parser based on [`sqlparser`]
261///
262/// Parses DataFusion's SQL dialect, often delegating to [`sqlparser`]'s [`Parser`].
263///
264/// DataFusion mostly follows existing SQL dialects via
265/// `sqlparser`. However, certain statements such as `COPY` and
266/// `CREATE EXTERNAL TABLE` have special syntax in DataFusion. See
267/// [`Statement`] for a list of this special syntax
268pub struct DFParser<'a> {
269    pub parser: Parser<'a>,
270}
271
272/// Same as `sqlparser`
273const DEFAULT_RECURSION_LIMIT: usize = 50;
274const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
275
276/// Builder for [`DFParser`]
277///
278/// # Example: Create and Parse SQL statements
279/// ```
280/// # use datafusion_sql::parser::DFParserBuilder;
281/// # use datafusion_common::Result;
282/// # fn test() -> Result<()> {
283/// let mut parser = DFParserBuilder::new("SELECT * FROM foo; SELECT 1 + 2")
284///   .build()?;
285/// // parse the SQL into DFStatements
286/// let statements = parser.parse_statements()?;
287/// assert_eq!(statements.len(), 2);
288/// # Ok(())
289/// # }
290/// ```
291///
292/// # Example: Create and Parse expression with a different dialect
293/// ```
294/// # use datafusion_sql::parser::DFParserBuilder;
295/// # use datafusion_common::Result;
296/// # use datafusion_sql::sqlparser::dialect::MySqlDialect;
297/// # use datafusion_sql::sqlparser::ast::Expr;
298/// # fn test() -> Result<()> {
299/// let dialect = MySqlDialect{}; // Parse using MySQL dialect
300/// let mut parser = DFParserBuilder::new("1 + 2")
301///   .with_dialect(&dialect)
302///   .build()?;
303/// // parse 1+2 into an sqlparser::ast::Expr
304/// let res = parser.parse_expr()?;
305/// assert!(matches!(res.expr, Expr::BinaryOp {..}));
306/// # Ok(())
307/// # }
308/// ```
309pub struct DFParserBuilder<'a> {
310    /// The SQL string to parse
311    sql: &'a str,
312    /// The Dialect to use (defaults to [`GenericDialect`]
313    dialect: &'a dyn Dialect,
314    /// The recursion limit while parsing
315    recursion_limit: usize,
316}
317
318impl<'a> DFParserBuilder<'a> {
319    /// Create a new parser builder for the specified tokens using the
320    /// [`GenericDialect`].
321    pub fn new(sql: &'a str) -> Self {
322        Self {
323            sql,
324            dialect: &DEFAULT_DIALECT,
325            recursion_limit: DEFAULT_RECURSION_LIMIT,
326        }
327    }
328
329    /// Adjust the parser builder's dialect. Defaults to [`GenericDialect`]
330    pub fn with_dialect(mut self, dialect: &'a dyn Dialect) -> Self {
331        self.dialect = dialect;
332        self
333    }
334
335    /// Adjust the recursion limit of sql parsing.  Defaults to 50
336    pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
337        self.recursion_limit = recursion_limit;
338        self
339    }
340
341    pub fn build(self) -> Result<DFParser<'a>, ParserError> {
342        let mut tokenizer = Tokenizer::new(self.dialect, self.sql);
343        let tokens = tokenizer.tokenize_with_location()?;
344
345        Ok(DFParser {
346            parser: Parser::new(self.dialect)
347                .with_tokens_with_locations(tokens)
348                .with_recursion_limit(self.recursion_limit),
349        })
350    }
351}
352
353impl<'a> DFParser<'a> {
354    #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
355    pub fn new(sql: &'a str) -> Result<Self, ParserError> {
356        DFParserBuilder::new(sql).build()
357    }
358
359    #[deprecated(since = "46.0.0", note = "DFParserBuilder")]
360    pub fn new_with_dialect(
361        sql: &'a str,
362        dialect: &'a dyn Dialect,
363    ) -> Result<Self, ParserError> {
364        DFParserBuilder::new(sql).with_dialect(dialect).build()
365    }
366
367    /// Parse a sql string into one or [`Statement`]s using the
368    /// [`GenericDialect`].
369    pub fn parse_sql(sql: &'a str) -> Result<VecDeque<Statement>, ParserError> {
370        let mut parser = DFParserBuilder::new(sql).build()?;
371
372        parser.parse_statements()
373    }
374
375    /// Parse a SQL string and produce one or more [`Statement`]s with
376    /// with the specified dialect.
377    pub fn parse_sql_with_dialect(
378        sql: &str,
379        dialect: &dyn Dialect,
380    ) -> Result<VecDeque<Statement>, ParserError> {
381        let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
382        parser.parse_statements()
383    }
384
385    pub fn parse_sql_into_expr_with_dialect(
386        sql: &str,
387        dialect: &dyn Dialect,
388    ) -> Result<ExprWithAlias, ParserError> {
389        let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
390
391        parser.parse_expr()
392    }
393
394    /// Parse a sql string into one or [`Statement`]s
395    pub fn parse_statements(&mut self) -> Result<VecDeque<Statement>, ParserError> {
396        let mut stmts = VecDeque::new();
397        let mut expecting_statement_delimiter = false;
398        loop {
399            // ignore empty statements (between successive statement delimiters)
400            while self.parser.consume_token(&Token::SemiColon) {
401                expecting_statement_delimiter = false;
402            }
403
404            if self.parser.peek_token() == Token::EOF {
405                break;
406            }
407            if expecting_statement_delimiter {
408                return self.expected("end of statement", self.parser.peek_token());
409            }
410
411            let statement = self.parse_statement()?;
412            stmts.push_back(statement);
413            expecting_statement_delimiter = true;
414        }
415        Ok(stmts)
416    }
417
418    /// Report an unexpected token
419    fn expected<T>(
420        &self,
421        expected: &str,
422        found: TokenWithSpan,
423    ) -> Result<T, ParserError> {
424        parser_err!(format!("Expected {expected}, found: {found}"))
425    }
426
427    /// Parse a new expression
428    pub fn parse_statement(&mut self) -> Result<Statement, ParserError> {
429        match self.parser.peek_token().token {
430            Token::Word(w) => {
431                match w.keyword {
432                    Keyword::CREATE => {
433                        self.parser.next_token(); // CREATE
434                        self.parse_create()
435                    }
436                    Keyword::COPY => {
437                        if let Token::Word(w) = self.parser.peek_nth_token(1).token {
438                            // use native parser for COPY INTO
439                            if w.keyword == Keyword::INTO {
440                                return Ok(Statement::Statement(Box::from(
441                                    self.parser.parse_statement()?,
442                                )));
443                            }
444                        }
445                        self.parser.next_token(); // COPY
446                        self.parse_copy()
447                    }
448                    Keyword::EXPLAIN => {
449                        // (TODO parse all supported statements)
450                        self.parser.next_token(); // EXPLAIN
451                        self.parse_explain()
452                    }
453                    _ => {
454                        // use sqlparser-rs parser
455                        Ok(Statement::Statement(Box::from(
456                            self.parser.parse_statement()?,
457                        )))
458                    }
459                }
460            }
461            _ => {
462                // use the native parser
463                Ok(Statement::Statement(Box::from(
464                    self.parser.parse_statement()?,
465                )))
466            }
467        }
468    }
469
470    pub fn parse_expr(&mut self) -> Result<ExprWithAlias, ParserError> {
471        if let Token::Word(w) = self.parser.peek_token().token {
472            match w.keyword {
473                Keyword::CREATE | Keyword::COPY | Keyword::EXPLAIN => {
474                    return parser_err!("Unsupported command in expression");
475                }
476                _ => {}
477            }
478        }
479
480        self.parser.parse_expr_with_alias()
481    }
482
483    /// Parse a SQL `COPY TO` statement
484    pub fn parse_copy(&mut self) -> Result<Statement, ParserError> {
485        // parse as a query
486        let source = if self.parser.consume_token(&Token::LParen) {
487            let query = self.parser.parse_query()?;
488            self.parser.expect_token(&Token::RParen)?;
489            CopyToSource::Query(query)
490        } else {
491            // parse as table reference
492            let table_name = self.parser.parse_object_name(true)?;
493            CopyToSource::Relation(table_name)
494        };
495
496        #[derive(Default)]
497        struct Builder {
498            stored_as: Option<String>,
499            target: Option<String>,
500            partitioned_by: Option<Vec<String>>,
501            options: Option<Vec<(String, Value)>>,
502        }
503
504        let mut builder = Builder::default();
505
506        loop {
507            if let Some(keyword) = self.parser.parse_one_of_keywords(&[
508                Keyword::STORED,
509                Keyword::TO,
510                Keyword::PARTITIONED,
511                Keyword::OPTIONS,
512                Keyword::WITH,
513            ]) {
514                match keyword {
515                    Keyword::STORED => {
516                        self.parser.expect_keyword(Keyword::AS)?;
517                        ensure_not_set(&builder.stored_as, "STORED AS")?;
518                        builder.stored_as = Some(self.parse_file_format()?);
519                    }
520                    Keyword::TO => {
521                        ensure_not_set(&builder.target, "TO")?;
522                        builder.target = Some(self.parser.parse_literal_string()?);
523                    }
524                    Keyword::WITH => {
525                        self.parser.expect_keyword(Keyword::HEADER)?;
526                        self.parser.expect_keyword(Keyword::ROW)?;
527                        return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')");
528                    }
529                    Keyword::PARTITIONED => {
530                        self.parser.expect_keyword(Keyword::BY)?;
531                        ensure_not_set(&builder.partitioned_by, "PARTITIONED BY")?;
532                        builder.partitioned_by = Some(self.parse_partitions()?);
533                    }
534                    Keyword::OPTIONS => {
535                        ensure_not_set(&builder.options, "OPTIONS")?;
536                        builder.options = Some(self.parse_value_options()?);
537                    }
538                    _ => {
539                        unreachable!()
540                    }
541                }
542            } else {
543                let token = self.parser.next_token();
544                if token == Token::EOF || token == Token::SemiColon {
545                    break;
546                } else {
547                    return Err(ParserError::ParserError(format!(
548                        "Unexpected token {token}"
549                    )));
550                }
551            }
552        }
553
554        let Some(target) = builder.target else {
555            return Err(ParserError::ParserError(
556                "Missing TO clause in COPY statement".into(),
557            ));
558        };
559
560        Ok(Statement::CopyTo(CopyToStatement {
561            source,
562            target,
563            partitioned_by: builder.partitioned_by.unwrap_or(vec![]),
564            stored_as: builder.stored_as,
565            options: builder.options.unwrap_or(vec![]),
566        }))
567    }
568
569    /// Parse the next token as a key name for an option list
570    ///
571    /// Note this is different than [`parse_literal_string`]
572    /// because it allows keywords as well as other non words
573    ///
574    /// [`parse_literal_string`]: sqlparser::parser::Parser::parse_literal_string
575    pub fn parse_option_key(&mut self) -> Result<String, ParserError> {
576        let next_token = self.parser.next_token();
577        match next_token.token {
578            Token::Word(Word { value, .. }) => {
579                let mut parts = vec![value];
580                while self.parser.consume_token(&Token::Period) {
581                    let next_token = self.parser.next_token();
582                    if let Token::Word(Word { value, .. }) = next_token.token {
583                        parts.push(value);
584                    } else {
585                        // Unquoted namespaced keys have to conform to the syntax
586                        // "<WORD>[\.<WORD>]*". If we have a key that breaks this
587                        // pattern, error out:
588                        return self.parser.expected("key name", next_token);
589                    }
590                }
591                Ok(parts.join("."))
592            }
593            Token::SingleQuotedString(s) => Ok(s),
594            Token::DoubleQuotedString(s) => Ok(s),
595            Token::EscapedStringLiteral(s) => Ok(s),
596            _ => self.parser.expected("key name", next_token),
597        }
598    }
599
600    /// Parse the next token as a value for an option list
601    ///
602    /// Note this is different than [`parse_value`] as it allows any
603    /// word or keyword in this location.
604    ///
605    /// [`parse_value`]: sqlparser::parser::Parser::parse_value
606    pub fn parse_option_value(&mut self) -> Result<Value, ParserError> {
607        let next_token = self.parser.next_token();
608        match next_token.token {
609            // e.g. things like "snappy" or "gzip" that may be keywords
610            Token::Word(word) => Ok(Value::SingleQuotedString(word.value)),
611            Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)),
612            Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)),
613            Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)),
614            Token::Number(n, l) => Ok(Value::Number(n, l)),
615            _ => self.parser.expected("string or numeric value", next_token),
616        }
617    }
618
619    /// Parse a SQL `EXPLAIN`
620    pub fn parse_explain(&mut self) -> Result<Statement, ParserError> {
621        let analyze = self.parser.parse_keyword(Keyword::ANALYZE);
622        let verbose = self.parser.parse_keyword(Keyword::VERBOSE);
623        let statement = self.parse_statement()?;
624
625        Ok(Statement::Explain(ExplainStatement {
626            statement: Box::new(statement),
627            analyze,
628            verbose,
629        }))
630    }
631
632    /// Parse a SQL `CREATE` statement handling `CREATE EXTERNAL TABLE`
633    pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
634        if self.parser.parse_keyword(Keyword::EXTERNAL) {
635            self.parse_create_external_table(false)
636        } else if self.parser.parse_keyword(Keyword::UNBOUNDED) {
637            self.parser.expect_keyword(Keyword::EXTERNAL)?;
638            self.parse_create_external_table(true)
639        } else {
640            Ok(Statement::Statement(Box::from(self.parser.parse_create()?)))
641        }
642    }
643
644    fn parse_partitions(&mut self) -> Result<Vec<String>, ParserError> {
645        let mut partitions: Vec<String> = vec![];
646        if !self.parser.consume_token(&Token::LParen)
647            || self.parser.consume_token(&Token::RParen)
648        {
649            return Ok(partitions);
650        }
651
652        loop {
653            if let Token::Word(_) = self.parser.peek_token().token {
654                let identifier = self.parser.parse_identifier()?;
655                partitions.push(identifier.to_string());
656            } else {
657                return self.expected("partition name", self.parser.peek_token());
658            }
659            let comma = self.parser.consume_token(&Token::Comma);
660            if self.parser.consume_token(&Token::RParen) {
661                // allow a trailing comma, even though it's not in standard
662                break;
663            } else if !comma {
664                return self.expected(
665                    "',' or ')' after partition definition",
666                    self.parser.peek_token(),
667                );
668            }
669        }
670        Ok(partitions)
671    }
672
673    /// Parse the ordering clause of a `CREATE EXTERNAL TABLE` SQL statement
674    pub fn parse_order_by_exprs(&mut self) -> Result<Vec<OrderByExpr>, ParserError> {
675        let mut values = vec![];
676        self.parser.expect_token(&Token::LParen)?;
677        loop {
678            values.push(self.parse_order_by_expr()?);
679            if !self.parser.consume_token(&Token::Comma) {
680                self.parser.expect_token(&Token::RParen)?;
681                return Ok(values);
682            }
683        }
684    }
685
686    /// Parse an ORDER BY sub-expression optionally followed by ASC or DESC.
687    pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, ParserError> {
688        let expr = self.parser.parse_expr()?;
689
690        let asc = if self.parser.parse_keyword(Keyword::ASC) {
691            Some(true)
692        } else if self.parser.parse_keyword(Keyword::DESC) {
693            Some(false)
694        } else {
695            None
696        };
697
698        let nulls_first = if self
699            .parser
700            .parse_keywords(&[Keyword::NULLS, Keyword::FIRST])
701        {
702            Some(true)
703        } else if self.parser.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) {
704            Some(false)
705        } else {
706            None
707        };
708
709        Ok(OrderByExpr {
710            expr,
711            asc,
712            nulls_first,
713            with_fill: None,
714        })
715    }
716
717    // This is a copy of the equivalent implementation in sqlparser.
718    fn parse_columns(
719        &mut self,
720    ) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), ParserError> {
721        let mut columns = vec![];
722        let mut constraints = vec![];
723        if !self.parser.consume_token(&Token::LParen)
724            || self.parser.consume_token(&Token::RParen)
725        {
726            return Ok((columns, constraints));
727        }
728
729        loop {
730            if let Some(constraint) = self.parser.parse_optional_table_constraint()? {
731                constraints.push(constraint);
732            } else if let Token::Word(_) = self.parser.peek_token().token {
733                let column_def = self.parse_column_def()?;
734                columns.push(column_def);
735            } else {
736                return self.expected(
737                    "column name or constraint definition",
738                    self.parser.peek_token(),
739                );
740            }
741            let comma = self.parser.consume_token(&Token::Comma);
742            if self.parser.consume_token(&Token::RParen) {
743                // allow a trailing comma, even though it's not in standard
744                break;
745            } else if !comma {
746                return self.expected(
747                    "',' or ')' after column definition",
748                    self.parser.peek_token(),
749                );
750            }
751        }
752
753        Ok((columns, constraints))
754    }
755
756    fn parse_column_def(&mut self) -> Result<ColumnDef, ParserError> {
757        let name = self.parser.parse_identifier()?;
758        let data_type = self.parser.parse_data_type()?;
759        let collation = if self.parser.parse_keyword(Keyword::COLLATE) {
760            Some(self.parser.parse_object_name(false)?)
761        } else {
762            None
763        };
764        let mut options = vec![];
765        loop {
766            if self.parser.parse_keyword(Keyword::CONSTRAINT) {
767                let name = Some(self.parser.parse_identifier()?);
768                if let Some(option) = self.parser.parse_optional_column_option()? {
769                    options.push(ColumnOptionDef { name, option });
770                } else {
771                    return self.expected(
772                        "constraint details after CONSTRAINT <name>",
773                        self.parser.peek_token(),
774                    );
775                }
776            } else if let Some(option) = self.parser.parse_optional_column_option()? {
777                options.push(ColumnOptionDef { name: None, option });
778            } else {
779                break;
780            };
781        }
782        Ok(ColumnDef {
783            name,
784            data_type,
785            collation,
786            options,
787        })
788    }
789
790    fn parse_create_external_table(
791        &mut self,
792        unbounded: bool,
793    ) -> Result<Statement, ParserError> {
794        let temporary = self
795            .parser
796            .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY])
797            .is_some();
798        self.parser.expect_keyword(Keyword::TABLE)?;
799        let if_not_exists =
800            self.parser
801                .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
802        let table_name = self.parser.parse_object_name(true)?;
803        let (mut columns, constraints) = self.parse_columns()?;
804
805        #[derive(Default)]
806        struct Builder {
807            file_type: Option<String>,
808            location: Option<String>,
809            table_partition_cols: Option<Vec<String>>,
810            order_exprs: Vec<LexOrdering>,
811            options: Option<Vec<(String, Value)>>,
812        }
813        let mut builder = Builder::default();
814
815        loop {
816            if let Some(keyword) = self.parser.parse_one_of_keywords(&[
817                Keyword::STORED,
818                Keyword::LOCATION,
819                Keyword::WITH,
820                Keyword::DELIMITER,
821                Keyword::COMPRESSION,
822                Keyword::PARTITIONED,
823                Keyword::OPTIONS,
824            ]) {
825                match keyword {
826                    Keyword::STORED => {
827                        self.parser.expect_keyword(Keyword::AS)?;
828                        ensure_not_set(&builder.file_type, "STORED AS")?;
829                        builder.file_type = Some(self.parse_file_format()?);
830                    }
831                    Keyword::LOCATION => {
832                        ensure_not_set(&builder.location, "LOCATION")?;
833                        builder.location = Some(self.parser.parse_literal_string()?);
834                    }
835                    Keyword::WITH => {
836                        if self.parser.parse_keyword(Keyword::ORDER) {
837                            builder.order_exprs.push(self.parse_order_by_exprs()?);
838                        } else {
839                            self.parser.expect_keyword(Keyword::HEADER)?;
840                            self.parser.expect_keyword(Keyword::ROW)?;
841                            return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)");
842                        }
843                    }
844                    Keyword::DELIMITER => {
845                        return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')");
846                    }
847                    Keyword::COMPRESSION => {
848                        self.parser.expect_keyword(Keyword::TYPE)?;
849                        return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)");
850                    }
851                    Keyword::PARTITIONED => {
852                        self.parser.expect_keyword(Keyword::BY)?;
853                        ensure_not_set(&builder.table_partition_cols, "PARTITIONED BY")?;
854                        // Expects either list of column names (col_name [, col_name]*)
855                        // or list of column definitions (col_name datatype [, col_name datatype]* )
856                        // use the token after the name to decide which parsing rule to use
857                        // Note that mixing both names and definitions is not allowed
858                        let peeked = self.parser.peek_nth_token(2);
859                        if peeked == Token::Comma || peeked == Token::RParen {
860                            // List of column names
861                            builder.table_partition_cols = Some(self.parse_partitions()?)
862                        } else {
863                            // List of column defs
864                            let (cols, cons) = self.parse_columns()?;
865                            builder.table_partition_cols = Some(
866                                cols.iter().map(|col| col.name.to_string()).collect(),
867                            );
868
869                            columns.extend(cols);
870
871                            if !cons.is_empty() {
872                                return Err(ParserError::ParserError(
873                                    "Constraints on Partition Columns are not supported"
874                                        .to_string(),
875                                ));
876                            }
877                        }
878                    }
879                    Keyword::OPTIONS => {
880                        ensure_not_set(&builder.options, "OPTIONS")?;
881                        builder.options = Some(self.parse_value_options()?);
882                    }
883                    _ => {
884                        unreachable!()
885                    }
886                }
887            } else {
888                let token = self.parser.next_token();
889                if token == Token::EOF || token == Token::SemiColon {
890                    break;
891                } else {
892                    return Err(ParserError::ParserError(format!(
893                        "Unexpected token {token}"
894                    )));
895                }
896            }
897        }
898
899        // Validations: location and file_type are required
900        if builder.file_type.is_none() {
901            return Err(ParserError::ParserError(
902                "Missing STORED AS clause in CREATE EXTERNAL TABLE statement".into(),
903            ));
904        }
905        if builder.location.is_none() {
906            return Err(ParserError::ParserError(
907                "Missing LOCATION clause in CREATE EXTERNAL TABLE statement".into(),
908            ));
909        }
910
911        let create = CreateExternalTable {
912            name: table_name,
913            columns,
914            file_type: builder.file_type.unwrap(),
915            location: builder.location.unwrap(),
916            table_partition_cols: builder.table_partition_cols.unwrap_or(vec![]),
917            order_exprs: builder.order_exprs,
918            if_not_exists,
919            temporary,
920            unbounded,
921            options: builder.options.unwrap_or(Vec::new()),
922            constraints,
923        };
924        Ok(Statement::CreateExternalTable(create))
925    }
926
927    /// Parses the set of valid formats
928    fn parse_file_format(&mut self) -> Result<String, ParserError> {
929        let token = self.parser.next_token();
930        match &token.token {
931            Token::Word(w) => parse_file_type(&w.value),
932            _ => self.expected("one of ARROW, PARQUET, NDJSON, or CSV", token),
933        }
934    }
935
936    /// Parses (key value) style options into a map of String --> [`Value`].
937    ///
938    /// This method supports keywords as key names as well as multiple
939    /// value types such as Numbers as well as Strings.
940    fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>, ParserError> {
941        let mut options = vec![];
942        self.parser.expect_token(&Token::LParen)?;
943
944        loop {
945            let key = self.parse_option_key()?;
946            let value = self.parse_option_value()?;
947            options.push((key, value));
948            let comma = self.parser.consume_token(&Token::Comma);
949            if self.parser.consume_token(&Token::RParen) {
950                // Allow a trailing comma, even though it's not in standard
951                break;
952            } else if !comma {
953                return self.expected(
954                    "',' or ')' after option definition",
955                    self.parser.peek_token(),
956                );
957            }
958        }
959        Ok(options)
960    }
961}
962
963#[cfg(test)]
964mod tests {
965    use super::*;
966    use datafusion_common::assert_contains;
967    use sqlparser::ast::Expr::Identifier;
968    use sqlparser::ast::{BinaryOperator, DataType, Expr, Ident};
969    use sqlparser::dialect::SnowflakeDialect;
970    use sqlparser::tokenizer::Span;
971
972    fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), ParserError> {
973        let statements = DFParser::parse_sql(sql)?;
974        assert_eq!(
975            statements.len(),
976            1,
977            "Expected to parse exactly one statement"
978        );
979        assert_eq!(statements[0], expected, "actual:\n{:#?}", statements[0]);
980        Ok(())
981    }
982
983    /// Parses sql and asserts that the expected error message was found
984    fn expect_parse_error(sql: &str, expected_error: &str) {
985        match DFParser::parse_sql(sql) {
986            Ok(statements) => {
987                panic!(
988                    "Expected parse error for '{sql}', but was successful: {statements:?}"
989                );
990            }
991            Err(e) => {
992                let error_message = e.to_string();
993                assert!(
994                    error_message.contains(expected_error),
995                    "Expected error '{expected_error}' not found in actual error '{error_message}'"
996                );
997            }
998        }
999    }
1000
1001    fn make_column_def(name: impl Into<String>, data_type: DataType) -> ColumnDef {
1002        ColumnDef {
1003            name: Ident {
1004                value: name.into(),
1005                quote_style: None,
1006                span: Span::empty(),
1007            },
1008            data_type,
1009            collation: None,
1010            options: vec![],
1011        }
1012    }
1013
1014    #[test]
1015    fn create_external_table() -> Result<(), ParserError> {
1016        // positive case
1017        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
1018        let display = None;
1019        let name = ObjectName(vec![Ident::from("t")]);
1020        let expected = Statement::CreateExternalTable(CreateExternalTable {
1021            name: name.clone(),
1022            columns: vec![make_column_def("c1", DataType::Int(display))],
1023            file_type: "CSV".to_string(),
1024            location: "foo.csv".into(),
1025            table_partition_cols: vec![],
1026            order_exprs: vec![],
1027            if_not_exists: false,
1028            temporary: false,
1029            unbounded: false,
1030            options: vec![],
1031            constraints: vec![],
1032        });
1033        expect_parse_ok(sql, expected)?;
1034
1035        // positive case: leading space
1036        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'     ";
1037        let expected = Statement::CreateExternalTable(CreateExternalTable {
1038            name: name.clone(),
1039            columns: vec![make_column_def("c1", DataType::Int(None))],
1040            file_type: "CSV".to_string(),
1041            location: "foo.csv".into(),
1042            table_partition_cols: vec![],
1043            order_exprs: vec![],
1044            if_not_exists: false,
1045            temporary: false,
1046            unbounded: false,
1047            options: vec![],
1048            constraints: vec![],
1049        });
1050        expect_parse_ok(sql, expected)?;
1051
1052        // positive case: leading space + semicolon
1053        let sql =
1054            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'      ;";
1055        let expected = Statement::CreateExternalTable(CreateExternalTable {
1056            name: name.clone(),
1057            columns: vec![make_column_def("c1", DataType::Int(None))],
1058            file_type: "CSV".to_string(),
1059            location: "foo.csv".into(),
1060            table_partition_cols: vec![],
1061            order_exprs: vec![],
1062            if_not_exists: false,
1063            temporary: false,
1064            unbounded: false,
1065            options: vec![],
1066            constraints: vec![],
1067        });
1068        expect_parse_ok(sql, expected)?;
1069
1070        // positive case with delimiter
1071        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
1072        let display = None;
1073        let expected = Statement::CreateExternalTable(CreateExternalTable {
1074            name: name.clone(),
1075            columns: vec![make_column_def("c1", DataType::Int(display))],
1076            file_type: "CSV".to_string(),
1077            location: "foo.csv".into(),
1078            table_partition_cols: vec![],
1079            order_exprs: vec![],
1080            if_not_exists: false,
1081            temporary: false,
1082            unbounded: false,
1083            options: vec![(
1084                "format.delimiter".into(),
1085                Value::SingleQuotedString("|".into()),
1086            )],
1087            constraints: vec![],
1088        });
1089        expect_parse_ok(sql, expected)?;
1090
1091        // positive case: partitioned by
1092        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'";
1093        let display = None;
1094        let expected = Statement::CreateExternalTable(CreateExternalTable {
1095            name: name.clone(),
1096            columns: vec![make_column_def("c1", DataType::Int(display))],
1097            file_type: "CSV".to_string(),
1098            location: "foo.csv".into(),
1099            table_partition_cols: vec!["p1".to_string(), "p2".to_string()],
1100            order_exprs: vec![],
1101            if_not_exists: false,
1102            temporary: false,
1103            unbounded: false,
1104            options: vec![],
1105            constraints: vec![],
1106        });
1107        expect_parse_ok(sql, expected)?;
1108
1109        // positive case: it is ok for sql stmt with `COMPRESSION TYPE GZIP` tokens
1110        let sqls =
1111            vec![
1112             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1113             ('format.compression' 'GZIP')", "GZIP"),
1114             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1115             ('format.compression' 'BZIP2')", "BZIP2"),
1116             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1117             ('format.compression' 'XZ')", "XZ"),
1118             ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS
1119             ('format.compression' 'ZSTD')", "ZSTD"),
1120         ];
1121        for (sql, compression) in sqls {
1122            let expected = Statement::CreateExternalTable(CreateExternalTable {
1123                name: name.clone(),
1124                columns: vec![make_column_def("c1", DataType::Int(display))],
1125                file_type: "CSV".to_string(),
1126                location: "foo.csv".into(),
1127                table_partition_cols: vec![],
1128                order_exprs: vec![],
1129                if_not_exists: false,
1130                temporary: false,
1131                unbounded: false,
1132                options: vec![(
1133                    "format.compression".into(),
1134                    Value::SingleQuotedString(compression.into()),
1135                )],
1136                constraints: vec![],
1137            });
1138            expect_parse_ok(sql, expected)?;
1139        }
1140
1141        // positive case: it is ok for parquet files not to have columns specified
1142        let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
1143        let expected = Statement::CreateExternalTable(CreateExternalTable {
1144            name: name.clone(),
1145            columns: vec![],
1146            file_type: "PARQUET".to_string(),
1147            location: "foo.parquet".into(),
1148            table_partition_cols: vec![],
1149            order_exprs: vec![],
1150            if_not_exists: false,
1151            temporary: false,
1152            unbounded: false,
1153            options: vec![],
1154            constraints: vec![],
1155        });
1156        expect_parse_ok(sql, expected)?;
1157
1158        // positive case: it is ok for parquet files to be other than upper case
1159        let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'";
1160        let expected = Statement::CreateExternalTable(CreateExternalTable {
1161            name: name.clone(),
1162            columns: vec![],
1163            file_type: "PARQUET".to_string(),
1164            location: "foo.parquet".into(),
1165            table_partition_cols: vec![],
1166            order_exprs: vec![],
1167            if_not_exists: false,
1168            temporary: false,
1169            unbounded: false,
1170            options: vec![],
1171            constraints: vec![],
1172        });
1173        expect_parse_ok(sql, expected)?;
1174
1175        // positive case: it is ok for avro files not to have columns specified
1176        let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
1177        let expected = Statement::CreateExternalTable(CreateExternalTable {
1178            name: name.clone(),
1179            columns: vec![],
1180            file_type: "AVRO".to_string(),
1181            location: "foo.avro".into(),
1182            table_partition_cols: vec![],
1183            order_exprs: vec![],
1184            if_not_exists: false,
1185            temporary: false,
1186            unbounded: false,
1187            options: vec![],
1188            constraints: vec![],
1189        });
1190        expect_parse_ok(sql, expected)?;
1191
1192        // positive case: it is ok for avro files not to have columns specified
1193        let sql =
1194            "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'";
1195        let expected = Statement::CreateExternalTable(CreateExternalTable {
1196            name: name.clone(),
1197            columns: vec![],
1198            file_type: "PARQUET".to_string(),
1199            location: "foo.parquet".into(),
1200            table_partition_cols: vec![],
1201            order_exprs: vec![],
1202            if_not_exists: true,
1203            temporary: false,
1204            unbounded: false,
1205            options: vec![],
1206            constraints: vec![],
1207        });
1208        expect_parse_ok(sql, expected)?;
1209
1210        // positive case: column definition allowed in 'partition by' clause
1211        let sql =
1212            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'";
1213        let expected = Statement::CreateExternalTable(CreateExternalTable {
1214            name: name.clone(),
1215            columns: vec![
1216                make_column_def("c1", DataType::Int(None)),
1217                make_column_def("p1", DataType::Int(None)),
1218            ],
1219            file_type: "CSV".to_string(),
1220            location: "foo.csv".into(),
1221            table_partition_cols: vec!["p1".to_string()],
1222            order_exprs: vec![],
1223            if_not_exists: false,
1224            temporary: false,
1225            unbounded: false,
1226            options: vec![],
1227            constraints: vec![],
1228        });
1229        expect_parse_ok(sql, expected)?;
1230
1231        // negative case: mixed column defs and column names in `PARTITIONED BY` clause
1232        let sql =
1233            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'";
1234        expect_parse_error(
1235            sql,
1236            "sql parser error: Expected: a data type name, found: )",
1237        );
1238
1239        // negative case: mixed column defs and column names in `PARTITIONED BY` clause
1240        let sql =
1241            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'";
1242        expect_parse_error(sql, "sql parser error: Expected ',' or ')' after partition definition, found: int");
1243
1244        // positive case: additional options (one entry) can be specified
1245        let sql =
1246            "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'";
1247        let expected = Statement::CreateExternalTable(CreateExternalTable {
1248            name: name.clone(),
1249            columns: vec![],
1250            file_type: "X".to_string(),
1251            location: "blahblah".into(),
1252            table_partition_cols: vec![],
1253            order_exprs: vec![],
1254            if_not_exists: false,
1255            temporary: false,
1256            unbounded: false,
1257            options: vec![("k1".into(), Value::SingleQuotedString("v1".into()))],
1258            constraints: vec![],
1259        });
1260        expect_parse_ok(sql, expected)?;
1261
1262        // positive case: additional options (multiple entries) can be specified
1263        let sql =
1264            "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'";
1265        let expected = Statement::CreateExternalTable(CreateExternalTable {
1266            name: name.clone(),
1267            columns: vec![],
1268            file_type: "X".to_string(),
1269            location: "blahblah".into(),
1270            table_partition_cols: vec![],
1271            order_exprs: vec![],
1272            if_not_exists: false,
1273            temporary: false,
1274            unbounded: false,
1275            options: vec![
1276                ("k1".into(), Value::SingleQuotedString("v1".into())),
1277                ("k2".into(), Value::SingleQuotedString("v2".into())),
1278            ],
1279            constraints: vec![],
1280        });
1281        expect_parse_ok(sql, expected)?;
1282
1283        // Ordered Col
1284        let sqls = ["CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'",
1285                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'",
1286                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS LAST) LOCATION 'foo.csv'",
1287                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC) LOCATION 'foo.csv'",
1288                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC) LOCATION 'foo.csv'",
1289                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS FIRST) LOCATION 'foo.csv'",
1290                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS LAST) LOCATION 'foo.csv'",
1291                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS FIRST) LOCATION 'foo.csv'",
1292                        "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS LAST) LOCATION 'foo.csv'"];
1293        let expected = vec![
1294            (None, None),
1295            (None, Some(true)),
1296            (None, Some(false)),
1297            (Some(true), None),
1298            (Some(false), None),
1299            (Some(false), Some(true)),
1300            (Some(false), Some(false)),
1301            (Some(true), Some(true)),
1302            (Some(true), Some(false)),
1303        ];
1304        for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) {
1305            let expected = Statement::CreateExternalTable(CreateExternalTable {
1306                name: name.clone(),
1307                columns: vec![make_column_def("c1", DataType::Int(None))],
1308                file_type: "CSV".to_string(),
1309                location: "foo.csv".into(),
1310                table_partition_cols: vec![],
1311                order_exprs: vec![vec![OrderByExpr {
1312                    expr: Identifier(Ident {
1313                        value: "c1".to_owned(),
1314                        quote_style: None,
1315                        span: Span::empty(),
1316                    }),
1317                    asc,
1318                    nulls_first,
1319                    with_fill: None,
1320                }]],
1321                if_not_exists: false,
1322                temporary: false,
1323                unbounded: false,
1324                options: vec![],
1325                constraints: vec![],
1326            });
1327            expect_parse_ok(sql, expected)?;
1328        }
1329
1330        // Ordered Col
1331        let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
1332        let display = None;
1333        let expected = Statement::CreateExternalTable(CreateExternalTable {
1334            name: name.clone(),
1335            columns: vec![
1336                make_column_def("c1", DataType::Int(display)),
1337                make_column_def("c2", DataType::Int(display)),
1338            ],
1339            file_type: "CSV".to_string(),
1340            location: "foo.csv".into(),
1341            table_partition_cols: vec![],
1342            order_exprs: vec![vec![
1343                OrderByExpr {
1344                    expr: Identifier(Ident {
1345                        value: "c1".to_owned(),
1346                        quote_style: None,
1347                        span: Span::empty(),
1348                    }),
1349                    asc: Some(true),
1350                    nulls_first: None,
1351                    with_fill: None,
1352                },
1353                OrderByExpr {
1354                    expr: Identifier(Ident {
1355                        value: "c2".to_owned(),
1356                        quote_style: None,
1357                        span: Span::empty(),
1358                    }),
1359                    asc: Some(false),
1360                    nulls_first: Some(true),
1361                    with_fill: None,
1362                },
1363            ]],
1364            if_not_exists: false,
1365            temporary: false,
1366            unbounded: false,
1367            options: vec![],
1368            constraints: vec![],
1369        });
1370        expect_parse_ok(sql, expected)?;
1371
1372        // Ordered Binary op
1373        let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
1374        let display = None;
1375        let expected = Statement::CreateExternalTable(CreateExternalTable {
1376            name: name.clone(),
1377            columns: vec![
1378                make_column_def("c1", DataType::Int(display)),
1379                make_column_def("c2", DataType::Int(display)),
1380            ],
1381            file_type: "CSV".to_string(),
1382            location: "foo.csv".into(),
1383            table_partition_cols: vec![],
1384            order_exprs: vec![vec![OrderByExpr {
1385                expr: Expr::BinaryOp {
1386                    left: Box::new(Identifier(Ident {
1387                        value: "c1".to_owned(),
1388                        quote_style: None,
1389                        span: Span::empty(),
1390                    })),
1391                    op: BinaryOperator::Minus,
1392                    right: Box::new(Identifier(Ident {
1393                        value: "c2".to_owned(),
1394                        quote_style: None,
1395                        span: Span::empty(),
1396                    })),
1397                },
1398                asc: Some(true),
1399                nulls_first: None,
1400                with_fill: None,
1401            }]],
1402            if_not_exists: false,
1403            temporary: false,
1404            unbounded: false,
1405            options: vec![],
1406            constraints: vec![],
1407        });
1408        expect_parse_ok(sql, expected)?;
1409
1410        // Most complete CREATE EXTERNAL TABLE statement possible
1411        let sql = "
1412            CREATE UNBOUNDED EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float)
1413            STORED AS PARQUET
1414            WITH ORDER (c1 - c2 ASC)
1415            PARTITIONED BY (c1)
1416            LOCATION 'foo.parquet'
1417            OPTIONS ('format.compression' 'zstd',
1418                     'format.delimiter' '*',
1419                     'ROW_GROUP_SIZE' '1024',
1420                     'TRUNCATE' 'NO',
1421                     'format.has_header' 'true')";
1422        let expected = Statement::CreateExternalTable(CreateExternalTable {
1423            name: name.clone(),
1424            columns: vec![
1425                make_column_def("c1", DataType::Int(None)),
1426                make_column_def("c2", DataType::Float(None)),
1427            ],
1428            file_type: "PARQUET".to_string(),
1429            location: "foo.parquet".into(),
1430            table_partition_cols: vec!["c1".into()],
1431            order_exprs: vec![vec![OrderByExpr {
1432                expr: Expr::BinaryOp {
1433                    left: Box::new(Identifier(Ident {
1434                        value: "c1".to_owned(),
1435                        quote_style: None,
1436                        span: Span::empty(),
1437                    })),
1438                    op: BinaryOperator::Minus,
1439                    right: Box::new(Identifier(Ident {
1440                        value: "c2".to_owned(),
1441                        quote_style: None,
1442                        span: Span::empty(),
1443                    })),
1444                },
1445                asc: Some(true),
1446                nulls_first: None,
1447                with_fill: None,
1448            }]],
1449            if_not_exists: true,
1450            temporary: false,
1451            unbounded: true,
1452            options: vec![
1453                (
1454                    "format.compression".into(),
1455                    Value::SingleQuotedString("zstd".into()),
1456                ),
1457                (
1458                    "format.delimiter".into(),
1459                    Value::SingleQuotedString("*".into()),
1460                ),
1461                (
1462                    "ROW_GROUP_SIZE".into(),
1463                    Value::SingleQuotedString("1024".into()),
1464                ),
1465                ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())),
1466                (
1467                    "format.has_header".into(),
1468                    Value::SingleQuotedString("true".into()),
1469                ),
1470            ],
1471            constraints: vec![],
1472        });
1473        expect_parse_ok(sql, expected)?;
1474
1475        // For error cases, see: `create_external_table.slt`
1476
1477        Ok(())
1478    }
1479
1480    #[test]
1481    fn copy_to_table_to_table() -> Result<(), ParserError> {
1482        // positive case
1483        let sql = "COPY foo TO bar STORED AS CSV";
1484        let expected = Statement::CopyTo(CopyToStatement {
1485            source: object_name("foo"),
1486            target: "bar".to_string(),
1487            partitioned_by: vec![],
1488            stored_as: Some("CSV".to_owned()),
1489            options: vec![],
1490        });
1491
1492        assert_eq!(verified_stmt(sql), expected);
1493        Ok(())
1494    }
1495
1496    #[test]
1497    fn skip_copy_into_snowflake() -> Result<(), ParserError> {
1498        let sql = "COPY INTO foo FROM @~/staged FILE_FORMAT = (FORMAT_NAME = 'mycsv');";
1499        let dialect = Box::new(SnowflakeDialect);
1500        let statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?;
1501
1502        assert_eq!(
1503            statements.len(),
1504            1,
1505            "Expected to parse exactly one statement"
1506        );
1507        if let Statement::CopyTo(_) = &statements[0] {
1508            panic!("Expected non COPY TO statement, but was successful: {statements:?}");
1509        }
1510        Ok(())
1511    }
1512
1513    #[test]
1514    fn explain_copy_to_table_to_table() -> Result<(), ParserError> {
1515        let cases = vec![
1516            ("EXPLAIN COPY foo TO bar STORED AS PARQUET", false, false),
1517            (
1518                "EXPLAIN ANALYZE COPY foo TO bar STORED AS PARQUET",
1519                true,
1520                false,
1521            ),
1522            (
1523                "EXPLAIN VERBOSE COPY foo TO bar STORED AS PARQUET",
1524                false,
1525                true,
1526            ),
1527            (
1528                "EXPLAIN ANALYZE VERBOSE COPY foo TO bar STORED AS PARQUET",
1529                true,
1530                true,
1531            ),
1532        ];
1533        for (sql, analyze, verbose) in cases {
1534            println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}");
1535
1536            let expected_copy = Statement::CopyTo(CopyToStatement {
1537                source: object_name("foo"),
1538                target: "bar".to_string(),
1539                partitioned_by: vec![],
1540                stored_as: Some("PARQUET".to_owned()),
1541                options: vec![],
1542            });
1543            let expected = Statement::Explain(ExplainStatement {
1544                analyze,
1545                verbose,
1546                statement: Box::new(expected_copy),
1547            });
1548            assert_eq!(verified_stmt(sql), expected);
1549        }
1550        Ok(())
1551    }
1552
1553    #[test]
1554    fn copy_to_query_to_table() -> Result<(), ParserError> {
1555        let statement = verified_stmt("SELECT 1");
1556
1557        // unwrap the various layers
1558        let statement = if let Statement::Statement(statement) = statement {
1559            *statement
1560        } else {
1561            panic!("Expected statement, got {statement:?}");
1562        };
1563
1564        let query = if let SQLStatement::Query(query) = statement {
1565            query
1566        } else {
1567            panic!("Expected query, got {statement:?}");
1568        };
1569
1570        let sql =
1571            "COPY (SELECT 1) TO bar STORED AS CSV OPTIONS ('format.has_header' 'true')";
1572        let expected = Statement::CopyTo(CopyToStatement {
1573            source: CopyToSource::Query(query),
1574            target: "bar".to_string(),
1575            partitioned_by: vec![],
1576            stored_as: Some("CSV".to_owned()),
1577            options: vec![(
1578                "format.has_header".into(),
1579                Value::SingleQuotedString("true".into()),
1580            )],
1581        });
1582        assert_eq!(verified_stmt(sql), expected);
1583        Ok(())
1584    }
1585
1586    #[test]
1587    fn copy_to_options() -> Result<(), ParserError> {
1588        let sql = "COPY foo TO bar STORED AS CSV OPTIONS ('row_group_size' '55')";
1589        let expected = Statement::CopyTo(CopyToStatement {
1590            source: object_name("foo"),
1591            target: "bar".to_string(),
1592            partitioned_by: vec![],
1593            stored_as: Some("CSV".to_owned()),
1594            options: vec![(
1595                "row_group_size".to_string(),
1596                Value::SingleQuotedString("55".to_string()),
1597            )],
1598        });
1599        assert_eq!(verified_stmt(sql), expected);
1600        Ok(())
1601    }
1602
1603    #[test]
1604    fn copy_to_partitioned_by() -> Result<(), ParserError> {
1605        let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')";
1606        let expected = Statement::CopyTo(CopyToStatement {
1607            source: object_name("foo"),
1608            target: "bar".to_string(),
1609            partitioned_by: vec!["a".to_string()],
1610            stored_as: Some("CSV".to_owned()),
1611            options: vec![(
1612                "row_group_size".to_string(),
1613                Value::SingleQuotedString("55".to_string()),
1614            )],
1615        });
1616        assert_eq!(verified_stmt(sql), expected);
1617        Ok(())
1618    }
1619
1620    #[test]
1621    fn copy_to_multi_options() -> Result<(), ParserError> {
1622        // order of options is preserved
1623        let sql =
1624            "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy, 'execution.keep_partition_by_columns' true)";
1625
1626        let expected_options = vec![
1627            (
1628                "format.row_group_size".to_string(),
1629                Value::Number("55".to_string(), false),
1630            ),
1631            (
1632                "format.compression".to_string(),
1633                Value::SingleQuotedString("snappy".to_string()),
1634            ),
1635            (
1636                "execution.keep_partition_by_columns".to_string(),
1637                Value::SingleQuotedString("true".to_string()),
1638            ),
1639        ];
1640
1641        let mut statements = DFParser::parse_sql(sql).unwrap();
1642        assert_eq!(statements.len(), 1);
1643        let only_statement = statements.pop_front().unwrap();
1644
1645        let options = if let Statement::CopyTo(copy_to) = only_statement {
1646            copy_to.options
1647        } else {
1648            panic!("Expected copy");
1649        };
1650
1651        assert_eq!(options, expected_options);
1652
1653        Ok(())
1654    }
1655
1656    // For error cases, see: `copy.slt`
1657
1658    fn object_name(name: &str) -> CopyToSource {
1659        CopyToSource::Relation(ObjectName(vec![Ident::new(name)]))
1660    }
1661
1662    // Based on  sqlparser-rs
1663    // https://github.com/sqlparser-rs/sqlparser-rs/blob/ae3b5844c839072c235965fe0d1bddc473dced87/src/test_utils.rs#L104-L116
1664
1665    /// Ensures that `sql` parses as a single [Statement]
1666    ///
1667    /// If `canonical` is non empty,this function additionally asserts
1668    /// that:
1669    ///
1670    /// 1. parsing `sql` results in the same [`Statement`] as parsing
1671    ///    `canonical`.
1672    ///
1673    /// 2. re-serializing the result of parsing `sql` produces the same
1674    ///    `canonical` sql string
1675    fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement {
1676        let mut statements = DFParser::parse_sql(sql).unwrap();
1677        assert_eq!(statements.len(), 1);
1678
1679        if sql != canonical {
1680            assert_eq!(DFParser::parse_sql(canonical).unwrap(), statements);
1681        }
1682
1683        let only_statement = statements.pop_front().unwrap();
1684        assert_eq!(
1685            canonical.to_uppercase(),
1686            only_statement.to_string().to_uppercase()
1687        );
1688        only_statement
1689    }
1690
1691    /// Ensures that `sql` parses as a single [Statement], and that
1692    /// re-serializing the parse result produces the same `sql`
1693    /// string (is not modified after a serialization round-trip).
1694    fn verified_stmt(sql: &str) -> Statement {
1695        one_statement_parses_to(sql, sql)
1696    }
1697
1698    #[test]
1699    /// Checks the recursion limit works for sql queries
1700    /// Recursion can happen easily with binary exprs (i.e, AND or OR)
1701    fn test_recursion_limit() {
1702        let sql = "SELECT 1 OR 2";
1703
1704        // Expect parse to succeed
1705        DFParserBuilder::new(sql)
1706            .build()
1707            .unwrap()
1708            .parse_statements()
1709            .unwrap();
1710
1711        let err = DFParserBuilder::new(sql)
1712            .with_recursion_limit(1)
1713            .build()
1714            .unwrap()
1715            .parse_statements()
1716            .unwrap_err();
1717
1718        assert_contains!(
1719            err.to_string(),
1720            "sql parser error: recursion limit exceeded"
1721        );
1722    }
1723}