sqltk_parser/dialect/
snowflake.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#[cfg(not(feature = "std"))]
19use crate::alloc::string::ToString;
20use crate::ast::helpers::key_value_options::{KeyValueOption, KeyValueOptionType, KeyValueOptions};
21use crate::ast::helpers::stmt_create_table::CreateTableBuilder;
22use crate::ast::helpers::stmt_data_loading::{
23    FileStagingCommand, StageLoadSelectItem, StageLoadSelectItemKind, StageParamsObject,
24};
25use crate::ast::{
26    ColumnOption, ColumnPolicy, ColumnPolicyProperty, CopyIntoSnowflakeKind, Ident,
27    IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind,
28    IdentityPropertyOrder, ObjectName, RowAccessPolicy, ShowObjects, SqlOption, Statement,
29    TagsColumnOption, WrappedCollection,
30};
31use crate::dialect::{Dialect, Precedence};
32use crate::keywords::Keyword;
33use crate::parser::{IsOptional, Parser, ParserError};
34use crate::tokenizer::{Token, Word};
35#[cfg(not(feature = "std"))]
36use alloc::boxed::Box;
37#[cfg(not(feature = "std"))]
38use alloc::string::String;
39#[cfg(not(feature = "std"))]
40use alloc::vec::Vec;
41#[cfg(not(feature = "std"))]
42use alloc::{format, vec};
43
44use super::keywords::RESERVED_FOR_IDENTIFIER;
45use sqltk_parser::ast::StorageSerializationPolicy;
46
/// Keywords returned by `get_reserved_keywords_for_select_item_operator` for the
/// Snowflake dialect; currently only `CONNECT_BY_ROOT`.
const RESERVED_KEYWORDS_FOR_SELECT_ITEM_OPERATOR: [Keyword; 1] = [Keyword::CONNECT_BY_ROOT];
/// A [`Dialect`] for [Snowflake](https://www.snowflake.com/)
///
/// The type carries no state; all behaviour comes from its [`Dialect`] implementation.
#[derive(Debug, Default)]
pub struct SnowflakeDialect;
51
52impl Dialect for SnowflakeDialect {
53    // see https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html
54    fn is_identifier_start(&self, ch: char) -> bool {
55        ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
56    }
57
58    fn supports_projection_trailing_commas(&self) -> bool {
59        true
60    }
61
62    fn supports_from_trailing_commas(&self) -> bool {
63        true
64    }
65
66    // Snowflake supports double-dot notation when the schema name is not specified
67    // In this case the default PUBLIC schema is used
68    //
69    // see https://docs.snowflake.com/en/sql-reference/name-resolution#resolution-when-schema-omitted-double-dot-notation
70    fn supports_object_name_double_dot_notation(&self) -> bool {
71        true
72    }
73
74    fn is_identifier_part(&self, ch: char) -> bool {
75        ch.is_ascii_lowercase()
76            || ch.is_ascii_uppercase()
77            || ch.is_ascii_digit()
78            || ch == '$'
79            || ch == '_'
80    }
81
82    // See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#escape_sequences
83    fn supports_string_literal_backslash_escape(&self) -> bool {
84        true
85    }
86
87    fn supports_within_after_array_aggregation(&self) -> bool {
88        true
89    }
90
91    /// See <https://docs.snowflake.com/en/sql-reference/constructs/where#joins-in-the-where-clause>
92    fn supports_outer_join_operator(&self) -> bool {
93        true
94    }
95
96    fn supports_connect_by(&self) -> bool {
97        true
98    }
99
100    /// See <https://docs.snowflake.com/en/sql-reference/sql/execute-immediate>
101    fn supports_execute_immediate(&self) -> bool {
102        true
103    }
104
105    fn supports_match_recognize(&self) -> bool {
106        true
107    }
108
109    // Snowflake uses this syntax for "object constants" (the values of which
110    // are not actually required to be constants).
111    //
112    // https://docs.snowflake.com/en/sql-reference/data-types-semistructured#label-object-constant
113    fn supports_dictionary_syntax(&self) -> bool {
114        true
115    }
116
117    // Snowflake doesn't document this but `FIRST_VALUE(arg, { IGNORE | RESPECT } NULLS)`
118    // works (i.e. inside the argument list instead of after).
119    fn supports_window_function_null_treatment_arg(&self) -> bool {
120        true
121    }
122
123    /// See [doc](https://docs.snowflake.com/en/sql-reference/sql/set#syntax)
124    fn supports_parenthesized_set_variables(&self) -> bool {
125        true
126    }
127
128    /// See [doc](https://docs.snowflake.com/en/sql-reference/sql/comment)
129    fn supports_comment_on(&self) -> bool {
130        true
131    }
132
133    fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
134        if parser.parse_keywords(&[Keyword::ALTER, Keyword::SESSION]) {
135            // ALTER SESSION
136            let set = match parser.parse_one_of_keywords(&[Keyword::SET, Keyword::UNSET]) {
137                Some(Keyword::SET) => true,
138                Some(Keyword::UNSET) => false,
139                _ => return Some(parser.expected("SET or UNSET", parser.peek_token())),
140            };
141            return Some(parse_alter_session(parser, set));
142        }
143
144        if parser.parse_keyword(Keyword::CREATE) {
145            // possibly CREATE STAGE
146            //[ OR  REPLACE ]
147            let or_replace = parser.parse_keywords(&[Keyword::OR, Keyword::REPLACE]);
148            // LOCAL | GLOBAL
149            let global = match parser.parse_one_of_keywords(&[Keyword::LOCAL, Keyword::GLOBAL]) {
150                Some(Keyword::LOCAL) => Some(false),
151                Some(Keyword::GLOBAL) => Some(true),
152                _ => None,
153            };
154
155            let mut temporary = false;
156            let mut volatile = false;
157            let mut transient = false;
158            let mut iceberg = false;
159
160            match parser.parse_one_of_keywords(&[
161                Keyword::TEMP,
162                Keyword::TEMPORARY,
163                Keyword::VOLATILE,
164                Keyword::TRANSIENT,
165                Keyword::ICEBERG,
166            ]) {
167                Some(Keyword::TEMP | Keyword::TEMPORARY) => temporary = true,
168                Some(Keyword::VOLATILE) => volatile = true,
169                Some(Keyword::TRANSIENT) => transient = true,
170                Some(Keyword::ICEBERG) => iceberg = true,
171                _ => {}
172            }
173
174            if parser.parse_keyword(Keyword::STAGE) {
175                // OK - this is CREATE STAGE statement
176                return Some(parse_create_stage(or_replace, temporary, parser));
177            } else if parser.parse_keyword(Keyword::TABLE) {
178                return Some(parse_create_table(
179                    or_replace, global, temporary, volatile, transient, iceberg, parser,
180                ));
181            } else {
182                // need to go back with the cursor
183                let mut back = 1;
184                if or_replace {
185                    back += 2
186                }
187                if temporary {
188                    back += 1
189                }
190                for _i in 0..back {
191                    parser.prev_token();
192                }
193            }
194        }
195        if parser.parse_keywords(&[Keyword::COPY, Keyword::INTO]) {
196            // COPY INTO
197            return Some(parse_copy_into(parser));
198        }
199
200        if let Some(kw) = parser.parse_one_of_keywords(&[
201            Keyword::LIST,
202            Keyword::LS,
203            Keyword::REMOVE,
204            Keyword::RM,
205        ]) {
206            return Some(parse_file_staging_command(kw, parser));
207        }
208
209        if parser.parse_keyword(Keyword::SHOW) {
210            let terse = parser.parse_keyword(Keyword::TERSE);
211            if parser.parse_keyword(Keyword::OBJECTS) {
212                return Some(parse_show_objects(terse, parser));
213            }
214            //Give back Keyword::TERSE
215            if terse {
216                parser.prev_token();
217            }
218            //Give back Keyword::SHOW
219            parser.prev_token();
220        }
221
222        None
223    }
224
225    fn parse_column_option(
226        &self,
227        parser: &mut Parser,
228    ) -> Result<Option<Result<Option<ColumnOption>, ParserError>>, ParserError> {
229        parser.maybe_parse(|parser| {
230            let with = parser.parse_keyword(Keyword::WITH);
231
232            if parser.parse_keyword(Keyword::IDENTITY) {
233                Ok(parse_identity_property(parser)
234                    .map(|p| Some(ColumnOption::Identity(IdentityPropertyKind::Identity(p)))))
235            } else if parser.parse_keyword(Keyword::AUTOINCREMENT) {
236                Ok(parse_identity_property(parser).map(|p| {
237                    Some(ColumnOption::Identity(IdentityPropertyKind::Autoincrement(
238                        p,
239                    )))
240                }))
241            } else if parser.parse_keywords(&[Keyword::MASKING, Keyword::POLICY]) {
242                Ok(parse_column_policy_property(parser, with)
243                    .map(|p| Some(ColumnOption::Policy(ColumnPolicy::MaskingPolicy(p)))))
244            } else if parser.parse_keywords(&[Keyword::PROJECTION, Keyword::POLICY]) {
245                Ok(parse_column_policy_property(parser, with)
246                    .map(|p| Some(ColumnOption::Policy(ColumnPolicy::ProjectionPolicy(p)))))
247            } else if parser.parse_keywords(&[Keyword::TAG]) {
248                Ok(parse_column_tags(parser, with).map(|p| Some(ColumnOption::Tags(p))))
249            } else {
250                Err(ParserError::ParserError("not found match".to_string()))
251            }
252        })
253    }
254
255    fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
256        let token = parser.peek_token();
257        // Snowflake supports the `:` cast operator unlike other dialects
258        match token.token {
259            Token::Colon => Some(Ok(self.prec_value(Precedence::DoubleColon))),
260            _ => None,
261        }
262    }
263
264    fn describe_requires_table_keyword(&self) -> bool {
265        true
266    }
267
268    fn allow_extract_custom(&self) -> bool {
269        true
270    }
271
272    fn allow_extract_single_quotes(&self) -> bool {
273        true
274    }
275
276    /// Snowflake expects the `LIKE` option before the `IN` option,
277    /// for example: <https://docs.snowflake.com/en/sql-reference/sql/show-views#syntax>
278    fn supports_show_like_before_in(&self) -> bool {
279        true
280    }
281
282    fn is_reserved_for_identifier(&self, kw: Keyword) -> bool {
283        // Unreserve some keywords that Snowflake accepts as identifiers
284        // See: https://docs.snowflake.com/en/sql-reference/reserved-keywords
285        if matches!(kw, Keyword::INTERVAL) {
286            false
287        } else {
288            RESERVED_FOR_IDENTIFIER.contains(&kw)
289        }
290    }
291
292    fn supports_partiql(&self) -> bool {
293        true
294    }
295
296    fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
297        explicit
298            || match kw {
299            // The following keywords can be considered an alias as long as 
300            // they are not followed by other tokens that may change their meaning
301            // e.g. `SELECT * EXCEPT (col1) FROM tbl`
302            Keyword::EXCEPT
303            // e.g. `SELECT 1 LIMIT 5`
304            | Keyword::LIMIT
305            // e.g. `SELECT 1 OFFSET 5 ROWS`
306            | Keyword::OFFSET
307            // e.g. `INSERT INTO t SELECT 1 RETURNING *`
308            | Keyword::RETURNING if !matches!(parser.peek_token_ref().token, Token::Comma | Token::EOF) =>
309            {
310                false
311            }
312
313            // `FETCH` can be considered an alias as long as it's not followed by `FIRST`` or `NEXT`
314            // which would give it a different meanings, for example: `SELECT 1 FETCH FIRST 10 ROWS` - not an alias
315            Keyword::FETCH
316                if parser.peek_keyword(Keyword::FIRST) || parser.peek_keyword(Keyword::NEXT) =>
317            {
318                false
319            }
320
321            // Reserved keywords by the Snowflake dialect, which seem to be less strictive 
322            // than what is listed in `keywords::RESERVED_FOR_COLUMN_ALIAS`. The following 
323            // keywords were tested with the this statement: `SELECT 1 <KW>`.
324            Keyword::FROM
325            | Keyword::GROUP
326            | Keyword::HAVING
327            | Keyword::INTERSECT
328            | Keyword::INTO
329            | Keyword::MINUS
330            | Keyword::ORDER
331            | Keyword::SELECT
332            | Keyword::UNION
333            | Keyword::WHERE
334            | Keyword::WITH => false,
335
336            // Any other word is considered an alias
337            _ => true,
338        }
339    }
340
341    /// See: <https://docs.snowflake.com/en/sql-reference/constructs/at-before>
342    fn supports_timestamp_versioning(&self) -> bool {
343        true
344    }
345
346    /// See: <https://docs.snowflake.com/en/sql-reference/constructs/group-by>
347    fn supports_group_by_expr(&self) -> bool {
348        true
349    }
350
351    /// See: <https://docs.snowflake.com/en/sql-reference/constructs/connect-by>
352    fn get_reserved_keywords_for_select_item_operator(&self) -> &[Keyword] {
353        &RESERVED_KEYWORDS_FOR_SELECT_ITEM_OPERATOR
354    }
355}
356
357fn parse_file_staging_command(kw: Keyword, parser: &mut Parser) -> Result<Statement, ParserError> {
358    let stage = parse_snowflake_stage_name(parser)?;
359    let pattern = if parser.parse_keyword(Keyword::PATTERN) {
360        parser.expect_token(&Token::Eq)?;
361        Some(parser.parse_literal_string()?)
362    } else {
363        None
364    };
365
366    match kw {
367        Keyword::LIST | Keyword::LS => Ok(Statement::List(FileStagingCommand { stage, pattern })),
368        Keyword::REMOVE | Keyword::RM => {
369            Ok(Statement::Remove(FileStagingCommand { stage, pattern }))
370        }
371        _ => Err(ParserError::ParserError(
372            "unexpected stage command, expecting LIST, LS, REMOVE or RM".to_string(),
373        )),
374    }
375}
376
377/// Parse snowflake alter session.
378/// <https://docs.snowflake.com/en/sql-reference/sql/alter-session>
379fn parse_alter_session(parser: &mut Parser, set: bool) -> Result<Statement, ParserError> {
380    let session_options = parse_session_options(parser, set)?;
381    Ok(Statement::AlterSession {
382        set,
383        session_params: KeyValueOptions {
384            options: session_options,
385        },
386    })
387}
388
389/// Parse snowflake create table statement.
390/// <https://docs.snowflake.com/en/sql-reference/sql/create-table>
391/// <https://docs.snowflake.com/en/sql-reference/sql/create-iceberg-table>
392pub fn parse_create_table(
393    or_replace: bool,
394    global: Option<bool>,
395    temporary: bool,
396    volatile: bool,
397    transient: bool,
398    iceberg: bool,
399    parser: &mut Parser,
400) -> Result<Statement, ParserError> {
401    let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
402    let table_name = parser.parse_object_name(false)?;
403
404    let mut builder = CreateTableBuilder::new(table_name)
405        .or_replace(or_replace)
406        .if_not_exists(if_not_exists)
407        .temporary(temporary)
408        .transient(transient)
409        .volatile(volatile)
410        .iceberg(iceberg)
411        .global(global)
412        .hive_formats(Some(Default::default()));
413
414    // Snowflake does not enforce order of the parameters in the statement. The parser needs to
415    // parse the statement in a loop.
416    //
417    // "CREATE TABLE x COPY GRANTS (c INT)" and "CREATE TABLE x (c INT) COPY GRANTS" are both
418    // accepted by Snowflake
419
420    let mut plain_options = vec![];
421
422    loop {
423        let next_token = parser.next_token();
424        match &next_token.token {
425            Token::Word(word) => match word.keyword {
426                Keyword::COPY => {
427                    parser.expect_keyword_is(Keyword::GRANTS)?;
428                    builder = builder.copy_grants(true);
429                }
430                Keyword::COMMENT => {
431                    // Rewind the COMMENT keyword
432                    parser.prev_token();
433                    if let Some(comment_def) = parser.parse_optional_inline_comment()? {
434                        plain_options.push(SqlOption::Comment(comment_def))
435                    }
436                }
437                Keyword::AS => {
438                    let query = parser.parse_query()?;
439                    builder = builder.query(Some(query));
440                    break;
441                }
442                Keyword::CLONE => {
443                    let clone = parser.parse_object_name(false).ok();
444                    builder = builder.clone_clause(clone);
445                    break;
446                }
447                Keyword::LIKE => {
448                    let like = parser.parse_object_name(false).ok();
449                    builder = builder.like(like);
450                    break;
451                }
452                Keyword::CLUSTER => {
453                    parser.expect_keyword_is(Keyword::BY)?;
454                    parser.expect_token(&Token::LParen)?;
455                    let cluster_by = Some(WrappedCollection::Parentheses(
456                        parser.parse_comma_separated(|p| p.parse_identifier())?,
457                    ));
458                    parser.expect_token(&Token::RParen)?;
459
460                    builder = builder.cluster_by(cluster_by)
461                }
462                Keyword::ENABLE_SCHEMA_EVOLUTION => {
463                    parser.expect_token(&Token::Eq)?;
464                    let enable_schema_evolution =
465                        match parser.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]) {
466                            Some(Keyword::TRUE) => true,
467                            Some(Keyword::FALSE) => false,
468                            _ => {
469                                return parser.expected("TRUE or FALSE", next_token);
470                            }
471                        };
472
473                    builder = builder.enable_schema_evolution(Some(enable_schema_evolution));
474                }
475                Keyword::CHANGE_TRACKING => {
476                    parser.expect_token(&Token::Eq)?;
477                    let change_tracking =
478                        match parser.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]) {
479                            Some(Keyword::TRUE) => true,
480                            Some(Keyword::FALSE) => false,
481                            _ => {
482                                return parser.expected("TRUE or FALSE", next_token);
483                            }
484                        };
485
486                    builder = builder.change_tracking(Some(change_tracking));
487                }
488                Keyword::DATA_RETENTION_TIME_IN_DAYS => {
489                    parser.expect_token(&Token::Eq)?;
490                    let data_retention_time_in_days = parser.parse_literal_uint()?;
491                    builder =
492                        builder.data_retention_time_in_days(Some(data_retention_time_in_days));
493                }
494                Keyword::MAX_DATA_EXTENSION_TIME_IN_DAYS => {
495                    parser.expect_token(&Token::Eq)?;
496                    let max_data_extension_time_in_days = parser.parse_literal_uint()?;
497                    builder = builder
498                        .max_data_extension_time_in_days(Some(max_data_extension_time_in_days));
499                }
500                Keyword::DEFAULT_DDL_COLLATION => {
501                    parser.expect_token(&Token::Eq)?;
502                    let default_ddl_collation = parser.parse_literal_string()?;
503                    builder = builder.default_ddl_collation(Some(default_ddl_collation));
504                }
505                // WITH is optional, we just verify that next token is one of the expected ones and
506                // fallback to the default match statement
507                Keyword::WITH => {
508                    parser.expect_one_of_keywords(&[
509                        Keyword::AGGREGATION,
510                        Keyword::TAG,
511                        Keyword::ROW,
512                    ])?;
513                    parser.prev_token();
514                }
515                Keyword::AGGREGATION => {
516                    parser.expect_keyword_is(Keyword::POLICY)?;
517                    let aggregation_policy = parser.parse_object_name(false)?;
518                    builder = builder.with_aggregation_policy(Some(aggregation_policy));
519                }
520                Keyword::ROW => {
521                    parser.expect_keywords(&[Keyword::ACCESS, Keyword::POLICY])?;
522                    let policy = parser.parse_object_name(false)?;
523                    parser.expect_keyword_is(Keyword::ON)?;
524                    parser.expect_token(&Token::LParen)?;
525                    let columns = parser.parse_comma_separated(|p| p.parse_identifier())?;
526                    parser.expect_token(&Token::RParen)?;
527
528                    builder =
529                        builder.with_row_access_policy(Some(RowAccessPolicy::new(policy, columns)))
530                }
531                Keyword::TAG => {
532                    parser.expect_token(&Token::LParen)?;
533                    let tags = parser.parse_comma_separated(Parser::parse_tag)?;
534                    parser.expect_token(&Token::RParen)?;
535                    builder = builder.with_tags(Some(tags));
536                }
537                Keyword::ON if parser.parse_keyword(Keyword::COMMIT) => {
538                    let on_commit = Some(parser.parse_create_table_on_commit()?);
539                    builder = builder.on_commit(on_commit);
540                }
541                Keyword::EXTERNAL_VOLUME => {
542                    parser.expect_token(&Token::Eq)?;
543                    builder.external_volume = Some(parser.parse_literal_string()?);
544                }
545                Keyword::CATALOG => {
546                    parser.expect_token(&Token::Eq)?;
547                    builder.catalog = Some(parser.parse_literal_string()?);
548                }
549                Keyword::BASE_LOCATION => {
550                    parser.expect_token(&Token::Eq)?;
551                    builder.base_location = Some(parser.parse_literal_string()?);
552                }
553                Keyword::CATALOG_SYNC => {
554                    parser.expect_token(&Token::Eq)?;
555                    builder.catalog_sync = Some(parser.parse_literal_string()?);
556                }
557                Keyword::STORAGE_SERIALIZATION_POLICY => {
558                    parser.expect_token(&Token::Eq)?;
559
560                    builder.storage_serialization_policy =
561                        Some(parse_storage_serialization_policy(parser)?);
562                }
563                _ => {
564                    return parser.expected("end of statement", next_token);
565                }
566            },
567            Token::LParen => {
568                parser.prev_token();
569                let (columns, constraints) = parser.parse_columns()?;
570                builder = builder.columns(columns).constraints(constraints);
571            }
572            Token::EOF => {
573                if builder.columns.is_empty() {
574                    return Err(ParserError::ParserError(
575                        "unexpected end of input".to_string(),
576                    ));
577                }
578
579                break;
580            }
581            Token::SemiColon => {
582                if builder.columns.is_empty() {
583                    return Err(ParserError::ParserError(
584                        "unexpected end of input".to_string(),
585                    ));
586                }
587
588                parser.prev_token();
589                break;
590            }
591            _ => {
592                return parser.expected("end of statement", next_token);
593            }
594        }
595    }
596    let table_options = if !plain_options.is_empty() {
597        crate::ast::CreateTableOptions::Plain(plain_options)
598    } else {
599        crate::ast::CreateTableOptions::None
600    };
601
602    builder = builder.table_options(table_options);
603
604    if iceberg && builder.base_location.is_none() {
605        return Err(ParserError::ParserError(
606            "BASE_LOCATION is required for ICEBERG tables".to_string(),
607        ));
608    }
609
610    Ok(builder.build())
611}
612
613pub fn parse_storage_serialization_policy(
614    parser: &mut Parser,
615) -> Result<StorageSerializationPolicy, ParserError> {
616    let next_token = parser.next_token();
617    match &next_token.token {
618        Token::Word(w) => match w.keyword {
619            Keyword::COMPATIBLE => Ok(StorageSerializationPolicy::Compatible),
620            Keyword::OPTIMIZED => Ok(StorageSerializationPolicy::Optimized),
621            _ => parser.expected("storage_serialization_policy", next_token),
622        },
623        _ => parser.expected("storage_serialization_policy", next_token),
624    }
625}
626
627pub fn parse_create_stage(
628    or_replace: bool,
629    temporary: bool,
630    parser: &mut Parser,
631) -> Result<Statement, ParserError> {
632    //[ IF NOT EXISTS ]
633    let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
634    let name = parser.parse_object_name(false)?;
635    let mut directory_table_params = Vec::new();
636    let mut file_format = Vec::new();
637    let mut copy_options = Vec::new();
638    let mut comment = None;
639
640    // [ internalStageParams | externalStageParams ]
641    let stage_params = parse_stage_params(parser)?;
642
643    // [ directoryTableParams ]
644    if parser.parse_keyword(Keyword::DIRECTORY) {
645        parser.expect_token(&Token::Eq)?;
646        directory_table_params = parse_parentheses_options(parser)?;
647    }
648
649    // [ file_format]
650    if parser.parse_keyword(Keyword::FILE_FORMAT) {
651        parser.expect_token(&Token::Eq)?;
652        file_format = parse_parentheses_options(parser)?;
653    }
654
655    // [ copy_options ]
656    if parser.parse_keyword(Keyword::COPY_OPTIONS) {
657        parser.expect_token(&Token::Eq)?;
658        copy_options = parse_parentheses_options(parser)?;
659    }
660
661    // [ comment ]
662    if parser.parse_keyword(Keyword::COMMENT) {
663        parser.expect_token(&Token::Eq)?;
664        comment = Some(parser.parse_comment_value()?);
665    }
666
667    Ok(Statement::CreateStage {
668        or_replace,
669        temporary,
670        if_not_exists,
671        name,
672        stage_params,
673        directory_table_params: KeyValueOptions {
674            options: directory_table_params,
675        },
676        file_format: KeyValueOptions {
677            options: file_format,
678        },
679        copy_options: KeyValueOptions {
680            options: copy_options,
681        },
682        comment,
683    })
684}
685
686pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result<Ident, ParserError> {
687    let mut ident = String::new();
688    while let Some(next_token) = parser.next_token_no_skip() {
689        match &next_token.token {
690            Token::Whitespace(_) | Token::SemiColon => break,
691            Token::Period => {
692                parser.prev_token();
693                break;
694            }
695            Token::RParen => {
696                parser.prev_token();
697                break;
698            }
699            Token::AtSign => ident.push('@'),
700            Token::Tilde => ident.push('~'),
701            Token::Mod => ident.push('%'),
702            Token::Div => ident.push('/'),
703            Token::Word(w) => ident.push_str(&w.to_string()),
704            _ => return parser.expected("stage name identifier", parser.peek_token()),
705        }
706    }
707    Ok(Ident::new(ident))
708}
709
710pub fn parse_snowflake_stage_name(parser: &mut Parser) -> Result<ObjectName, ParserError> {
711    match parser.next_token().token {
712        Token::AtSign => {
713            parser.prev_token();
714            let mut idents = vec![];
715            loop {
716                idents.push(parse_stage_name_identifier(parser)?);
717                if !parser.consume_token(&Token::Period) {
718                    break;
719                }
720            }
721            Ok(ObjectName::from(idents))
722        }
723        _ => {
724            parser.prev_token();
725            Ok(parser.parse_object_name(false)?)
726        }
727    }
728}
729
730/// Parses a `COPY INTO` statement. Snowflake has two variants, `COPY INTO <table>`
731/// and `COPY INTO <location>` which have different syntax.
732pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
733    let kind = match parser.peek_token().token {
734        // Indicates an internal stage
735        Token::AtSign => CopyIntoSnowflakeKind::Location,
736        // Indicates an external stage, i.e. s3://, gcs:// or azure://
737        Token::SingleQuotedString(s) if s.contains("://") => CopyIntoSnowflakeKind::Location,
738        _ => CopyIntoSnowflakeKind::Table,
739    };
740
741    let mut files: Vec<String> = vec![];
742    let mut from_transformations: Option<Vec<StageLoadSelectItemKind>> = None;
743    let mut from_stage_alias = None;
744    let mut from_stage = None;
745    let mut stage_params = StageParamsObject {
746        url: None,
747        encryption: KeyValueOptions { options: vec![] },
748        endpoint: None,
749        storage_integration: None,
750        credentials: KeyValueOptions { options: vec![] },
751    };
752    let mut from_query = None;
753    let mut partition = None;
754    let mut file_format = Vec::new();
755    let mut pattern = None;
756    let mut validation_mode = None;
757    let mut copy_options = Vec::new();
758
759    let into: ObjectName = parse_snowflake_stage_name(parser)?;
760    if kind == CopyIntoSnowflakeKind::Location {
761        stage_params = parse_stage_params(parser)?;
762    }
763
764    let into_columns = match &parser.peek_token().token {
765        Token::LParen => Some(parser.parse_parenthesized_column_list(IsOptional::Optional, true)?),
766        _ => None,
767    };
768
769    parser.expect_keyword_is(Keyword::FROM)?;
770    match parser.next_token().token {
771        Token::LParen if kind == CopyIntoSnowflakeKind::Table => {
772            // Data load with transformations
773            parser.expect_keyword_is(Keyword::SELECT)?;
774            from_transformations = parse_select_items_for_data_load(parser)?;
775
776            parser.expect_keyword_is(Keyword::FROM)?;
777            from_stage = Some(parse_snowflake_stage_name(parser)?);
778            stage_params = parse_stage_params(parser)?;
779
780            // Parse an optional alias
781            from_stage_alias = parser
782                .maybe_parse_table_alias()?
783                .map(|table_alias| table_alias.name);
784            parser.expect_token(&Token::RParen)?;
785        }
786        Token::LParen if kind == CopyIntoSnowflakeKind::Location => {
787            // Data unload with a query
788            from_query = Some(parser.parse_query()?);
789            parser.expect_token(&Token::RParen)?;
790        }
791        _ => {
792            parser.prev_token();
793            from_stage = Some(parse_snowflake_stage_name(parser)?);
794            stage_params = parse_stage_params(parser)?;
795
796            // as
797            from_stage_alias = if parser.parse_keyword(Keyword::AS) {
798                Some(match parser.next_token().token {
799                    Token::Word(w) => Ok(Ident::new(w.value)),
800                    _ => parser.expected("stage alias", parser.peek_token()),
801                }?)
802            } else {
803                None
804            };
805        }
806    }
807
808    loop {
809        // FILE_FORMAT
810        if parser.parse_keyword(Keyword::FILE_FORMAT) {
811            parser.expect_token(&Token::Eq)?;
812            file_format = parse_parentheses_options(parser)?;
813        // PARTITION BY
814        } else if parser.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) {
815            partition = Some(Box::new(parser.parse_expr()?))
816        // FILES
817        } else if parser.parse_keyword(Keyword::FILES) {
818            parser.expect_token(&Token::Eq)?;
819            parser.expect_token(&Token::LParen)?;
820            let mut continue_loop = true;
821            while continue_loop {
822                continue_loop = false;
823                let next_token = parser.next_token();
824                match next_token.token {
825                    Token::SingleQuotedString(s) => files.push(s),
826                    _ => parser.expected("file token", next_token)?,
827                };
828                if parser.next_token().token.eq(&Token::Comma) {
829                    continue_loop = true;
830                } else {
831                    parser.prev_token(); // not a comma, need to go back
832                }
833            }
834            parser.expect_token(&Token::RParen)?;
835        // PATTERN
836        } else if parser.parse_keyword(Keyword::PATTERN) {
837            parser.expect_token(&Token::Eq)?;
838            let next_token = parser.next_token();
839            pattern = Some(match next_token.token {
840                Token::SingleQuotedString(s) => s,
841                _ => parser.expected("pattern", next_token)?,
842            });
843        // VALIDATION MODE
844        } else if parser.parse_keyword(Keyword::VALIDATION_MODE) {
845            parser.expect_token(&Token::Eq)?;
846            validation_mode = Some(parser.next_token().token.to_string());
847        // COPY OPTIONS
848        } else if parser.parse_keyword(Keyword::COPY_OPTIONS) {
849            parser.expect_token(&Token::Eq)?;
850            copy_options = parse_parentheses_options(parser)?;
851        } else {
852            match parser.next_token().token {
853                Token::SemiColon | Token::EOF => break,
854                Token::Comma => continue,
855                // In `COPY INTO <location>` the copy options do not have a shared key
856                // like in `COPY INTO <table>`
857                Token::Word(key) => copy_options.push(parse_option(parser, key)?),
858                _ => return parser.expected("another copy option, ; or EOF'", parser.peek_token()),
859            }
860        }
861    }
862
863    Ok(Statement::CopyIntoSnowflake {
864        kind,
865        into,
866        into_columns,
867        from_obj: from_stage,
868        from_obj_alias: from_stage_alias,
869        stage_params,
870        from_transformations,
871        from_query,
872        files: if files.is_empty() { None } else { Some(files) },
873        pattern,
874        file_format: KeyValueOptions {
875            options: file_format,
876        },
877        copy_options: KeyValueOptions {
878            options: copy_options,
879        },
880        validation_mode,
881        partition,
882    })
883}
884
885fn parse_select_items_for_data_load(
886    parser: &mut Parser,
887) -> Result<Option<Vec<StageLoadSelectItemKind>>, ParserError> {
888    let mut select_items: Vec<StageLoadSelectItemKind> = vec![];
889    loop {
890        match parser.maybe_parse(parse_select_item_for_data_load)? {
891            // [<alias>.]$<file_col_num>[.<element>] [ , [<alias>.]$<file_col_num>[.<element>] ... ]
892            Some(item) => select_items.push(StageLoadSelectItemKind::StageLoadSelectItem(item)),
893            // Fallback, try to parse a standard SQL select item
894            None => select_items.push(StageLoadSelectItemKind::SelectItem(
895                parser.parse_select_item()?,
896            )),
897        }
898        if matches!(parser.peek_token_ref().token, Token::Comma) {
899            parser.advance_token();
900        } else {
901            break;
902        }
903    }
904    Ok(Some(select_items))
905}
906
907fn parse_select_item_for_data_load(
908    parser: &mut Parser,
909) -> Result<StageLoadSelectItem, ParserError> {
910    let mut alias: Option<Ident> = None;
911    let mut file_col_num: i32 = 0;
912    let mut element: Option<Ident> = None;
913    let mut item_as: Option<Ident> = None;
914
915    let next_token = parser.next_token();
916    match next_token.token {
917        Token::Placeholder(w) => {
918            file_col_num = w.to_string().split_off(1).parse::<i32>().map_err(|e| {
919                ParserError::ParserError(format!("Could not parse '{w}' as i32: {e}"))
920            })?;
921            Ok(())
922        }
923        Token::Word(w) => {
924            alias = Some(Ident::new(w.value));
925            Ok(())
926        }
927        _ => parser.expected("alias or file_col_num", next_token),
928    }?;
929
930    if alias.is_some() {
931        parser.expect_token(&Token::Period)?;
932        // now we get col_num token
933        let col_num_token = parser.next_token();
934        match col_num_token.token {
935            Token::Placeholder(w) => {
936                file_col_num = w.to_string().split_off(1).parse::<i32>().map_err(|e| {
937                    ParserError::ParserError(format!("Could not parse '{w}' as i32: {e}"))
938                })?;
939                Ok(())
940            }
941            _ => parser.expected("file_col_num", col_num_token),
942        }?;
943    }
944
945    // try extracting optional element
946    match parser.next_token().token {
947        Token::Colon => {
948            // parse element
949            element = Some(Ident::new(match parser.next_token().token {
950                Token::Word(w) => Ok(w.value),
951                _ => parser.expected("file_col_num", parser.peek_token()),
952            }?));
953        }
954        _ => {
955            // element not present move back
956            parser.prev_token();
957        }
958    }
959
960    // as
961    if parser.parse_keyword(Keyword::AS) {
962        item_as = Some(match parser.next_token().token {
963            Token::Word(w) => Ok(Ident::new(w.value)),
964            _ => parser.expected("column item alias", parser.peek_token()),
965        }?);
966    }
967
968    Ok(StageLoadSelectItem {
969        alias,
970        file_col_num,
971        element,
972        item_as,
973    })
974}
975
976fn parse_stage_params(parser: &mut Parser) -> Result<StageParamsObject, ParserError> {
977    let (mut url, mut storage_integration, mut endpoint) = (None, None, None);
978    let mut encryption: KeyValueOptions = KeyValueOptions { options: vec![] };
979    let mut credentials: KeyValueOptions = KeyValueOptions { options: vec![] };
980
981    // URL
982    if parser.parse_keyword(Keyword::URL) {
983        parser.expect_token(&Token::Eq)?;
984        url = Some(match parser.next_token().token {
985            Token::SingleQuotedString(word) => Ok(word),
986            _ => parser.expected("a URL statement", parser.peek_token()),
987        }?)
988    }
989
990    // STORAGE INTEGRATION
991    if parser.parse_keyword(Keyword::STORAGE_INTEGRATION) {
992        parser.expect_token(&Token::Eq)?;
993        storage_integration = Some(parser.next_token().token.to_string());
994    }
995
996    // ENDPOINT
997    if parser.parse_keyword(Keyword::ENDPOINT) {
998        parser.expect_token(&Token::Eq)?;
999        endpoint = Some(match parser.next_token().token {
1000            Token::SingleQuotedString(word) => Ok(word),
1001            _ => parser.expected("an endpoint statement", parser.peek_token()),
1002        }?)
1003    }
1004
1005    // CREDENTIALS
1006    if parser.parse_keyword(Keyword::CREDENTIALS) {
1007        parser.expect_token(&Token::Eq)?;
1008        credentials = KeyValueOptions {
1009            options: parse_parentheses_options(parser)?,
1010        };
1011    }
1012
1013    // ENCRYPTION
1014    if parser.parse_keyword(Keyword::ENCRYPTION) {
1015        parser.expect_token(&Token::Eq)?;
1016        encryption = KeyValueOptions {
1017            options: parse_parentheses_options(parser)?,
1018        };
1019    }
1020
1021    Ok(StageParamsObject {
1022        url,
1023        encryption,
1024        endpoint,
1025        storage_integration,
1026        credentials,
1027    })
1028}
1029
1030/// Parses options separated by blank spaces, commas, or new lines like:
1031/// ABORT_DETACHED_QUERY = { TRUE | FALSE }
1032///      [ ACTIVE_PYTHON_PROFILER = { 'LINE' | 'MEMORY' } ]
1033///      [ BINARY_INPUT_FORMAT = '\<string\>' ]
1034fn parse_session_options(
1035    parser: &mut Parser,
1036    set: bool,
1037) -> Result<Vec<KeyValueOption>, ParserError> {
1038    let mut options: Vec<KeyValueOption> = Vec::new();
1039    let empty = String::new;
1040    loop {
1041        let next_token = parser.peek_token();
1042        match next_token.token {
1043            Token::SemiColon | Token::EOF => break,
1044            Token::Comma => {
1045                parser.advance_token();
1046                continue;
1047            }
1048            Token::Word(key) => {
1049                parser.advance_token();
1050                if set {
1051                    let option = parse_option(parser, key)?;
1052                    options.push(option);
1053                } else {
1054                    options.push(KeyValueOption {
1055                        option_name: key.value,
1056                        option_type: KeyValueOptionType::STRING,
1057                        value: empty(),
1058                    });
1059                }
1060            }
1061            _ => {
1062                return parser.expected("another option or end of statement", next_token);
1063            }
1064        }
1065    }
1066    if options.is_empty() {
1067        Err(ParserError::ParserError(
1068            "expected at least one option".to_string(),
1069        ))
1070    } else {
1071        Ok(options)
1072    }
1073}
1074
1075/// Parses options provided within parentheses like:
1076/// ( ENABLE = { TRUE | FALSE }
1077///      [ AUTO_REFRESH = { TRUE | FALSE } ]
1078///      [ REFRESH_ON_CREATE =  { TRUE | FALSE } ]
1079///      [ NOTIFICATION_INTEGRATION = '<notification_integration_name>' ] )
1080///
1081fn parse_parentheses_options(parser: &mut Parser) -> Result<Vec<KeyValueOption>, ParserError> {
1082    let mut options: Vec<KeyValueOption> = Vec::new();
1083    parser.expect_token(&Token::LParen)?;
1084    loop {
1085        match parser.next_token().token {
1086            Token::RParen => break,
1087            Token::Comma => continue,
1088            Token::Word(key) => options.push(parse_option(parser, key)?),
1089            _ => return parser.expected("another option or ')'", parser.peek_token()),
1090        };
1091    }
1092    Ok(options)
1093}
1094
1095/// Parses a `KEY = VALUE` construct based on the specified key
1096fn parse_option(parser: &mut Parser, key: Word) -> Result<KeyValueOption, ParserError> {
1097    parser.expect_token(&Token::Eq)?;
1098    if parser.parse_keyword(Keyword::TRUE) {
1099        Ok(KeyValueOption {
1100            option_name: key.value,
1101            option_type: KeyValueOptionType::BOOLEAN,
1102            value: "TRUE".to_string(),
1103        })
1104    } else if parser.parse_keyword(Keyword::FALSE) {
1105        Ok(KeyValueOption {
1106            option_name: key.value,
1107            option_type: KeyValueOptionType::BOOLEAN,
1108            value: "FALSE".to_string(),
1109        })
1110    } else {
1111        match parser.next_token().token {
1112            Token::SingleQuotedString(value) => Ok(KeyValueOption {
1113                option_name: key.value,
1114                option_type: KeyValueOptionType::STRING,
1115                value,
1116            }),
1117            Token::Word(word) => Ok(KeyValueOption {
1118                option_name: key.value,
1119                option_type: KeyValueOptionType::ENUM,
1120                value: word.value,
1121            }),
1122            Token::Number(n, _) => Ok(KeyValueOption {
1123                option_name: key.value,
1124                option_type: KeyValueOptionType::NUMBER,
1125                value: n,
1126            }),
1127            _ => parser.expected("expected option value", parser.peek_token()),
1128        }
1129    }
1130}
1131
1132/// Parsing a property of identity or autoincrement column option
1133/// Syntax:
1134/// ```sql
1135/// [ (seed , increment) | START num INCREMENT num ] [ ORDER | NOORDER ]
1136/// ```
1137/// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table
1138fn parse_identity_property(parser: &mut Parser) -> Result<IdentityProperty, ParserError> {
1139    let parameters = if parser.consume_token(&Token::LParen) {
1140        let seed = parser.parse_number()?;
1141        parser.expect_token(&Token::Comma)?;
1142        let increment = parser.parse_number()?;
1143        parser.expect_token(&Token::RParen)?;
1144
1145        Some(IdentityPropertyFormatKind::FunctionCall(
1146            IdentityParameters { seed, increment },
1147        ))
1148    } else if parser.parse_keyword(Keyword::START) {
1149        let seed = parser.parse_number()?;
1150        parser.expect_keyword_is(Keyword::INCREMENT)?;
1151        let increment = parser.parse_number()?;
1152
1153        Some(IdentityPropertyFormatKind::StartAndIncrement(
1154            IdentityParameters { seed, increment },
1155        ))
1156    } else {
1157        None
1158    };
1159    let order = match parser.parse_one_of_keywords(&[Keyword::ORDER, Keyword::NOORDER]) {
1160        Some(Keyword::ORDER) => Some(IdentityPropertyOrder::Order),
1161        Some(Keyword::NOORDER) => Some(IdentityPropertyOrder::NoOrder),
1162        _ => None,
1163    };
1164    Ok(IdentityProperty { parameters, order })
1165}
1166
1167/// Parsing a policy property of column option
1168/// Syntax:
1169/// ```sql
1170/// <policy_name> [ USING ( <col_name> , <cond_col1> , ... )
1171/// ```
1172/// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table
1173fn parse_column_policy_property(
1174    parser: &mut Parser,
1175    with: bool,
1176) -> Result<ColumnPolicyProperty, ParserError> {
1177    let policy_name = parser.parse_identifier()?;
1178    let using_columns = if parser.parse_keyword(Keyword::USING) {
1179        parser.expect_token(&Token::LParen)?;
1180        let columns = parser.parse_comma_separated(|p| p.parse_identifier())?;
1181        parser.expect_token(&Token::RParen)?;
1182        Some(columns)
1183    } else {
1184        None
1185    };
1186
1187    Ok(ColumnPolicyProperty {
1188        with,
1189        policy_name,
1190        using_columns,
1191    })
1192}
1193
1194/// Parsing tags list of column
1195/// Syntax:
1196/// ```sql
1197/// ( <tag_name> = '<tag_value>' [ , <tag_name> = '<tag_value>' , ... ] )
1198/// ```
1199/// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table
1200fn parse_column_tags(parser: &mut Parser, with: bool) -> Result<TagsColumnOption, ParserError> {
1201    parser.expect_token(&Token::LParen)?;
1202    let tags = parser.parse_comma_separated(Parser::parse_tag)?;
1203    parser.expect_token(&Token::RParen)?;
1204
1205    Ok(TagsColumnOption { with, tags })
1206}
1207
1208/// Parse snowflake show objects.
1209/// <https://docs.snowflake.com/en/sql-reference/sql/show-objects>
1210fn parse_show_objects(terse: bool, parser: &mut Parser) -> Result<Statement, ParserError> {
1211    let show_options = parser.parse_show_stmt_options()?;
1212    Ok(Statement::ShowObjects(ShowObjects {
1213        terse,
1214        show_options,
1215    }))
1216}