sqlparser/dialect/
snowflake.rs

1// Licensed under the Apache License, Version 2.0 (the "License");
2// you may not use this file except in compliance with the License.
3// You may obtain a copy of the License at
4//
5// http://www.apache.org/licenses/LICENSE-2.0
6//
7// Unless required by applicable law or agreed to in writing, software
8// distributed under the License is distributed on an "AS IS" BASIS,
9// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10// See the License for the specific language governing permissions and
11// limitations under the License.
12
13#[cfg(not(feature = "std"))]
14use crate::alloc::string::ToString;
15use crate::ast::helpers::stmt_data_loading::{
16    DataLoadingOption, DataLoadingOptionType, DataLoadingOptions, StageLoadSelectItem,
17    StageParamsObject,
18};
19use crate::ast::{Ident, ObjectName, Statement};
20use crate::dialect::Dialect;
21use crate::keywords::Keyword;
22use crate::parser::{Parser, ParserError};
23use crate::tokenizer::Token;
24#[cfg(not(feature = "std"))]
25use alloc::string::String;
26#[cfg(not(feature = "std"))]
27use alloc::vec::Vec;
28#[cfg(not(feature = "std"))]
29use alloc::{format, vec};
30
31/// A [`Dialect`] for [Snowflake](https://www.snowflake.com/)
32#[derive(Debug, Default)]
33pub struct SnowflakeDialect;
34
35impl Dialect for SnowflakeDialect {
36    // see https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html
37    fn is_identifier_start(&self, ch: char) -> bool {
38        ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
39    }
40
41    fn is_identifier_part(&self, ch: char) -> bool {
42        ch.is_ascii_lowercase()
43            || ch.is_ascii_uppercase()
44            || ch.is_ascii_digit()
45            || ch == '$'
46            || ch == '_'
47    }
48
49    fn supports_within_after_array_aggregation(&self) -> bool {
50        true
51    }
52
53    fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
54        if parser.parse_keyword(Keyword::CREATE) {
55            // possibly CREATE STAGE
56            //[ OR  REPLACE ]
57            let or_replace = parser.parse_keywords(&[Keyword::OR, Keyword::REPLACE]);
58            //[ TEMPORARY ]
59            let temporary = parser.parse_keyword(Keyword::TEMPORARY);
60
61            if parser.parse_keyword(Keyword::STAGE) {
62                // OK - this is CREATE STAGE statement
63                return Some(parse_create_stage(or_replace, temporary, parser));
64            } else {
65                // need to go back with the cursor
66                let mut back = 1;
67                if or_replace {
68                    back += 2
69                }
70                if temporary {
71                    back += 1
72                }
73                for _i in 0..back {
74                    parser.prev_token();
75                }
76            }
77        }
78        if parser.parse_keywords(&[Keyword::COPY, Keyword::INTO]) {
79            // COPY INTO
80            return Some(parse_copy_into(parser));
81        }
82
83        None
84    }
85}
86
87pub fn parse_create_stage(
88    or_replace: bool,
89    temporary: bool,
90    parser: &mut Parser,
91) -> Result<Statement, ParserError> {
92    //[ IF NOT EXISTS ]
93    let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
94    let name = parser.parse_object_name(false)?;
95    let mut directory_table_params = Vec::new();
96    let mut file_format = Vec::new();
97    let mut copy_options = Vec::new();
98    let mut comment = None;
99
100    // [ internalStageParams | externalStageParams ]
101    let stage_params = parse_stage_params(parser)?;
102
103    // [ directoryTableParams ]
104    if parser.parse_keyword(Keyword::DIRECTORY) {
105        parser.expect_token(&Token::Eq)?;
106        directory_table_params = parse_parentheses_options(parser)?;
107    }
108
109    // [ file_format]
110    if parser.parse_keyword(Keyword::FILE_FORMAT) {
111        parser.expect_token(&Token::Eq)?;
112        file_format = parse_parentheses_options(parser)?;
113    }
114
115    // [ copy_options ]
116    if parser.parse_keyword(Keyword::COPY_OPTIONS) {
117        parser.expect_token(&Token::Eq)?;
118        copy_options = parse_parentheses_options(parser)?;
119    }
120
121    // [ comment ]
122    if parser.parse_keyword(Keyword::COMMENT) {
123        parser.expect_token(&Token::Eq)?;
124        comment = Some(match parser.next_token().token {
125            Token::SingleQuotedString(word) => Ok(word),
126            _ => parser.expected("a comment statement", parser.peek_token()),
127        }?)
128    }
129
130    Ok(Statement::CreateStage {
131        or_replace,
132        temporary,
133        if_not_exists,
134        name,
135        stage_params,
136        directory_table_params: DataLoadingOptions {
137            options: directory_table_params,
138        },
139        file_format: DataLoadingOptions {
140            options: file_format,
141        },
142        copy_options: DataLoadingOptions {
143            options: copy_options,
144        },
145        comment,
146    })
147}
148
149pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result<Ident, ParserError> {
150    let mut ident = String::new();
151    while let Some(next_token) = parser.next_token_no_skip() {
152        match &next_token.token {
153            Token::Whitespace(_) => break,
154            Token::Period => {
155                parser.prev_token();
156                break;
157            }
158            Token::AtSign => ident.push('@'),
159            Token::Tilde => ident.push('~'),
160            Token::Mod => ident.push('%'),
161            Token::Div => ident.push('/'),
162            Token::Word(w) => ident.push_str(&w.value),
163            _ => return parser.expected("stage name identifier", parser.peek_token()),
164        }
165    }
166    Ok(Ident::new(ident))
167}
168
169pub fn parse_snowflake_stage_name(parser: &mut Parser) -> Result<ObjectName, ParserError> {
170    match parser.next_token().token {
171        Token::AtSign => {
172            parser.prev_token();
173            let mut idents = vec![];
174            loop {
175                idents.push(parse_stage_name_identifier(parser)?);
176                if !parser.consume_token(&Token::Period) {
177                    break;
178                }
179            }
180            Ok(ObjectName(idents))
181        }
182        _ => {
183            parser.prev_token();
184            Ok(parser.parse_object_name(false)?)
185        }
186    }
187}
188
189pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
190    let into: ObjectName = parse_snowflake_stage_name(parser)?;
191    let mut files: Vec<String> = vec![];
192    let mut from_transformations: Option<Vec<StageLoadSelectItem>> = None;
193    let from_stage_alias;
194    let from_stage: ObjectName;
195    let stage_params: StageParamsObject;
196
197    parser.expect_keyword(Keyword::FROM)?;
198    // check if data load transformations are present
199    match parser.next_token().token {
200        Token::LParen => {
201            // data load with transformations
202            parser.expect_keyword(Keyword::SELECT)?;
203            from_transformations = parse_select_items_for_data_load(parser)?;
204
205            parser.expect_keyword(Keyword::FROM)?;
206            from_stage = parse_snowflake_stage_name(parser)?;
207            stage_params = parse_stage_params(parser)?;
208
209            // as
210            from_stage_alias = if parser.parse_keyword(Keyword::AS) {
211                Some(match parser.next_token().token {
212                    Token::Word(w) => Ok(Ident::new(w.value)),
213                    _ => parser.expected("stage alias", parser.peek_token()),
214                }?)
215            } else {
216                None
217            };
218            parser.expect_token(&Token::RParen)?;
219        }
220        _ => {
221            parser.prev_token();
222            from_stage = parser.parse_object_name(false)?;
223            stage_params = parse_stage_params(parser)?;
224
225            // as
226            from_stage_alias = if parser.parse_keyword(Keyword::AS) {
227                Some(match parser.next_token().token {
228                    Token::Word(w) => Ok(Ident::new(w.value)),
229                    _ => parser.expected("stage alias", parser.peek_token()),
230                }?)
231            } else {
232                None
233            };
234        }
235    };
236
237    // [ files ]
238    if parser.parse_keyword(Keyword::FILES) {
239        parser.expect_token(&Token::Eq)?;
240        parser.expect_token(&Token::LParen)?;
241        let mut continue_loop = true;
242        while continue_loop {
243            continue_loop = false;
244            let next_token = parser.next_token();
245            match next_token.token {
246                Token::SingleQuotedString(s) => files.push(s),
247                _ => parser.expected("file token", next_token)?,
248            };
249            if parser.next_token().token.eq(&Token::Comma) {
250                continue_loop = true;
251            } else {
252                parser.prev_token(); // not a comma, need to go back
253            }
254        }
255        parser.expect_token(&Token::RParen)?;
256    }
257
258    // [ pattern ]
259    let mut pattern = None;
260    if parser.parse_keyword(Keyword::PATTERN) {
261        parser.expect_token(&Token::Eq)?;
262        let next_token = parser.next_token();
263        pattern = Some(match next_token.token {
264            Token::SingleQuotedString(s) => s,
265            _ => parser.expected("pattern", next_token)?,
266        });
267    }
268
269    // [ file_format]
270    let mut file_format = Vec::new();
271    if parser.parse_keyword(Keyword::FILE_FORMAT) {
272        parser.expect_token(&Token::Eq)?;
273        file_format = parse_parentheses_options(parser)?;
274    }
275
276    // [ copy_options ]
277    let mut copy_options = Vec::new();
278    if parser.parse_keyword(Keyword::COPY_OPTIONS) {
279        parser.expect_token(&Token::Eq)?;
280        copy_options = parse_parentheses_options(parser)?;
281    }
282
283    // [ VALIDATION_MODE ]
284    let mut validation_mode = None;
285    if parser.parse_keyword(Keyword::VALIDATION_MODE) {
286        parser.expect_token(&Token::Eq)?;
287        validation_mode = Some(parser.next_token().token.to_string());
288    }
289
290    Ok(Statement::CopyIntoSnowflake {
291        into,
292        from_stage,
293        from_stage_alias,
294        stage_params,
295        from_transformations,
296        files: if files.is_empty() { None } else { Some(files) },
297        pattern,
298        file_format: DataLoadingOptions {
299            options: file_format,
300        },
301        copy_options: DataLoadingOptions {
302            options: copy_options,
303        },
304        validation_mode,
305    })
306}
307
308fn parse_select_items_for_data_load(
309    parser: &mut Parser,
310) -> Result<Option<Vec<StageLoadSelectItem>>, ParserError> {
311    // [<alias>.]$<file_col_num>[.<element>] [ , [<alias>.]$<file_col_num>[.<element>] ... ]
312    let mut select_items: Vec<StageLoadSelectItem> = vec![];
313    loop {
314        let mut alias: Option<Ident> = None;
315        let mut file_col_num: i32 = 0;
316        let mut element: Option<Ident> = None;
317        let mut item_as: Option<Ident> = None;
318
319        let next_token = parser.next_token();
320        match next_token.token {
321            Token::Placeholder(w) => {
322                file_col_num = w.to_string().split_off(1).parse::<i32>().map_err(|e| {
323                    ParserError::ParserError(format!("Could not parse '{w}' as i32: {e}"))
324                })?;
325                Ok(())
326            }
327            Token::Word(w) => {
328                alias = Some(Ident::new(w.value));
329                Ok(())
330            }
331            _ => parser.expected("alias or file_col_num", next_token),
332        }?;
333
334        if alias.is_some() {
335            parser.expect_token(&Token::Period)?;
336            // now we get col_num token
337            let col_num_token = parser.next_token();
338            match col_num_token.token {
339                Token::Placeholder(w) => {
340                    file_col_num = w.to_string().split_off(1).parse::<i32>().map_err(|e| {
341                        ParserError::ParserError(format!("Could not parse '{w}' as i32: {e}"))
342                    })?;
343                    Ok(())
344                }
345                _ => parser.expected("file_col_num", col_num_token),
346            }?;
347        }
348
349        // try extracting optional element
350        match parser.next_token().token {
351            Token::Colon => {
352                // parse element
353                element = Some(Ident::new(match parser.next_token().token {
354                    Token::Word(w) => Ok(w.value),
355                    _ => parser.expected("file_col_num", parser.peek_token()),
356                }?));
357            }
358            _ => {
359                // element not present move back
360                parser.prev_token();
361            }
362        }
363
364        // as
365        if parser.parse_keyword(Keyword::AS) {
366            item_as = Some(match parser.next_token().token {
367                Token::Word(w) => Ok(Ident::new(w.value)),
368                _ => parser.expected("column item alias", parser.peek_token()),
369            }?);
370        }
371
372        select_items.push(StageLoadSelectItem {
373            alias,
374            file_col_num,
375            element,
376            item_as,
377        });
378
379        match parser.next_token().token {
380            Token::Comma => {
381                // continue
382            }
383            _ => {
384                parser.prev_token(); // need to move back
385                break;
386            }
387        }
388    }
389    Ok(Some(select_items))
390}
391
392fn parse_stage_params(parser: &mut Parser) -> Result<StageParamsObject, ParserError> {
393    let (mut url, mut storage_integration, mut endpoint) = (None, None, None);
394    let mut encryption: DataLoadingOptions = DataLoadingOptions { options: vec![] };
395    let mut credentials: DataLoadingOptions = DataLoadingOptions { options: vec![] };
396
397    // URL
398    if parser.parse_keyword(Keyword::URL) {
399        parser.expect_token(&Token::Eq)?;
400        url = Some(match parser.next_token().token {
401            Token::SingleQuotedString(word) => Ok(word),
402            _ => parser.expected("a URL statement", parser.peek_token()),
403        }?)
404    }
405
406    // STORAGE INTEGRATION
407    if parser.parse_keyword(Keyword::STORAGE_INTEGRATION) {
408        parser.expect_token(&Token::Eq)?;
409        storage_integration = Some(parser.next_token().token.to_string());
410    }
411
412    // ENDPOINT
413    if parser.parse_keyword(Keyword::ENDPOINT) {
414        parser.expect_token(&Token::Eq)?;
415        endpoint = Some(match parser.next_token().token {
416            Token::SingleQuotedString(word) => Ok(word),
417            _ => parser.expected("an endpoint statement", parser.peek_token()),
418        }?)
419    }
420
421    // CREDENTIALS
422    if parser.parse_keyword(Keyword::CREDENTIALS) {
423        parser.expect_token(&Token::Eq)?;
424        credentials = DataLoadingOptions {
425            options: parse_parentheses_options(parser)?,
426        };
427    }
428
429    // ENCRYPTION
430    if parser.parse_keyword(Keyword::ENCRYPTION) {
431        parser.expect_token(&Token::Eq)?;
432        encryption = DataLoadingOptions {
433            options: parse_parentheses_options(parser)?,
434        };
435    }
436
437    Ok(StageParamsObject {
438        url,
439        encryption,
440        endpoint,
441        storage_integration,
442        credentials,
443    })
444}
445
446/// Parses options provided within parentheses like:
447/// ( ENABLE = { TRUE | FALSE }
448///      [ AUTO_REFRESH = { TRUE | FALSE } ]
449///      [ REFRESH_ON_CREATE =  { TRUE | FALSE } ]
450///      [ NOTIFICATION_INTEGRATION = '<notification_integration_name>' ] )
451///
452fn parse_parentheses_options(parser: &mut Parser) -> Result<Vec<DataLoadingOption>, ParserError> {
453    let mut options: Vec<DataLoadingOption> = Vec::new();
454
455    parser.expect_token(&Token::LParen)?;
456    loop {
457        match parser.next_token().token {
458            Token::RParen => break,
459            Token::Word(key) => {
460                parser.expect_token(&Token::Eq)?;
461                if parser.parse_keyword(Keyword::TRUE) {
462                    options.push(DataLoadingOption {
463                        option_name: key.value,
464                        option_type: DataLoadingOptionType::BOOLEAN,
465                        value: "TRUE".to_string(),
466                    });
467                    Ok(())
468                } else if parser.parse_keyword(Keyword::FALSE) {
469                    options.push(DataLoadingOption {
470                        option_name: key.value,
471                        option_type: DataLoadingOptionType::BOOLEAN,
472                        value: "FALSE".to_string(),
473                    });
474                    Ok(())
475                } else {
476                    match parser.next_token().token {
477                        Token::SingleQuotedString(value) => {
478                            options.push(DataLoadingOption {
479                                option_name: key.value,
480                                option_type: DataLoadingOptionType::STRING,
481                                value,
482                            });
483                            Ok(())
484                        }
485                        Token::Word(word) => {
486                            options.push(DataLoadingOption {
487                                option_name: key.value,
488                                option_type: DataLoadingOptionType::ENUM,
489                                value: word.value,
490                            });
491                            Ok(())
492                        }
493                        _ => parser.expected("expected option value", parser.peek_token()),
494                    }
495                }
496            }
497            _ => parser.expected("another option or ')'", parser.peek_token()),
498        }?;
499    }
500    Ok(options)
501}