mysqldump_mutator/
parser.rs

1// Licensed under the Apache License, Version 2.0 (the "License");
2// you may not use this file except in compliance with the License.
3// You may obtain a copy of the License at
4//
5// http://www.apache.org/licenses/LICENSE-2.0
6//
7// Unless required by applicable law or agreed to in writing, software
8// distributed under the License is distributed on an "AS IS" BASIS,
9// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10// See the License for the specific language governing permissions and
11// limitations under the License.
12
13//! SQL Parser
14
15use log::debug;
16use std::io::BufRead;
17
18use super::ast::*;
19use super::dialect::keywords;
20use super::dialect::MySqlDialect;
21use super::tokenizer::*;
22use std::error::Error;
23use std::fmt;
24
/// Errors produced while parsing a mysqldump stream.
#[derive(Debug, Clone, PartialEq)]
pub enum ParserError {
    /// The tokenizer failed; wraps the tokenizer error's debug text.
    TokenizerError(String),
    /// The parser hit an unexpected token; wraps a human-readable message.
    ParserError(String),
    /// Statement kind this parser does not mutate; its tokens are committed unchanged.
    Ignored,
    /// End of input was reached.
    End,
}
32
// Use `Parser::expected` instead, if possible
// Shorthand for returning `Err(ParserError::ParserError(..))` from a parse function.
macro_rules! parser_err {
    ($MSG:expr) => {
        Err(ParserError::ParserError($MSG.to_string()))
    };
}
39
/// Whether a syntactic element is required or may be omitted.
#[derive(PartialEq)]
pub enum IsOptional {
    Optional,
    Mandatory,
}
// Allow bare `Optional` / `Mandatory` within this module.
use IsOptional::*;
46
47impl From<TokenizerError> for ParserError {
48    fn from(e: TokenizerError) -> Self {
49        ParserError::TokenizerError(format!("{:?}", e))
50    }
51}
52
53impl fmt::Display for ParserError {
54    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
55        write!(
56            f,
57            "sql parser error: {}",
58            match self {
59                ParserError::TokenizerError(s) => s,
60                ParserError::ParserError(s) => s,
61                ParserError::Ignored => "Ignored",
62                ParserError::End => "EOF",
63            }
64        )
65    }
66}
67
// ParserError carries no nested source error, so the default trait methods suffice.
impl Error for ParserError {}
69
/// Context given to the value handler closure. This indicates where in the query the parser is.
///
/// For example, SQLContextType::ColumnDefinition((table_name, column_name, column_index))
/// Or SQLContextType::Insert(InsertContext::Value(table_name, column_index))
#[derive(Debug, Clone)]
pub enum SQLContextType {
    /// Not inside any statement of interest.
    None,
    /// Contains the table name
    CreateTable(String),
    /// Contains the table name, the column name and the column index
    ColumnDefinition((String, String, usize)),
    /// Contains an Insert context
    Insert(InsertContext),
}
84
/// Where the parser currently is within an `INSERT` statement.
#[derive(Debug, Clone)]
pub enum InsertContext {
    /// Inside an INSERT, before the table name has been read.
    None,
    /// Contains the table name
    Table(String),
    /// Contains the table name and the column index
    Value((String, usize)),
}
93
/// Tracks the parser's position in the statement structure (see
/// `SQLContextType`). State transitions are validated by the
/// `started_*`/`ended_*` methods, which panic on an invalid transition.
#[derive(Debug)]
pub struct SQLContext {
    // Current state; only mutated through the transition methods below.
    context: SQLContextType,
}
98
99impl Default for SQLContext {
100    fn default() -> Self {
101        SQLContext::new()
102    }
103}
104
105impl SQLContext {
106    pub fn new() -> SQLContext {
107        debug!("SQLContext::new");
108        SQLContext {
109            context: SQLContextType::None,
110        }
111    }
112
113    pub fn get_context(&self) -> SQLContextType {
114        self.context.clone()
115    }
116
117    fn started_create_table(&mut self, table: String) {
118        debug!("started_create_table {:?} {}", self.context, table);
119
120        if let SQLContextType::None = self.context {
121            return self.context = SQLContextType::CreateTable(table);
122        }
123
124        panic!("Invalid context state");
125    }
126
127    fn ended_create_table(&mut self) {
128        debug!("ended_create_table {:?}", self.context);
129
130        if let SQLContextType::CreateTable(_) = self.context {
131            return self.context = SQLContextType::None;
132        }
133
134        panic!("Invalid context state");
135    }
136
137    fn started_column_definition(&mut self, column: String, index: usize) {
138        debug!(
139            "started_column_definition {:?} {} {}",
140            self.context, column, index
141        );
142
143        if let SQLContextType::CreateTable(table) = &self.context {
144            return self.context = SQLContextType::ColumnDefinition((table.clone(), column, index));
145        }
146
147        panic!("Invalid context state");
148    }
149
150    fn ended_column_definition(&mut self) {
151        debug!("ended_column_definition {:?}", self.context);
152
153        if let SQLContextType::ColumnDefinition((table, _, _)) = &self.context {
154            return self.context = SQLContextType::CreateTable(table.clone());
155        }
156
157        panic!("Invalid context state");
158    }
159
160    fn started_insert(&mut self) {
161        debug!("started_insert {:?}", self.context);
162
163        if let SQLContextType::None = self.context {
164            return self.context = SQLContextType::Insert(InsertContext::None);
165        }
166
167        panic!("Invalid context state");
168    }
169
170    fn ended_insert(&mut self) {
171        debug!("ended_insert {:?}", self.context);
172
173        if let SQLContextType::Insert(_) = self.context {
174            return self.context = SQLContextType::None;
175        }
176
177        panic!("Invalid context state");
178    }
179
180    fn started_insert_table(&mut self, table: String) {
181        debug!("started_insert_table {:?} {}", self.context, table);
182
183        if let SQLContextType::Insert(InsertContext::None) = self.context {
184            return self.context = SQLContextType::Insert(InsertContext::Table(table));
185        }
186
187        panic!("Invalid context state");
188    }
189
190    fn ended_insert_table(&mut self) {
191        debug!("ended_insert_table");
192
193        if let SQLContextType::Insert(InsertContext::Table(_)) = self.context {
194            return self.context = SQLContextType::Insert(InsertContext::None);
195        }
196
197        panic!("Invalid context state");
198    }
199
200    fn started_insert_value(&mut self, column: usize) {
201        debug!("started_insert_value {:?} {}", self.context, column);
202
203        if let SQLContextType::Insert(InsertContext::Table(table)) = &self.context {
204            return self.context =
205                SQLContextType::Insert(InsertContext::Value((table.clone(), column)));
206        }
207
208        panic!("Invalid context state");
209    }
210
211    fn ended_insert_value(&mut self) {
212        debug!("ended_insert_value {:?}", self.context);
213
214        if let SQLContextType::Insert(InsertContext::Value((table, _))) = &self.context {
215            return self.context = SQLContextType::Insert(InsertContext::Table(table.clone()));
216        }
217
218        panic!("Invalid context state");
219    }
220}
221
/// SQL Parser
///
/// Streams tokens from a `Tokenizer`, hands selected tokens to
/// `value_handler` (which may replace them), and flushes finished token
/// runs to `commit_handler`.
pub struct Parser<'a, R: BufRead, H: FnMut(&SQLContextType, Token) -> Token, CH: FnMut(&[Token])> {
    // Count of tokenizer pulls so far; used to compute peek offsets.
    index: usize,
    // Tokens consumed (whitespace included) but not yet flushed to the commit handler.
    commited_tokens: Vec<Token>,
    tokenizer: Tokenizer<'a, R, MySqlDialect>,
    // Tokens consumed by the most recent `next_token` call; `prev_token` pushes these back.
    last_tokens: Vec<Token>,
    // Tracks where in the statement structure the parser currently is.
    context: SQLContext,
    // Invoked with the current context on value tokens of interest; may rewrite the token.
    value_handler: Option<H>,
    // Invoked with each batch of committed tokens.
    commit_handler: Option<CH>,
}
232
233impl<'a, R: BufRead, H: FnMut(&SQLContextType, Token) -> Token, CH: FnMut(&[Token])>
234    Parser<'a, R, H, CH>
235{
    /// Construct a parser over the given input stream with the two user
    /// callbacks; parsing itself starts in `parse_mysqldump`.
    fn new(sql: &'a mut R, handler: H, commit_handler: CH) -> Self {
        Parser {
            index: 0,
            commited_tokens: vec![],
            tokenizer: Tokenizer::new(MySqlDialect {}, sql),
            last_tokens: vec![],
            context: SQLContext::new(),
            value_handler: Some(handler),
            commit_handler: Some(commit_handler),
        }
    }
248
    /// Parse a SQL statement. Calls handler for each row definition and commit_handler each time the parser finalizes parsing and mutating some set of tokens.
    pub fn parse_mysqldump(mut sql: R, handler: H, commit_handler: CH) -> Result<(), ParserError> {
        let mut parser = Parser::new(&mut sql, handler, commit_handler);
        let mut expecting_statement_delimiter = false;

        loop {
            // ignore empty statements (between successive statement delimiters)
            while parser.consume_token(&Token::SemiColon) {
                expecting_statement_delimiter = false;
            }

            if parser.peek_token().is_none() {
                break;
            } else if expecting_statement_delimiter {
                let token = parser.peek_token();
                return parser.expected("end of statement", token);
            }

            let result = parser.parse_statement();

            match result {
                // Statement kind we don't mutate: flush its tokens unchanged.
                Err(ParserError::Ignored) => {
                    parser.commit_tokens();
                    continue;
                }
                // Hard error: dump the buffered tokens for diagnosis, then bail.
                Err(error) => {
                    println!();
                    for token in parser.commited_tokens.drain(0..) {
                        print!("{}", token);
                    }
                    println!();
                    return Err(error);
                }
                // Parsed (and possibly mutated): flush and require a `;` next.
                Ok(_) => {
                    expecting_statement_delimiter = true;
                    parser.commit_tokens();
                }
            }
        }
        // Flush whatever trailing tokens remain after the final statement.
        parser.commit_tokens();
        Ok(())
    }
291
292    /// Parse a single top-level statement (such as SELECT, INSERT, CREATE, etc.),
293    /// stopping before the statement separator, if any.
294    fn parse_statement(&mut self) -> Result<(), ParserError> {
295        match self.next_token() {
296            Some(Token::Word(ref w)) if w.keyword != "" => match w.keyword.as_ref() {
297                "CREATE" => Ok(self.parse_create()?),
298                "INSERT" => Ok(self.parse_insert()?),
299                _ => Err(ParserError::Ignored),
300            },
301            None => Err(ParserError::End),
302            _ => Err(ParserError::Ignored),
303            // TODO: Diferenciate between None and Some with other value
304        }
305    }
306
    /// Parse a new expression
    ///
    /// Entry point for expression parsing: starts at precedence 0 so every
    /// infix operator binds.
    fn parse_expr(&mut self) -> Result<Expr, ParserError> {
        self.parse_subexpr(0)
    }
311
312    /// Parse tokens until the precedence changes
313    fn parse_subexpr(&mut self, precedence: u8) -> Result<Expr, ParserError> {
314        debug!("parsing expr");
315        let mut expr = self.parse_prefix()?;
316        debug!("prefix: {:?}", expr);
317        loop {
318            let next_precedence = self.get_next_precedence()?;
319            //debug!("next precedence: {:?}", next_precedence);
320            if precedence >= next_precedence {
321                break;
322            }
323
324            expr = self.parse_infix(expr, next_precedence)?;
325        }
326        Ok(expr)
327    }
328
    /// Parse an expression prefix
    ///
    /// Handles the leaf cases — literal values (TRUE/FALSE/NULL, numbers,
    /// strings) and plain identifiers — plus an optional trailing `COLLATE`.
    fn parse_prefix(&mut self) -> Result<Expr, ParserError> {
        let tok = self
            .next_token()
            .ok_or_else(|| ParserError::ParserError("Unexpected EOF".to_string()))?;
        let expr = match tok {
            Token::Word(w) => match w.keyword.as_ref() {
                "TRUE" | "FALSE" | "NULL" => {
                    // Push the keyword back so `parse_value` re-reads it.
                    self.prev_token();
                    Ok(Expr::Value(self.parse_value()?))
                }
                // Here `w` is a word, check if it's a part of a multi-part
                // identifier, a function call, or a simple identifier:
                _ => Ok(Expr::Identifier(w.to_ident())),
            },
            Token::Number(_)
            | Token::SingleQuotedString(_)
            | Token::NationalStringLiteral(_)
            | Token::HexStringLiteral(_) => {
                // Literal: push it back and re-read it through `parse_value`.
                self.prev_token();
                Ok(Expr::Value(self.parse_value()?))
            }
            unexpected => self.expected("an expression", Some(unexpected)),
        }?;

        // Any prefix expression may carry a `COLLATE <name>` suffix.
        if self.parse_keyword("COLLATE") {
            Ok(Expr::Collate {
                expr: Box::new(expr),
                collation: self.parse_object_name()?,
            })
        } else {
            Ok(expr)
        }
    }
363
    /// Parse an operator following an expression
    ///
    /// `expr` is the already-parsed left-hand side; `precedence` is the
    /// binding power of the operator about to be consumed. Panics if called
    /// on a token `get_next_precedence` gave a non-zero precedence to but
    /// that has no infix rule here (the two must stay in sync).
    fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result<Expr, ParserError> {
        debug!("parsing infix");
        let tok = self.next_token().unwrap(); // safe as EOF's precedence is the lowest

        // Simple left-op-right operators; `None` means the token needs the
        // special handling below (IS / NOT / IN / BETWEEN / `::`).
        let regular_binary_operator = match tok {
            Token::Eq => Some(BinaryOperator::Eq),
            Token::Neq(_) => Some(BinaryOperator::NotEq),
            Token::Gt => Some(BinaryOperator::Gt),
            Token::GtEq => Some(BinaryOperator::GtEq),
            Token::Lt => Some(BinaryOperator::Lt),
            Token::LtEq => Some(BinaryOperator::LtEq),
            Token::Plus => Some(BinaryOperator::Plus),
            Token::Minus => Some(BinaryOperator::Minus),
            Token::Mult => Some(BinaryOperator::Multiply),
            Token::Mod => Some(BinaryOperator::Modulus),
            Token::Div => Some(BinaryOperator::Divide),
            Token::Word(ref k) => match k.keyword.as_ref() {
                "AND" => Some(BinaryOperator::And),
                "OR" => Some(BinaryOperator::Or),
                "LIKE" => Some(BinaryOperator::Like),
                "NOT" => {
                    // `NOT LIKE` is the only NOT-form that is a plain binary op.
                    if self.parse_keyword("LIKE") {
                        Some(BinaryOperator::NotLike)
                    } else {
                        None
                    }
                }
                _ => None,
            },
            _ => None,
        };

        if let Some(op) = regular_binary_operator {
            Ok(Expr::BinaryOp {
                left: Box::new(expr),
                op,
                right: Box::new(self.parse_subexpr(precedence)?),
            })
        } else if let Token::Word(ref k) = tok {
            match k.keyword.as_ref() {
                "IS" => {
                    if self.parse_keyword("NULL") {
                        Ok(Expr::IsNull(Box::new(expr)))
                    } else if self.parse_keywords(&["NOT", "NULL"]) {
                        Ok(Expr::IsNotNull(Box::new(expr)))
                    } else {
                        let token = self.peek_token();
                        self.expected("NULL or NOT NULL after IS", token)
                    }
                }
                "NOT" | "IN" | "BETWEEN" => {
                    // Push the keyword back so the optional NOT prefix can be
                    // re-parsed uniformly for both IN and BETWEEN.
                    self.prev_token();
                    let negated = self.parse_keyword("NOT");
                    if self.parse_keyword("IN") {
                        self.parse_in(expr, negated)
                    } else if self.parse_keyword("BETWEEN") {
                        self.parse_between(expr, negated)
                    } else {
                        let token = self.peek_token();
                        self.expected("IN or BETWEEN after NOT", token)
                    }
                }
                // Can only happen if `get_next_precedence` got out of sync with this function
                _ => panic!("No infix parser for token {:?}", tok),
            }
        } else if Token::DoubleColon == tok {
            self.parse_pg_cast(expr)
        } else {
            // Can only happen if `get_next_precedence` got out of sync with this function
            panic!("No infix parser for token {:?}", tok)
        }
    }
437
438    /// Parses the parens following the `[ NOT ] IN` operator
439    fn parse_in(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParserError> {
440        self.expect_token(&Token::LParen)?;
441        let in_op = if self.parse_keyword("SELECT") || self.parse_keyword("WITH") {
442            self.prev_token();
443            Expr::InSubquery {
444                expr: Box::new(expr),
445                subquery: Box::new(self.parse_query()?),
446                negated,
447            }
448        } else {
449            Expr::InList {
450                expr: Box::new(expr),
451                list: self.parse_comma_separated(|parser| parser.parse_expr())?,
452                negated,
453            }
454        };
455        self.expect_token(&Token::RParen)?;
456        Ok(in_op)
457    }
458
459    /// Parses `BETWEEN <low> AND <high>`, assuming the `BETWEEN` keyword was already consumed
460    fn parse_between(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParserError> {
461        // Stop parsing subexpressions for <low> and <high> on tokens with
462        // precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc.
463        let low = self.parse_subexpr(Self::BETWEEN_PREC)?;
464        self.expect_keyword("AND")?;
465        let high = self.parse_subexpr(Self::BETWEEN_PREC)?;
466        Ok(Expr::Between {
467            expr: Box::new(expr),
468            negated,
469            low: Box::new(low),
470            high: Box::new(high),
471        })
472    }
473
474    /// Parse a postgresql casting style which is in the form of `expr::datatype`
475    fn parse_pg_cast(&mut self, expr: Expr) -> Result<Expr, ParserError> {
476        Ok(Expr::Cast {
477            expr: Box::new(expr),
478            data_type: self.parse_data_type()?,
479        })
480    }
481
    /// Precedence of `BETWEEN`/`IN`/`LIKE` (see `get_next_precedence`).
    const BETWEEN_PREC: u8 = 20;
    /// Precedence of binary `+`/`-`.
    const PLUS_MINUS_PREC: u8 = 30;
484
485    /// Get the precedence of the next token
486    fn get_next_precedence(&mut self) -> Result<u8, ParserError> {
487        if let Some(token) = self.peek_token() {
488            debug!("get_next_precedence() {:?}", token);
489
490            match &token {
491                Token::Word(k) if k.keyword == "OR" => Ok(5),
492                Token::Word(k) if k.keyword == "AND" => Ok(10),
493                Token::Word(k) if k.keyword == "NOT" => Ok(0),
494                Token::Word(k) if k.keyword == "IS" => Ok(17),
495                Token::Word(k) if k.keyword == "IN" => Ok(Self::BETWEEN_PREC),
496                Token::Word(k) if k.keyword == "BETWEEN" => Ok(Self::BETWEEN_PREC),
497                Token::Word(k) if k.keyword == "LIKE" => Ok(Self::BETWEEN_PREC),
498                Token::Eq | Token::Lt | Token::LtEq | Token::Neq(_) | Token::Gt | Token::GtEq => {
499                    Ok(20)
500                }
501                Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC),
502                Token::Mult | Token::Div | Token::Mod => Ok(40),
503                Token::DoubleColon => Ok(50),
504                _ => Ok(0),
505            }
506        } else {
507            Ok(0)
508        }
509    }
510
    /// Return the first non-whitespace token that has not yet been processed
    /// (or None if reached end-of-file)
    fn peek_token(&mut self) -> Option<Token> {
        // Offset 0 = the very next non-whitespace token.
        self.peek_nth_token(0)
    }
516
    /// Return nth non-whitespace token that has not yet been processed
    ///
    /// `n` is 0-based and counts only non-whitespace tokens; nothing is
    /// consumed. Returns None on tokenizer error or end-of-file.
    fn peek_nth_token(&mut self, mut n: usize) -> Option<Token> {
        let mut index = self.index;
        loop {
            index += 1;
            // `index - self.index - 1` is the 0-based lookahead offset into
            // the tokenizer's peek buffer, whitespace tokens included.
            match self.tokenizer.peek_token(index - self.index - 1) {
                // Whitespace never counts towards `n`.
                Ok(Some(Token::Whitespace(_))) => continue,
                // Note: this arm also catches Ok(None) (EOF), which is
                // returned as-is once `n` reaches 0.
                Ok(non_whitespace) => {
                    if n == 0 {
                        return non_whitespace;
                    }
                    n -= 1;
                }
                // Tokenizer error: nothing to peek.
                _ => return None,
            }
        }
    }
534
535    fn check_ahead<F>(&mut self, max: usize, check_fn: F) -> bool
536    where
537        F: Fn(&Token) -> bool,
538    {
539        for n in 0..max {
540            let found_token = self.peek_nth_token(n);
541            if let Some(found_token) = found_token {
542                if check_fn(&found_token) {
543                    return true;
544                }
545            }
546        }
547
548        false
549    }
550
    /// Run the value handler on the most recently committed token, replacing
    /// it with whatever the handler returns. The `last_tokens` copy is
    /// updated too so a later `prev_token` pushes back the mutated token.
    /// No-op when nothing has been committed or no handler is set.
    fn execute_value_handler(&mut self) {
        let token = self.commited_tokens.pop();

        if let Some(token) = token {
            if let Some(ref mut value_handler) = self.value_handler {
                let token = value_handler(&self.context.get_context(), token);

                // Keep the push-back buffer in sync with the (possibly
                // rewritten) token.
                if self.last_tokens.pop().is_some() {
                    self.last_tokens.push(token.clone());
                }

                self.commited_tokens.push(token);
            } else {
                // No handler: restore the token untouched.
                self.commited_tokens.push(token);
            }
        }
    }
568
    /// Return the first non-whitespace token that has not yet been processed
    /// (or None if reached end-of-file) and mark it as processed. OK to call
    /// repeatedly after reaching EOF.
    fn next_token(&mut self) -> Option<Token> {
        // Start a fresh push-back record: `prev_token` undoes exactly what
        // one `next_token` call consumed, whitespace included.
        self.last_tokens.truncate(0);
        loop {
            self.index += 1;
            match self.tokenizer.next_token() {
                // Whitespace is buffered (for commit/push-back) but skipped.
                Ok(Some(Token::Whitespace(token))) => {
                    self.last_tokens.push(Token::Whitespace(token.clone()));
                    self.commited_tokens.push(Token::Whitespace(token.clone()));
                    continue;
                }
                Ok(Some(token)) => {
                    self.last_tokens.push(token.clone());
                    self.commited_tokens.push(token.clone());
                    return Some(token);
                }
                // Tokenizer error or end-of-file.
                _ => return None,
            }
        }
    }
591
592    /// Push back the last one non-whitespace token. Must be called after
593    /// `next_token()`, otherwise might panic. OK to call after
594    /// `next_token()` indicates an EOF.
595    fn prev_token(&mut self) {
596        self.last_tokens.reverse();
597        for token in self.last_tokens.drain(0..) {
598            self.commited_tokens.pop();
599            let token = token.clone();
600            self.tokenizer.pushback_token(token);
601        }
602    }
603
604    fn commit_tokens(&mut self) {
605        self.last_tokens.truncate(0);
606        if let Some(ref mut handler) = self.commit_handler {
607            handler(&self.commited_tokens.drain(0..).collect::<Vec<_>>());
608        } else {
609            self.commited_tokens.truncate(0);
610        }
611    }
612
613    /// Report unexpected token
614    fn expected<T>(&self, expected: &str, found: Option<Token>) -> Result<T, ParserError> {
615        parser_err!(format!(
616            "Expected {}, found: {}",
617            expected,
618            found.map_or_else(|| "EOF".to_string(), |t| format!("{}", t))
619        ))
620    }
621
    /// Look for an expected keyword and consume it if it exists
    #[must_use]
    fn parse_keyword(&mut self, expected: &'static str) -> bool {
        // Ideally, we'd accept a enum variant, not a string, but since
        // it's not trivial to maintain the enum without duplicating all
        // the keywords three times, we'll settle for a run-time check that
        // the string actually represents a known keyword...
        assert!(keywords::ALL_KEYWORDS.contains(&expected));
        match self.peek_token() {
            // Case-insensitive match against the tokenizer-supplied keyword.
            Some(Token::Word(ref k)) if expected.eq_ignore_ascii_case(&k.keyword) => {
                self.next_token();
                true
            }
            _ => false,
        }
    }
638
639    /// Look for an expected sequence of keywords and consume them if they exist
640    #[must_use]
641    // TODO: Fix the index rollback. It should use keywords pushback.
642    fn parse_keywords(&mut self, keywords: &[&'static str]) -> bool {
643        let mut parse_keywords = true;
644
645        for (index, word) in keywords.iter().enumerate() {
646            let found_token = self.peek_nth_token(index);
647
648            match found_token {
649                Some(Token::Word(found_word)) if found_word.keyword == *word => {}
650                _ => {
651                    parse_keywords = false;
652                    break;
653                }
654            }
655        }
656
657        if parse_keywords {
658            for (_, word) in keywords.iter().enumerate() {
659                if !self.parse_keyword(word) {
660                    return false;
661                }
662            }
663            return true;
664        }
665
666        false
667    }
668
    /// Bail out if the current token is not an expected keyword, or consume it if it is
    fn expect_keyword(&mut self, expected: &'static str) -> Result<(), ParserError> {
        // Peek first so the error message can report the offending token.
        let token = self.peek_token();
        if self.parse_keyword(expected) {
            Ok(())
        } else {
            self.expected(expected, token)
        }
    }
678
679    /// Bail out if the following tokens are not the expected sequence of
680    /// keywords, or consume them if they are.
681    fn expect_keywords(&mut self, expected: &[&'static str]) -> Result<(), ParserError> {
682        for kw in expected {
683            self.expect_keyword(kw)?;
684        }
685        Ok(())
686    }
687
688    /// Consume the next token if it matches the expected token, otherwise return false
689    #[must_use]
690    fn consume_token(&mut self, expected: &Token) -> bool {
691        match &self.peek_token() {
692            Some(t) if *t == *expected => {
693                self.next_token();
694                true
695            }
696            _ => false,
697        }
698    }
699
    /// Bail out if the current token is not the expected token, or consume it if it is
    fn expect_token(&mut self, expected: &Token) -> Result<(), ParserError> {
        // Peek first so the error message can show what was actually found.
        let token = self.peek_token();
        if self.consume_token(expected) {
            Ok(())
        } else {
            self.expected(&expected.to_string(), token)
        }
    }
709
    /// Parse a comma-separated list of 1+ items accepted by `F`
    ///
    /// NOTE: the parsed items are deliberately NOT collected — the returned
    /// `Vec` is always empty — so huge INSERT bodies don't stay in memory.
    fn parse_comma_separated<T, F>(&mut self, mut f: F) -> Result<Vec<T>, ParserError>
    where
        F: FnMut(&mut Parser<R, H, CH>) -> Result<T, ParserError>,
    {
        let values = vec![];
        loop {
            // Explanation: We don't want the parser to keep in memory HUGE tables, therefore, we just don't save them
            /*values.push(*/
            f(self)?/*)*/;
            if !self.consume_token(&Token::Comma) {
                break;
            }
        }
        Ok(values)
    }
726
727    /// Parse a SQL CREATE statement
728    fn parse_create(&mut self) -> Result<(), ParserError> {
729        if self.is_after_newline() {
730            if self.parse_keyword("TABLE") {
731                return self.parse_create_table();
732            } else if self.check_ahead(15, |token| match token {
733                Token::Word(word) if word.keyword == "PROCEDURE" => true,
734                _ => false,
735            }) {
736                self.take_create_procedure();
737                return Err(ParserError::Ignored);
738            }
739        };
740
741        Err(ParserError::Ignored)
742    }
743
744    fn is_after_newline(&mut self) -> bool {
745        if let Token::Whitespace(Whitespace::Newline) = self.last_tokens[self.last_tokens.len() - 2]
746        {
747            true
748        } else {
749            false
750        }
751    }
752
    /// Skip over a `CREATE ... PROCEDURE` body, which this parser never
    /// mutates (its tokens are still committed for pass-through).
    fn take_create_procedure(&mut self) {
        //take until BEGIN
        //TAKE UNTIL END
        //  IN CASE OF IF OR LOOP, take until END IF or END LOOP recursively.
        // NOTE(review): `take_until` consumes while the closure returns true,
        // so this first call consumes tokens only while they ARE `BEGIN` —
        // given the "take until BEGIN" comment above, the predicate may be
        // inverted; confirm against real dump output.
        self.take_until(40, |_parser: &mut Parser<R, H, CH>, token| match token {
            Token::Word(word) if word.keyword == "BEGIN" => true,
            _ => false,
        });
        self.next_token();
        // Consume the body up to the matching END, swallowing nested
        // IF/LOOP/BEGIN blocks (which contain their own ENDs) recursively.
        self.take_until(20000, |parser: &mut Parser<R, H, CH>, token| match token {
            Token::Word(_) if parser.peek_if_control_flow_start() => {
                parser.take_control_flow_block();
                true
            }
            Token::Word(word) if word.keyword == "END" => false,
            _ => true,
        });
    }
771
    /// Consume a balanced control-flow block (`IF..END IF`, `LOOP..END LOOP`,
    /// `BEGIN..END`), recursing into nested blocks. Expects the opening
    /// keyword to be the next token; consumes it and returns immediately if
    /// it is not one of the three openers.
    fn take_control_flow_block(&mut self) {
        let end_tokens = match self.next_token() {
            Some(Token::Word(word)) if word.keyword == "IF" => vec!["END", "IF"],
            Some(Token::Word(word)) if word.keyword == "LOOP" => vec!["END", "LOOP"],
            Some(Token::Word(word)) if word.keyword == "BEGIN" => vec!["END"],
            _ => return,
        };

        loop {
            match self.peek_token() {
                // Nested block: swallow it whole before looking for our END.
                Some(Token::Word(_)) if self.peek_if_control_flow_start() => {
                    self.take_control_flow_block();
                }
                Some(_) => {
                    // `parse_keywords` only consumes the END sequence when it
                    // matches completely; otherwise nothing is consumed here.
                    if self.parse_keywords(&end_tokens) {
                        return;
                    }

                    self.next_token();
                }
                None => break,
            }
        }
    }
796
797    fn peek_if_control_flow_start(&mut self) -> bool {
798        match self.peek_token() {
799            Some(Token::Word(word))
800                if word.keyword == "IF" || word.keyword == "LOOP" || word.keyword == "BEGIN" =>
801            {
802                true
803            }
804            _ => false,
805        }
806    }
807
808    fn parse_create_table(&mut self) -> Result<(), ParserError> {
809        let table_name = self.parse_object_name()?;
810        self.context.started_create_table(format!("{}", table_name));
811        // parse optional column list (schema)
812        let (_columns, _constraints) = self.parse_columns()?;
813
814        let _with_options = self.parse_with_options()?;
815
816        self.context.ended_create_table();
817
818        Ok(())
819    }
820
    /// Consume tokens while `check_fn` accepts them, up to `max` tokens.
    /// Stops at EOF or at the first rejected token, which is left
    /// unconsumed. (Despite the name, this is a "take while".)
    fn take_until<F>(&mut self, max: usize, check_fn: F)
    where
        F: Fn(&mut Parser<R, H, CH>, &Token) -> bool,
    {
        for _ in 0..max {
            match self.peek_token() {
                Some(token) if check_fn(self, &token) => self.next_token(),
                _ => return,
            };
        }
    }
832
    /// Parse the parenthesized column/constraint list of a CREATE TABLE.
    ///
    /// Column-name tokens are surfaced to the value handler (via
    /// `execute_value_handler`) under a `ColumnDefinition` context so they
    /// can be rewritten. Returns the collected column definitions and table
    /// constraints.
    fn parse_columns(&mut self) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), ParserError> {
        let mut columns = vec![];
        let mut constraints = vec![];
        // No `(` at all, or an immediately-closed `()`: empty column list.
        if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) {
            return Ok((columns, constraints));
        }

        loop {
            if let Some(constraint) = self.parse_optional_table_constraint()? {
                debug!("Is a optional table constrain! {:?}", constraint);
                constraints.push(constraint);
            } else if let Some(Token::Word(column_name)) = self.peek_token() {
                self.context
                    .started_column_definition(format!("{}", column_name), columns.len());

                self.next_token();

                // Give the value handler a chance to rewrite the column-name
                // token that was just committed.
                self.execute_value_handler();

                let data_type = self.parse_data_type()?;

                // Optional parenthesized type arguments, e.g. `(len[, scale])`.
                let data_config = if let Some(Token::LParen) = self.peek_token() {
                    self.parse_data_config()?
                } else {
                    vec![]
                };

                // MySQL allows SIGNED/UNSIGNED after integer types; swallow it.
                if data_type == DataType::Int
                    || data_type == DataType::BigInt
                    || data_type == DataType::SmallInt
                {
                    let _ = self.parse_keyword("UNSIGNED");
                    let _ = self.parse_keyword("SIGNED");
                }

                // Column options (NOT NULL, COMMENT, ...) until `,` or `)`.
                let mut options = vec![];
                loop {
                    match self.peek_token() {
                        None | Some(Token::Comma) | Some(Token::RParen) => break,
                        _ => options.push(self.parse_column_option_def()?),
                    }
                }

                columns.push(ColumnDef {
                    name: column_name.to_ident(),
                    data_type,
                    data_config,
                    options,
                });

                self.context.ended_column_definition();
            } else {
                let token = self.peek_token();
                return self.expected("column name or constraint definition", token);
            }
            let comma = self.consume_token(&Token::Comma);
            if self.consume_token(&Token::RParen) {
                // allow a trailing comma, even though it's not in standard
                break;
            } else if !comma {
                let token = self.peek_token();
                return self.expected("',' or ')' after column definition", token);
            }
        }

        Ok((columns, constraints))
    }
900
    /// Parse a single column option (NOT NULL, DEFAULT, UNIQUE, ...),
    /// optionally preceded by a `CONSTRAINT <name>` prefix.
    fn parse_column_option_def(&mut self) -> Result<ColumnOptionDef, ParserError> {
        // Optional `CONSTRAINT <name>` prefix.
        let name = if self.parse_keyword("CONSTRAINT") {
            Some(self.parse_identifier()?)
        } else {
            None
        };

        let option = if self.parse_keywords(&["NOT", "NULL"]) {
            ColumnOption::NotNull
        } else if self.parse_keywords(&["CHARACTER", "SET"]) {
            // The charset name is parsed and discarded.
            // NOTE(review): CHARACTER SET is mapped to NotNull, which looks
            // like a placeholder variant — confirm against the AST enum.
            self.parse_object_name()?;
            ColumnOption::NotNull
        } else if self.parse_keyword("NULL") {
            ColumnOption::Null
        } else if self.parse_keyword("COMMENT") {
            // The comment literal token is consumed and discarded.
            self.next_token();
            ColumnOption::Comment
        } else if self.parse_keyword("COLLATE") {
            // The collation name is parsed and discarded.
            self.parse_object_name()?;
            ColumnOption::Collate
        } else if self.parse_keyword("AUTO_INCREMENT") {
            ColumnOption::Autoincrement
        } else if self.parse_keyword("DEFAULT") {
            ColumnOption::Default(self.parse_expr()?)
        } else if self.parse_keywords(&["PRIMARY", "KEY"]) {
            ColumnOption::Unique { is_primary: true }
        } else if self.parse_keyword("UNIQUE") {
            ColumnOption::Unique { is_primary: false }
        } else if self.parse_keywords(&["ON", "UPDATE"]) {
            // ON UPDATE <expr> is stored as a Default option.
            ColumnOption::Default(self.parse_expr()?)
        } else if self.parse_keyword("REFERENCES") {
            let foreign_table = self.parse_object_name()?;
            let referred_columns = self.parse_parenthesized_column_list(Mandatory)?;
            ColumnOption::ForeignKey {
                foreign_table,
                referred_columns,
            }
        } else if self.parse_keyword("CHECK") {
            self.expect_token(&Token::LParen)?;
            let expr = self.parse_expr()?;
            self.expect_token(&Token::RParen)?;
            ColumnOption::Check(expr)
        } else {
            let token = self.peek_token();
            return self.expected("column option", token);
        };

        let column_definition = ColumnOptionDef { name, option };

        Ok(column_definition)
    }
952
    /// Try to parse a table-level constraint (PRIMARY KEY / UNIQUE / KEY /
    /// FULLTEXT, FOREIGN KEY, or CHECK), optionally preceded by
    /// `CONSTRAINT <name>`.
    ///
    /// Returns `Ok(None)` — after pushing the peeked token back — when the
    /// next tokens do not start a constraint and no `CONSTRAINT` prefix was
    /// seen, so the caller can try a column definition instead.
    fn parse_optional_table_constraint(&mut self) -> Result<Option<TableConstraint>, ParserError> {
        let name = if self.parse_keyword("CONSTRAINT") {
            Some(self.parse_identifier()?)
        } else {
            None
        };
        match self.next_token() {
            Some(Token::Word(ref k))
                if k.keyword == "PRIMARY"
                    || k.keyword == "UNIQUE"
                    || k.keyword == "KEY"
                    || k.keyword == "FULLTEXT" =>
            {
                let is_primary = k.keyword == "PRIMARY";

                // PRIMARY/UNIQUE/FULLTEXT may be followed by an optional KEY.
                if k.keyword == "UNIQUE" || k.keyword == "FULLTEXT" || k.keyword == "PRIMARY" {
                    let _ = self.parse_keyword("KEY");
                }

                // Optional index name: a non-keyword word before the '('.
                // The name is consumed but not stored.
                let _index_name = match self.peek_token() {
                    Some(Token::Word(word)) if word.keyword == "" => self.next_token(),
                    _ => None,
                };

                let columns = self.parse_parenthesized_column_list(Mandatory)?;
                // KEY and FULLTEXT constraints are also represented as
                // Unique (with is_primary == false).
                Ok(Some(TableConstraint::Unique {
                    name,
                    columns,
                    is_primary,
                }))
            }
            Some(Token::Word(ref k)) if k.keyword == "FOREIGN" => {
                self.expect_keyword("KEY")?;
                let columns = self.parse_parenthesized_column_list(Mandatory)?;
                self.expect_keyword("REFERENCES")?;
                let foreign_table = self.parse_object_name()?;
                let referred_columns = self.parse_parenthesized_column_list(Mandatory)?;

                // TODO: Match these configs into memory
                // Validate and discard referential actions:
                // ON {DELETE|UPDATE} {RESTRICT|CASCADE|SET NULL|SET DEFAULT|NO ACTION}.
                while self.parse_keyword("ON") {
                    let identifier = self.parse_identifier()?;
                    if identifier.value != "DELETE" && identifier.value != "UPDATE" {
                        return self
                            .expected("DELETE, UPDATE", Some(Token::Word(identifier.to_word())));
                    }

                    let identifier = self.parse_identifier()?;

                    match identifier.value.as_str() {
                        "RESTRICT" | "CASCADE" => {
                            continue;
                        }
                        // SET NULL / SET DEFAULT / NO ACTION: the trailing
                        // word must be NULL, ACTION or DEFAULT.
                        "SET" | "NO" => match self.peek_token() {
                            Some(Token::Word(word))
                                if word.keyword == "NULL"
                                    || word.keyword == "ACTION"
                                    || word.keyword == "DEFAULT" =>
                            {
                                self.next_token();
                            }
                            Some(token) => {
                                return self.expected("NULL, ACTION, DEFAULT", Some(token))
                            }
                            None => {
                                return parser_err!(
                                    "Expecting a NULL, ACTION, DEFAULT but found EOF"
                                )
                            }
                        },
                        _ => {
                            return self.expected(
                                "RESTRICT, CASCADE, SET, NO",
                                Some(Token::Word(identifier.to_word())),
                            );
                        }
                    }
                }

                Ok(Some(TableConstraint::ForeignKey {
                    name,
                    columns,
                    foreign_table,
                    referred_columns,
                }))
            }
            Some(Token::Word(ref k)) if k.keyword == "CHECK" => {
                self.expect_token(&Token::LParen)?;
                let expr = Box::new(self.parse_expr()?);
                self.expect_token(&Token::RParen)?;
                Ok(Some(TableConstraint::Check { name, expr }))
            }
            unexpected => {
                // A named CONSTRAINT must be followed by a constraint kind;
                // otherwise rewind one token and report "no constraint here".
                if name.is_some() {
                    self.expected("PRIMARY, UNIQUE, FOREIGN, or CHECK", unexpected)
                } else {
                    self.prev_token();
                    Ok(None)
                }
            }
        }
    }
1054
1055    fn parse_with_options(&mut self) -> Result<Vec<SqlOption>, ParserError> {
1056        if self.parse_keyword("WITH") {
1057            self.expect_token(&Token::LParen)?;
1058            let options = self.parse_comma_separated(|parser| parser.parse_sql_option())?;
1059            self.expect_token(&Token::RParen)?;
1060            Ok(options)
1061        } else {
1062            match self.peek_token() {
1063                Some(Token::Word(word)) if word.keyword != "" => self.parse_mysql_table_options(),
1064                _ => Ok(vec![]),
1065            }
1066        }
1067    }
1068
1069    fn parse_mysql_table_options(&mut self) -> Result<Vec<SqlOption>, ParserError> {
1070        let mut options: Vec<SqlOption> = vec![];
1071
1072        loop {
1073            let _ = self.parse_keyword("DEFAULT");
1074            match self.peek_token() {
1075                Some(Token::Word(word)) if word.keyword != "" => {}
1076                _ => {
1077                    break;
1078                }
1079            }
1080
1081            let name = self.parse_identifier()?;
1082            self.expect_token(&Token::Eq)?;
1083            let value = self.parse_value()?;
1084            options.push(SqlOption { name, value });
1085        }
1086
1087        Ok(options)
1088    }
1089
1090    fn parse_sql_option(&mut self) -> Result<SqlOption, ParserError> {
1091        let name = self.parse_identifier()?;
1092        self.expect_token(&Token::Eq)?;
1093        let value = self.parse_value()?;
1094        Ok(SqlOption { name, value })
1095    }
1096
1097    /// Parse a literal value (numbers, strings, date/time, booleans)
1098    fn parse_value(&mut self) -> Result<Value, ParserError> {
1099        let token = self.next_token();
1100
1101        if let SQLContextType::Insert(InsertContext::Value(_)) = self.context.context {
1102            self.execute_value_handler();
1103        }
1104
1105        match token {
1106            Some(t) => match t {
1107                Token::Word(k) => match k.keyword.as_ref() {
1108                    "TRUE" => Ok(Value::Boolean(true)),
1109                    "FALSE" => Ok(Value::Boolean(false)),
1110                    "NULL" => Ok(Value::Null),
1111                    "" => Ok(Value::Identifier(Ident {
1112                        value: k.value,
1113                        quote_style: None,
1114                    })),
1115                    "CSV" => Ok(Value::Identifier(Ident {
1116                        value: k.value,
1117                        quote_style: None,
1118                    })),
1119                    _ => {
1120                        return parser_err!(format!("No value parser for keyword {}", k.keyword));
1121                    }
1122                },
1123                // The call to n.parse() returns a bigdecimal when the
1124                // bigdecimal feature is enabled, and is otherwise a no-op
1125                // (i.e., it returns the input string).
1126                Token::Number(ref n) => match n.parse() {
1127                    Ok(n) => Ok(Value::Number(n)),
1128                    Err(e) => parser_err!(format!("Could not parse '{}' as number: {}", n, e)),
1129                },
1130                Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
1131                Token::NationalStringLiteral(ref s) => {
1132                    Ok(Value::NationalStringLiteral(s.to_string()))
1133                }
1134                Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
1135                _ => parser_err!(format!("Unsupported value: {:?}", t)),
1136            },
1137            None => parser_err!("Expecting a value, but found EOF"),
1138        }
1139    }
1140
1141    /// Parse an unsigned literal integer/long
1142    fn parse_literal_uint(&mut self) -> Result<u64, ParserError> {
1143        match self.next_token() {
1144            Some(Token::Number(s)) => s.parse::<u64>().map_err(|e| {
1145                ParserError::ParserError(format!("Could not parse '{}' as u64: {}", s, e))
1146            }),
1147            other => self.expected("literal int", other),
1148        }
1149    }
1150
1151    /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
1152    fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
1153        match self.next_token() {
1154            Some(Token::Word(k)) => match k.keyword.as_ref() {
1155                "BOOLEAN" => Ok(DataType::Boolean),
1156                "FLOAT" => Ok(DataType::Float(self.parse_optional_precision()?)),
1157                "REAL" => Ok(DataType::Real),
1158                "DOUBLE" => {
1159                    let _ = self.parse_keyword("PRECISION");
1160                    Ok(DataType::Double)
1161                }
1162                //TODO: Extend the types to recognize these culumn values
1163                "SMALLINT" | "TINYINT" | "MEDIUMINT" => Ok(DataType::SmallInt),
1164                "INT" | "INTEGER" => Ok(DataType::Int),
1165                "BIGINT" => Ok(DataType::BigInt),
1166                "VARCHAR" => Ok(DataType::Varchar(self.parse_optional_precision()?)),
1167                "CHAR" | "CHARACTER" => {
1168                    if self.parse_keyword("VARYING") {
1169                        Ok(DataType::Varchar(self.parse_optional_precision()?))
1170                    } else {
1171                        Ok(DataType::Char(self.parse_optional_precision()?))
1172                    }
1173                }
1174                "UUID" => Ok(DataType::Uuid),
1175                "DATE" => Ok(DataType::Date),
1176                "TIMESTAMP" => {
1177                    // TBD: we throw away "with/without timezone" information
1178                    if self.parse_keyword("WITH") || self.parse_keyword("WITHOUT") {
1179                        self.expect_keywords(&["TIME", "ZONE"])?;
1180                    }
1181                    Ok(DataType::Timestamp)
1182                }
1183                "TIME" => {
1184                    // TBD: we throw away "with/without timezone" information
1185                    if self.parse_keyword("WITH") || self.parse_keyword("WITHOUT") {
1186                        self.expect_keywords(&["TIME", "ZONE"])?;
1187                    }
1188                    Ok(DataType::Time)
1189                }
1190                // Interval types can be followed by a complicated interval
1191                // qualifier that we don't currently support. See
1192                // parse_interval_literal for a taste.
1193                "INTERVAL" => Ok(DataType::Interval),
1194                "REGCLASS" => Ok(DataType::Regclass),
1195                "TEXT" => {
1196                    if self.consume_token(&Token::LBracket) {
1197                        // Note: this is postgresql-specific
1198                        self.expect_token(&Token::RBracket)?;
1199                        Ok(DataType::Array(Box::new(DataType::Text)))
1200                    } else {
1201                        Ok(DataType::Text)
1202                    }
1203                }
1204                "BYTEA" => Ok(DataType::Bytea),
1205                "NUMERIC" | "DECIMAL" | "DEC" => {
1206                    let (precision, scale) = self.parse_optional_precision_scale()?;
1207                    Ok(DataType::Decimal(precision, scale))
1208                }
1209                _ => {
1210                    self.prev_token();
1211                    let type_name = self.parse_object_name()?;
1212                    Ok(DataType::Custom(type_name))
1213                }
1214            },
1215            other => self.expected("a data type name", other),
1216        }
1217    }
1218
1219    /// Parse a SQL datatype config (in the context of a CREATE TABLE statement for example)
1220    fn parse_data_config(&mut self) -> Result<Vec<Value>, ParserError> {
1221        self.expect_token(&Token::LParen)?;
1222        let values = self.parse_comma_separated(|parser| parser.parse_value())?;
1223        self.expect_token(&Token::RParen)?;
1224        Ok(values)
1225    }
1226
1227    /// Parse a possibly qualified, possibly quoted identifier, e.g.
1228    /// `foo` or `myschema."table"`
1229    fn parse_object_name(&mut self) -> Result<ObjectName, ParserError> {
1230        let mut idents = vec![];
1231        loop {
1232            idents.push(self.parse_identifier()?);
1233            if !self.consume_token(&Token::Period) {
1234                break;
1235            }
1236        }
1237        Ok(ObjectName(idents))
1238    }
1239
1240    /// Parse a simple one-word identifier (possibly quoted, possibly a keyword)
1241    fn parse_identifier(&mut self) -> Result<Ident, ParserError> {
1242        match self.next_token() {
1243            Some(Token::Word(w)) => Ok(w.to_ident()),
1244            unexpected => self.expected("identifier", unexpected),
1245        }
1246    }
1247
1248    /// Parse a parenthesized comma-separated list of unqualified, possibly quoted identifiers
1249    fn parse_parenthesized_column_list(
1250        &mut self,
1251        optional: IsOptional,
1252    ) -> Result<Vec<Ident>, ParserError> {
1253        if self.consume_token(&Token::LParen) {
1254            let cols = self.parse_comma_separated(|parser| {
1255                let ident = parser.parse_identifier();
1256                if let Some(Token::LParen) = parser.peek_token() {
1257                    parser.next_token();
1258                    let _ = parser.parse_value();
1259                    parser.expect_token(&Token::RParen)?;
1260                };
1261                ident
1262            })?;
1263            self.expect_token(&Token::RParen)?;
1264            Ok(cols)
1265        } else if optional == Optional {
1266            Ok(vec![])
1267        } else {
1268            let token = self.peek_token();
1269            self.expected("a list of columns in parentheses", token)
1270        }
1271    }
1272
1273    fn parse_optional_precision(&mut self) -> Result<Option<u64>, ParserError> {
1274        if self.consume_token(&Token::LParen) {
1275            let n = self.parse_literal_uint()?;
1276            self.expect_token(&Token::RParen)?;
1277            Ok(Some(n))
1278        } else {
1279            Ok(None)
1280        }
1281    }
1282
1283    fn parse_optional_precision_scale(
1284        &mut self,
1285    ) -> Result<(Option<u64>, Option<u64>), ParserError> {
1286        if self.consume_token(&Token::LParen) {
1287            let n = self.parse_literal_uint()?;
1288            let scale = if self.consume_token(&Token::Comma) {
1289                Some(self.parse_literal_uint()?)
1290            } else {
1291                None
1292            };
1293            self.expect_token(&Token::RParen)?;
1294            Ok((Some(n), scale))
1295        } else {
1296            Ok((None, None))
1297        }
1298    }
1299
1300    /// Parse a query expression, i.e. a `SELECT` statement optionally
1301    /// preceeded with some `WITH` CTE declarations and optionally followed
1302    /// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't
1303    /// expect the initial keyword to be already consumed
1304    fn parse_query(&mut self) -> Result<Query, ParserError> {
1305        let ctes = vec![];
1306
1307        let body = self.parse_query_body(0)?;
1308
1309        let order_by = vec![];
1310
1311        let limit = None;
1312
1313        let offset = None;
1314
1315        let fetch = None;
1316
1317        Ok(Query {
1318            ctes,
1319            body,
1320            limit,
1321            order_by,
1322            offset,
1323            fetch,
1324        })
1325    }
1326
1327    /// Parse a "query body", which is an expression with roughly the
1328    /// following grammar:
1329    /// ```text
1330    ///   query_body ::= restricted_select | '(' subquery ')' | set_operation
1331    ///   restricted_select ::= 'SELECT' [expr_list] [ from ] [ where ] [ groupby_having ]
1332    ///   subquery ::= query_body [ order_by_limit ]
1333    ///   set_operation ::= query_body { 'UNION' | 'EXCEPT' | 'INTERSECT' } [ 'ALL' ] query_body
1334    /// ```
1335    fn parse_query_body(&mut self, _precedence: u8) -> Result<SetExpr, ParserError> {
1336        // We parse the expression using a Pratt parser, as in `parse_expr()`.
1337        // Start by parsing a restricted SELECT or a `(subquery)`:
1338        let expr = if self.parse_keyword("VALUES") {
1339            SetExpr::Values(self.parse_values()?)
1340        } else {
1341            let token = self.peek_token();
1342            return self.expected("VALUES", token);
1343        };
1344
1345        Ok(expr)
1346    }
1347
1348    /// Parse an INSERT statement
1349    fn parse_insert(&mut self) -> Result<(), ParserError> {
1350        if !self.is_after_newline() {
1351            return Err(ParserError::Ignored);
1352        }
1353        self.expect_keyword("INTO")?;
1354        self.context.started_insert();
1355        let table_name = self.parse_object_name()?;
1356
1357        self.context.started_insert_table(format!("{}", table_name));
1358
1359        let _columns = self.parse_parenthesized_column_list(Optional)?;
1360        let _source = Box::new(self.parse_query()?);
1361
1362        self.context.ended_insert_table();
1363        self.context.ended_insert();
1364
1365        Ok(())
1366    }
1367
    /// Parse the row lists of a VALUES clause: `(expr, ...), (expr, ...)`.
    ///
    /// Each value is bracketed by `started_insert_value`/`ended_insert_value`
    /// context notifications so the handler can observe its position. The
    /// parsed expressions are discarded and an empty `Values` is returned —
    /// presumably the mutator only needs the handler callbacks (see the
    /// commented-out alternative below); confirm before relying on the AST.
    fn parse_values(&mut self) -> Result<Values, ParserError> {
        let _values = self.parse_comma_separated(|parser| {
            parser.expect_token(&Token::LParen)?;
            // Zero-based position of the value within the current row,
            // reported to the context before each expression is parsed.
            let mut counter = 0;
            let exprs = parser.parse_comma_separated(|parser| {
                parser.context.started_insert_value(counter);
                counter += 1;
                let value = parser.parse_expr();
                parser.context.ended_insert_value();
                value
            })?;
            parser.expect_token(&Token::RParen)?;
            Ok(exprs)
        })?;
        Ok(Values(vec![]))
        //Ok(Values(values))
    }
1385}
1386
1387impl Word {
1388    fn to_ident(&self) -> Ident {
1389        Ident {
1390            value: self.value.clone(),
1391            quote_style: self.quote_style,
1392        }
1393    }
1394}
1395
1396impl Ident {
1397    fn to_word(&self) -> Word {
1398        Word {
1399            value: self.value.clone(),
1400            quote_style: self.quote_style,
1401            keyword: self.value.clone(),
1402        }
1403    }
1404}