sql_parse/
parser.rs

1// Licensed under the Apache License, Version 2.0 (the "License");
2// you may not use this file except in compliance with the License.
3// You may obtain a copy of the License at
4//
5// http://www.apache.org/licenses/LICENSE-2.0
6//
7// Unless required by applicable law or agreed to in writing, software
8// distributed under the License is distributed on an "AS IS" BASIS,
9// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10// See the License for the specific language governing permissions and
11// limitations under the License.
12
13use alloc::{borrow::Cow, format, string::String, vec::Vec};
14
15use crate::{
16    issue::{IssueHandle, Issues},
17    keywords::Keyword,
18    lexer::{Lexer, Token},
19    Identifier, ParseOptions, SString, Span, Spanned,
20};
21
/// Error value returned by the parser's fallible methods.
#[derive(Debug)]
pub(crate) enum ParseError {
    /// Parsing failed and the parser did not reach a point it could resume from;
    /// `Parser::recover` returns this when it hits end of input, the statement
    /// delimiter, or a token accepted by its `fail` predicate.
    Unrecovered,
}
26
/// Token-level parser state: the current token, the lexer that produces the
/// following ones, and the sink that collects diagnostics.
pub(crate) struct Parser<'a, 'b> {
    /// The token currently being looked at (the last one returned by the lexer).
    pub(crate) token: Token<'a>,
    /// Span the lexer returned together with `token`.
    pub(crate) span: Span,
    /// Lexer that supplies the tokens.
    pub(crate) lexer: Lexer<'a>,
    /// Destination for the errors and warnings reported via `err` / `warn`.
    pub(crate) issues: &'b mut Issues<'a>,
    /// Starts at 0 in `Parser::new`.
    // NOTE(review): not used in this part of the file; presumably counts
    // statement arguments/placeholders — confirm against the callers.
    pub(crate) arg: usize,
    /// Token that terminates a statement; starts as `Token::SemiColon`.
    /// `recover` gives up when it reaches this token.
    pub(crate) delimiter: Token<'a>,
    /// Parse options (dialect and warning switches) consulted while parsing.
    pub(crate) options: &'b ParseOptions,
    /// Starts as `false` in `Parser::new`.
    // NOTE(review): not read in this part of the file; presumably toggled by
    // the statement parser — confirm.
    pub(crate) permit_compound_statements: bool,
}
37
/// Decodes the body of a single-quoted SQL string literal.
///
/// `s` is the text between the delimiting quotes. Every doubled quote (`''`)
/// collapses into a single `'`; all other characters are copied unchanged.
/// A quote that is *not* followed by a second quote is kept as-is and no
/// longer swallows the character after it.
///
/// Returns the input borrowed when it contains neither `'` nor `\`, and an
/// owned string otherwise.
pub(crate) fn decode_single_quoted_string(s: &str) -> Cow<'_, str> {
    // NOTE(review): a backslash forces the slow path, but backslash escape
    // sequences are copied verbatim here — confirm whether they should be
    // decoded (that would be dialect dependent).
    if !s.contains('\'') && !s.contains('\\') {
        return s.into();
    }
    let mut decoded = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        // Only the second quote of a `''` pair is dropped.
        if c == '\'' && chars.peek() == Some(&'\'') {
            chars.next();
        }
        decoded.push(c);
    }
    decoded.into()
}
57
/// Decodes the body of a double-quoted SQL string literal.
///
/// `s` is the text between the delimiting quotes. Every doubled quote (`""`)
/// collapses into a single `"`; all other characters — including single
/// quotes — are copied unchanged.
///
/// Returns the input borrowed when it contains neither `"` nor `\`, and an
/// owned string otherwise.
pub(crate) fn decode_double_quoted_string(s: &str) -> Cow<'_, str> {
    // NOTE(review): a backslash forces the slow path, but backslash escape
    // sequences are copied verbatim here — confirm whether they should be
    // decoded (that would be dialect dependent).
    if !s.contains('"') && !s.contains('\\') {
        return s.into();
    }
    let mut decoded = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        // Only the second quote of a `""` pair is dropped.
        if c == '"' && chars.peek() == Some(&'"') {
            chars.next();
        }
        decoded.push(c);
    }
    decoded.into()
}
77
78impl<'a, 'b> Parser<'a, 'b> {
79    pub(crate) fn new(src: &'a str, issues: &'b mut Issues<'a>, options: &'b ParseOptions) -> Self {
80        let mut lexer = Lexer::new(src);
81        let (token, span) = lexer.next_token();
82        Self {
83            token,
84            span,
85            lexer,
86            issues,
87            arg: 0,
88            delimiter: Token::SemiColon,
89            options,
90            permit_compound_statements: false,
91        }
92    }
93
94    pub(crate) fn recover(
95        &mut self,
96        success: impl Fn(&Token<'a>) -> bool,
97        fail: impl Fn(&Token<'a>) -> bool,
98    ) -> Result<(), ParseError> {
99        let mut brackets = Vec::new();
100        loop {
101            match &self.token {
102                t if brackets.is_empty() && success(t) => return Ok(()),
103                Token::Eof => return Err(ParseError::Unrecovered),
104                t if t == &self.delimiter => return Err(ParseError::Unrecovered),
105                t if brackets.is_empty() && fail(t) => return Err(ParseError::Unrecovered),
106                Token::LParen => {
107                    brackets.push(Token::LParen);
108                    self.next();
109                }
110                Token::LBracket => {
111                    brackets.push(Token::LBracket);
112                    self.next();
113                }
114                Token::LBrace => {
115                    brackets.push(Token::LBrace);
116                    self.next();
117                }
118                Token::RBrace => {
119                    self.next();
120                    while let Some(v) = brackets.pop() {
121                        if v == Token::LBrace {
122                            break;
123                        }
124                    }
125                }
126                Token::RBracket => {
127                    self.next();
128                    while let Some(v) = brackets.pop() {
129                        if v == Token::LBracket {
130                            break;
131                        }
132                    }
133                }
134                Token::RParen => {
135                    self.next();
136                    while let Some(v) = brackets.pop() {
137                        if v == Token::LParen {
138                            break;
139                        }
140                    }
141                }
142                _ => self.next(),
143            }
144        }
145    }
146
147    pub(crate) fn recovered<T: Default>(
148        &mut self,
149        expected: &'static str,
150        end: &impl Fn(&Token<'a>) -> bool,
151        fun: impl FnOnce(&mut Self) -> Result<T, ParseError>,
152    ) -> Result<T, ParseError> {
153        let ans = match fun(self) {
154            Ok(v) => v,
155            Err(_) => {
156                self.recover(end, |_| false)?;
157                T::default()
158            }
159        };
160        if !end(&self.token) {
161            self.expected_error(expected);
162            self.recover(end, |_| false)?;
163        }
164        Ok(ans)
165    }
166
167    pub(crate) fn read_from_stdin_and_next(&mut self) -> (&'a str, Span) {
168        let stdin = self.lexer.read_from_stdin();
169        let (token, span) = self.lexer.next_token();
170        self.token = token;
171        self.span = span;
172        stdin
173    }
174
175    pub(crate) fn next(&mut self) {
176        let (token, span) = self.lexer.next_token();
177        self.token = token;
178        self.span = span;
179    }
180
181    pub(crate) fn expected_error(&mut self, name: &'static str) {
182        self.err(format!("Expected '{}' here", name), &self.span.span());
183    }
184
185    pub(crate) fn err(
186        &mut self,
187        message: impl Into<Cow<'static, str>>,
188        span: &impl Spanned,
189    ) -> IssueHandle<'a, '_> {
190        self.issues.err(message, span)
191    }
192
193    pub(crate) fn warn(
194        &mut self,
195        message: impl Into<Cow<'static, str>>,
196        span: &impl Spanned,
197    ) -> IssueHandle<'a, '_> {
198        self.issues.warn(message, span)
199    }
200
201    pub(crate) fn expected_failure<T>(&mut self, name: &'static str) -> Result<T, ParseError> {
202        self.expected_error(name);
203        Err(ParseError::Unrecovered)
204    }
205
206    pub(crate) fn token_to_plain_identifier(
207        &mut self,
208        token: &Token<'a>,
209        span: Span,
210    ) -> Result<Identifier<'a>, ParseError> {
211        match &token {
212            Token::Ident(v, kw) => {
213                let v = *v;
214                if kw.reserved() {
215                    self.err(
216                        format!("'{}' is a reserved identifier use `{}`", v, v),
217                        &span,
218                    );
219                } else if kw != &Keyword::QUOTED_IDENTIFIER
220                    && self.options.warn_unquoted_identifiers
221                {
222                    self.warn(format!("identifiers should be quoted as `{}`", v), &span);
223                }
224                Ok(Identifier::new(v, span))
225            }
226            _ => self.expected_failure("identifier"),
227        }
228    }
229
230    pub(crate) fn consume_plain_identifier(&mut self) -> Result<Identifier<'a>, ParseError> {
231        match &self.token {
232            Token::Ident(v, kw) => {
233                let v = *v;
234                if kw.reserved() {
235                    self.err(
236                        format!("'{}' is a reserved identifier use `{}`", v, v),
237                        &self.span.span(),
238                    );
239                } else if kw != &Keyword::QUOTED_IDENTIFIER
240                    && self.options.warn_unquoted_identifiers
241                {
242                    self.err(
243                        format!("identifiers should be quoted as `{}`", v),
244                        &self.span.span(),
245                    );
246                } else if kw == &Keyword::QUOTED_IDENTIFIER && self.options.dialect.is_postgresql()
247                {
248                    self.err(
249                        "quoted identifiers not supported by postgresql",
250                        &self.span.span(),
251                    );
252                }
253                Ok(Identifier::new(v, self.consume()))
254            }
255            Token::DoubleQuotedString(v) if self.options.dialect.is_postgresql() => {
256                Ok(Identifier::new(v, self.consume()))
257            }
258            _ => self.expected_failure("identifier"),
259        }
260    }
261
262    pub(crate) fn consume_keyword(&mut self, keyword: Keyword) -> Result<Span, ParseError> {
263        match &self.token {
264            Token::Ident(v, kw) if kw == &keyword => {
265                if !v.chars().all(|c| c.is_ascii_uppercase())
266                    && self.options.warn_none_capital_keywords
267                {
268                    self.warn(
269                        format!(
270                            "keyword {} should be in ALL CAPS {}",
271                            v,
272                            v.to_ascii_uppercase()
273                        ),
274                        &self.span.span(),
275                    );
276                }
277                Ok(self.consume())
278            }
279            _ => self.expected_failure(keyword.name()),
280        }
281    }
282
283    pub(crate) fn consume_keywords(&mut self, keywords: &[Keyword]) -> Result<Span, ParseError> {
284        let mut span = self.consume_keyword(keywords[0])?;
285        for keyword in &keywords[1..] {
286            span = self.consume_keyword(*keyword)?.join_span(&span);
287        }
288        Ok(span)
289    }
290
291    pub(crate) fn skip_keyword(&mut self, keyword: Keyword) -> Option<Span> {
292        match &self.token {
293            Token::Ident(_, kw) if kw == &keyword => Some(self.consume_keyword(keyword).unwrap()),
294            _ => None,
295        }
296    }
297
298    pub(crate) fn consume_token(&mut self, token: Token) -> Result<Span, ParseError> {
299        if self.token != token {
300            self.expected_failure(token.name())
301        } else {
302            Ok(self.consume())
303        }
304    }
305
306    pub(crate) fn skip_token(&mut self, token: Token) -> Option<Span> {
307        if self.token != token {
308            None
309        } else {
310            Some(self.consume())
311        }
312    }
313
314    pub(crate) fn consume(&mut self) -> Span {
315        let span = self.span.clone();
316        self.next();
317        span
318    }
319
320    pub(crate) fn consume_string(&mut self) -> Result<SString<'a>, ParseError> {
321        let (mut a, mut b) = match &self.token {
322            Token::SingleQuotedString(v) => {
323                let v = *v;
324                let span = self.span.clone();
325                self.next();
326                (decode_single_quoted_string(v), span)
327            }
328            Token::DoubleQuotedString(v) => {
329                let v = *v;
330                let span = self.span.clone();
331                self.next();
332                (decode_double_quoted_string(v), span)
333            }
334            _ => self.expected_failure("string")?,
335        };
336        loop {
337            match self.token {
338                Token::SingleQuotedString(v) => {
339                    b = b.join_span(&self.span);
340                    a.to_mut().push_str(decode_single_quoted_string(v).as_ref());
341                    self.next();
342                }
343                Token::DoubleQuotedString(v) => {
344                    b = b.join_span(&self.span);
345                    a.to_mut().push_str(decode_double_quoted_string(v).as_ref());
346                    self.next();
347                }
348                _ => break,
349            }
350        }
351        Ok(SString::new(a, b))
352    }
353
354    pub(crate) fn consume_int<T: core::str::FromStr + Default>(
355        &mut self,
356    ) -> Result<(T, Span), ParseError> {
357        match &self.token {
358            Token::Integer(v) => {
359                let v = match v.parse() {
360                    Ok(v) => v,
361                    Err(_) => self.err_here("integer outside range").unwrap_or_default(),
362                };
363                let span = self.span.clone();
364                self.next();
365                Ok((v, span))
366            }
367            _ => self.expected_failure("integer"),
368        }
369    }
370
371    pub(crate) fn consume_float<T: core::str::FromStr + Default>(
372        &mut self,
373    ) -> Result<(T, Span), ParseError> {
374        match &self.token {
375            Token::Float(v) => {
376                let v = match v.parse() {
377                    Ok(v) => v,
378                    Err(_) => self.err_here("float outside range").unwrap_or_default(),
379                };
380                let span = self.span.clone();
381                self.next();
382                Ok((v, span))
383            }
384            _ => self.expected_failure("float"),
385        }
386    }
387
388    pub(crate) fn err_here<T>(
389        &mut self,
390        message: impl Into<Cow<'static, str>>,
391    ) -> Result<T, ParseError> {
392        self.err(message, &self.span.span());
393        Err(ParseError::Unrecovered)
394    }
395
396    pub(crate) fn ice<T>(&mut self, file: &'static str, line: u32) -> Result<T, ParseError> {
397        self.err_here(format!("Internal compiler error at {}:{}", file, line))
398    }
399
400    pub(crate) fn todo<T>(&mut self, file: &'static str, line: u32) -> Result<T, ParseError> {
401        self.err_here(format!("Not yet implemented at {}:{}", file, line))
402    }
403}