sql_parse/
parser.rs

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
12
13use alloc::{borrow::Cow, format, string::String, vec::Vec};
14
15use crate::{
16    issue::{IssueHandle, Issues},
17    keywords::Keyword,
18    lexer::{Lexer, Token},
19    Identifier, ParseOptions, SString, Span, Spanned,
20};
21
/// Error value returned by parser routines that could not produce a result.
#[derive(Debug)]
pub(crate) enum ParseError {
    /// Parsing failed and no recovery point was reached. Returned by
    /// `Parser::recover` when it gives up, and by `Parser::expected_failure`
    /// and `Parser::err_here` after they have recorded an issue.
    Unrecovered,
}
26
/// Token-level parser state shared by the parsing routines.
///
/// `'a` is the lifetime of the source text being parsed; `'b` is the lifetime
/// of the borrowed issue list and parse options.
pub(crate) struct Parser<'a, 'b> {
    /// The current token.
    pub(crate) token: Token<'a>,
    /// Token and span read ahead by `peek`; taken by the next call to `next`.
    pub(crate) peeked_token: Option<(Token<'a>, Span)>,
    /// Span of the current token.
    pub(crate) span: Span,
    /// Lexer producing the tokens.
    pub(crate) lexer: Lexer<'a>,
    /// Collector that errors and warnings are reported into.
    pub(crate) issues: &'b mut Issues<'a>,
    /// Initialised to 0 by `new`; not otherwise used in this part of the file.
    // NOTE(review): presumably a counter for statement arguments — confirm.
    pub(crate) arg: usize,
    /// Token that `recover` treats as the end of the statement; `new` sets it
    /// to `Token::SemiColon`.
    pub(crate) delimiter: Token<'a>,
    /// Options consulted for dialect checks and optional warnings.
    pub(crate) options: &'b ParseOptions,
    /// Initialised to `false` by `new`; not read in this part of the file.
    pub(crate) permit_compound_statements: bool,
}
38
/// Decodes the body of a single-quoted string literal.
///
/// A doubled quote (`''`) is collapsed into one `'`. A lone `'` that is not
/// followed by another `'` is kept as-is together with the character after
/// it, so no input character is ever dropped. Backslashes are copied through
/// unchanged; no backslash escape sequences are interpreted here.
///
/// Returns the input borrowed when it contains neither `'` nor `\`, otherwise
/// a newly allocated string.
pub(crate) fn decode_single_quoted_string(s: &str) -> Cow<'_, str> {
    if !s.contains('\'') && !s.contains('\\') {
        return Cow::Borrowed(s);
    }
    let mut decoded = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        // Only swallow the following character when it really is the second
        // half of a doubled quote.
        if c == '\'' && chars.peek() == Some(&'\'') {
            chars.next();
        }
        decoded.push(c);
    }
    Cow::Owned(decoded)
}
58
/// Decodes the body of a double-quoted string literal.
///
/// A doubled quote (`""`) is collapsed into one `"`. A lone `"` that is not
/// followed by another `"` is kept as-is. Every other character, including
/// `'` and `\`, is copied through unchanged; no backslash escape sequences
/// are interpreted here.
///
/// Returns the input borrowed when it contains neither `"` nor `\`, otherwise
/// a newly allocated string.
pub(crate) fn decode_double_quoted_string(s: &str) -> Cow<'_, str> {
    if !s.contains('"') && !s.contains('\\') {
        return Cow::Borrowed(s);
    }
    let mut decoded = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        // The quote character of this literal is `"`, so that is the only
        // character whose doubling has to be undone.
        if c == '"' && chars.peek() == Some(&'"') {
            chars.next();
        }
        decoded.push(c);
    }
    Cow::Owned(decoded)
}
78
79impl<'a, 'b> Parser<'a, 'b> {
80    pub(crate) fn new(src: &'a str, issues: &'b mut Issues<'a>, options: &'b ParseOptions) -> Self {
81        let mut lexer = Lexer::new(src);
82        let (token, span) = lexer.next_token();
83        Self {
84            token,
85            peeked_token: None,
86            span,
87            lexer,
88            issues,
89            arg: 0,
90            delimiter: Token::SemiColon,
91            options,
92            permit_compound_statements: false,
93        }
94    }
95
96    pub(crate) fn recover(
97        &mut self,
98        success: impl Fn(&Token<'a>) -> bool,
99        fail: impl Fn(&Token<'a>) -> bool,
100    ) -> Result<(), ParseError> {
101        let mut brackets = Vec::new();
102        loop {
103            match &self.token {
104                t if brackets.is_empty() && success(t) => return Ok(()),
105                Token::Eof => return Err(ParseError::Unrecovered),
106                t if t == &self.delimiter => return Err(ParseError::Unrecovered),
107                t if brackets.is_empty() && fail(t) => return Err(ParseError::Unrecovered),
108                Token::LParen => {
109                    brackets.push(Token::LParen);
110                    self.next();
111                }
112                Token::LBracket => {
113                    brackets.push(Token::LBracket);
114                    self.next();
115                }
116                Token::LBrace => {
117                    brackets.push(Token::LBrace);
118                    self.next();
119                }
120                Token::RBrace => {
121                    self.next();
122                    while let Some(v) = brackets.pop() {
123                        if v == Token::LBrace {
124                            break;
125                        }
126                    }
127                }
128                Token::RBracket => {
129                    self.next();
130                    while let Some(v) = brackets.pop() {
131                        if v == Token::LBracket {
132                            break;
133                        }
134                    }
135                }
136                Token::RParen => {
137                    self.next();
138                    while let Some(v) = brackets.pop() {
139                        if v == Token::LParen {
140                            break;
141                        }
142                    }
143                }
144                _ => self.next(),
145            }
146        }
147    }
148
149    pub(crate) fn recovered<T: Default>(
150        &mut self,
151        expected: &'static str,
152        end: &impl Fn(&Token<'a>) -> bool,
153        fun: impl FnOnce(&mut Self) -> Result<T, ParseError>,
154    ) -> Result<T, ParseError> {
155        let ans = match fun(self) {
156            Ok(v) => v,
157            Err(_) => {
158                self.recover(end, |_| false)?;
159                T::default()
160            }
161        };
162        if !end(&self.token) {
163            self.expected_error(expected);
164            self.recover(end, |_| false)?;
165        }
166        Ok(ans)
167    }
168
169    pub(crate) fn read_from_stdin_and_next(&mut self) -> (&'a str, Span) {
170        let stdin = self.lexer.read_from_stdin();
171        let (token, span) = self.peeked_token.take().unwrap_or_else(|| self.lexer.next_token());
172        self.token = token;
173        self.span = span;
174        stdin
175    }
176
177    pub(crate) fn next(&mut self) {
178        let (token, span) = self.peeked_token.take().unwrap_or_else(|| self.lexer.next_token());
179        self.token = token;
180        self.span = span;
181    }
182
183    pub(crate) fn peek(&mut self) -> &Token<'a> {
184        if self.peeked_token.is_none() {
185            self.peeked_token = Some(self.lexer.next_token());
186        }
187        &self.peeked_token.as_ref().unwrap().0
188    }
189
190    pub(crate) fn expected_error(&mut self, name: &'static str) {
191        self.err(format!("Expected '{}' here", name), &self.span.span());
192    }
193
    /// Reports an error with `message` at `span` to the issue collector and
    /// returns the handle produced by `Issues::err`.
    pub(crate) fn err(
        &mut self,
        message: impl Into<Cow<'static, str>>,
        span: &impl Spanned,
    ) -> IssueHandle<'a, '_> {
        self.issues.err(message, span)
    }
201
    /// Reports a warning with `message` at `span` to the issue collector and
    /// returns the handle produced by `Issues::warn`.
    pub(crate) fn warn(
        &mut self,
        message: impl Into<Cow<'static, str>>,
        span: &impl Spanned,
    ) -> IssueHandle<'a, '_> {
        self.issues.warn(message, span)
    }
209
    /// Reports an "expected `name`" error at the current token and returns
    /// `Err(ParseError::Unrecovered)`.
    ///
    /// Generic over `T` so it can be used directly as the failing arm of any
    /// parse routine.
    pub(crate) fn expected_failure<T>(&mut self, name: &'static str) -> Result<T, ParseError> {
        self.expected_error(name);
        Err(ParseError::Unrecovered)
    }
214
215    pub(crate) fn token_to_plain_identifier(
216        &mut self,
217        token: &Token<'a>,
218        span: Span,
219    ) -> Result<Identifier<'a>, ParseError> {
220        match &token {
221            Token::Ident(v, kw) => {
222                let v = *v;
223                if kw.reserved() {
224                    self.err(
225                        format!("'{}' is a reserved identifier use `{}`", v, v),
226                        &span,
227                    );
228                } else if kw != &Keyword::QUOTED_IDENTIFIER
229                    && self.options.warn_unquoted_identifiers
230                {
231                    self.warn(format!("identifiers should be quoted as `{}`", v), &span);
232                }
233                Ok(Identifier::new(v, span))
234            }
235            _ => self.expected_failure("identifier"),
236        }
237    }
238
239    pub(crate) fn consume_plain_identifier(&mut self) -> Result<Identifier<'a>, ParseError> {
240        match &self.token {
241            Token::Ident(v, kw) => {
242                let v = *v;
243                if kw.reserved() {
244                    self.err(
245                        format!("'{}' is a reserved identifier use `{}`", v, v),
246                        &self.span.span(),
247                    );
248                } else if kw != &Keyword::QUOTED_IDENTIFIER
249                    && self.options.warn_unquoted_identifiers
250                {
251                    self.err(
252                        format!("identifiers should be quoted as `{}`", v),
253                        &self.span.span(),
254                    );
255                } else if kw == &Keyword::QUOTED_IDENTIFIER && self.options.dialect.is_postgresql()
256                {
257                    self.err(
258                        "quoted identifiers not supported by postgresql",
259                        &self.span.span(),
260                    );
261                }
262                Ok(Identifier::new(v, self.consume()))
263            }
264            Token::DoubleQuotedString(v) if self.options.dialect.is_postgresql() => {
265                Ok(Identifier::new(v, self.consume()))
266            }
267            _ => self.expected_failure("identifier"),
268        }
269    }
270
271    pub(crate) fn consume_keyword(&mut self, keyword: Keyword) -> Result<Span, ParseError> {
272        match &self.token {
273            Token::Ident(v, kw) if kw == &keyword => {
274                if !v.chars().all(|c| c.is_ascii_uppercase())
275                    && self.options.warn_none_capital_keywords
276                {
277                    self.warn(
278                        format!(
279                            "keyword {} should be in ALL CAPS {}",
280                            v,
281                            v.to_ascii_uppercase()
282                        ),
283                        &self.span.span(),
284                    );
285                }
286                Ok(self.consume())
287            }
288            _ => self.expected_failure(keyword.name()),
289        }
290    }
291
292    pub(crate) fn consume_keywords(&mut self, keywords: &[Keyword]) -> Result<Span, ParseError> {
293        let mut span = self.consume_keyword(keywords[0])?;
294        for keyword in &keywords[1..] {
295            span = self.consume_keyword(*keyword)?.join_span(&span);
296        }
297        Ok(span)
298    }
299
300    pub(crate) fn skip_keyword(&mut self, keyword: Keyword) -> Option<Span> {
301        match &self.token {
302            Token::Ident(_, kw) if kw == &keyword => Some(self.consume_keyword(keyword).unwrap()),
303            _ => None,
304        }
305    }
306
307    pub(crate) fn consume_token(&mut self, token: Token) -> Result<Span, ParseError> {
308        if self.token != token {
309            self.expected_failure(token.name())
310        } else {
311            Ok(self.consume())
312        }
313    }
314
315    pub(crate) fn skip_token(&mut self, token: Token) -> Option<Span> {
316        if self.token != token {
317            None
318        } else {
319            Some(self.consume())
320        }
321    }
322
    /// Advances to the next token and returns the span of the token that was
    /// current before the call.
    pub(crate) fn consume(&mut self) -> Span {
        // Copy the span first: `next` overwrites `self.span`.
        let span = self.span.clone();
        self.next();
        span
    }
328
329    pub(crate) fn consume_string(&mut self) -> Result<SString<'a>, ParseError> {
330        let (mut a, mut b) = match &self.token {
331            Token::SingleQuotedString(v) => {
332                let v = *v;
333                let span = self.span.clone();
334                self.next();
335                (decode_single_quoted_string(v), span)
336            }
337            Token::DoubleQuotedString(v) => {
338                let v = *v;
339                let span = self.span.clone();
340                self.next();
341                (decode_double_quoted_string(v), span)
342            }
343            _ => self.expected_failure("string")?,
344        };
345        loop {
346            match self.token {
347                Token::SingleQuotedString(v) => {
348                    b = b.join_span(&self.span);
349                    a.to_mut().push_str(decode_single_quoted_string(v).as_ref());
350                    self.next();
351                }
352                Token::DoubleQuotedString(v) => {
353                    b = b.join_span(&self.span);
354                    a.to_mut().push_str(decode_double_quoted_string(v).as_ref());
355                    self.next();
356                }
357                _ => break,
358            }
359        }
360        Ok(SString::new(a, b))
361    }
362
363    pub(crate) fn consume_int<T: core::str::FromStr + Default>(
364        &mut self,
365    ) -> Result<(T, Span), ParseError> {
366        match &self.token {
367            Token::Integer(v) => {
368                let v = match v.parse() {
369                    Ok(v) => v,
370                    Err(_) => self.err_here("integer outside range").unwrap_or_default(),
371                };
372                let span = self.span.clone();
373                self.next();
374                Ok((v, span))
375            }
376            _ => self.expected_failure("integer"),
377        }
378    }
379
380    pub(crate) fn consume_float<T: core::str::FromStr + Default>(
381        &mut self,
382    ) -> Result<(T, Span), ParseError> {
383        match &self.token {
384            Token::Float(v) => {
385                let v = match v.parse() {
386                    Ok(v) => v,
387                    Err(_) => self.err_here("float outside range").unwrap_or_default(),
388                };
389                let span = self.span.clone();
390                self.next();
391                Ok((v, span))
392            }
393            _ => self.expected_failure("float"),
394        }
395    }
396
    /// Reports an error with `message` at the current token's span and returns
    /// `Err(ParseError::Unrecovered)`.
    ///
    /// Always returns `Err`; `T` only exists so the call fits any result type.
    pub(crate) fn err_here<T>(
        &mut self,
        message: impl Into<Cow<'static, str>>,
    ) -> Result<T, ParseError> {
        self.err(message, &self.span.span());
        Err(ParseError::Unrecovered)
    }
404
    /// Reports an "Internal compiler error at <file>:<line>" error at the
    /// current token and returns `Err(ParseError::Unrecovered)`.
    // NOTE(review): presumably called with `file!()` and `line!()` — confirm at call sites.
    pub(crate) fn ice<T>(&mut self, file: &'static str, line: u32) -> Result<T, ParseError> {
        self.err_here(format!("Internal compiler error at {}:{}", file, line))
    }
408
    /// Reports a "Not yet implemented at <file>:<line>" error at the current
    /// token and returns `Err(ParseError::Unrecovered)`.
    // NOTE(review): presumably called with `file!()` and `line!()` — confirm at call sites.
    pub(crate) fn todo<T>(&mut self, file: &'static str, line: u32) -> Result<T, ParseError> {
        self.err_here(format!("Not yet implemented at {}:{}", file, line))
    }
412}