sql_parse/
parser.rs

1// Licensed under the Apache License, Version 2.0 (the "License");
2// you may not use this file except in compliance with the License.
3// You may obtain a copy of the License at
4//
5// http://www.apache.org/licenses/LICENSE-2.0
6//
7// Unless required by applicable law or agreed to in writing, software
8// distributed under the License is distributed on an "AS IS" BASIS,
9// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10// See the License for the specific language governing permissions and
11// limitations under the License.
12
13use alloc::{borrow::Cow, fmt::Write, format, string::String, vec::Vec};
14
15use crate::{
16    issue::{IssueHandle, Issues},
17    keywords::Keyword,
18    lexer::{Lexer, Token},
19    Identifier, ParseOptions, SString, Span, Spanned,
20};
21
/// Failure value threaded through the parser's `Result`s.
///
/// The diagnostic text itself is not carried here; the helpers in this file
/// record it in the parser's `Issues` before returning this marker.
#[derive(Debug)]
pub(crate) enum ParseError {
    /// Returned by `Parser::recover` when no synchronisation point was found,
    /// and by `expected_failure` / `err_here` after they have reported an issue.
    Unrecovered,
}
26
/// Token-at-a-time parser state: the current token, its span, the lexer that
/// produces the following tokens and the sink that collects diagnostics.
pub(crate) struct Parser<'a, 'b> {
    /// The current (not yet consumed) token.
    pub(crate) token: Token<'a>,
    /// Span of `token`.
    pub(crate) span: Span,
    /// Source of subsequent tokens; advanced by `next`.
    pub(crate) lexer: Lexer<'a>,
    /// Collector that `err` and `warn` report into.
    pub(crate) issues: &'b mut Issues<'a>,
    /// Initialised to 0 by `new`; not used elsewhere in this file.
    /// NOTE(review): presumably a running count of statement arguments — confirm.
    pub(crate) arg: usize,
    /// Token that `recover` refuses to skip past; `new` sets it to `Token::SemiColon`.
    pub(crate) delimiter: Token<'a>,
    /// Parse options consulted for dialect checks and optional style warnings.
    pub(crate) options: &'b ParseOptions,
    /// Initialised to `false` by `new`; not read in this file.
    /// NOTE(review): presumably toggled by statement parsers — confirm.
    pub(crate) permit_compound_statements: bool,
}
37
/// Decode the body of a single-quoted SQL string literal.
///
/// A `'` in the input is emitted once and the character that follows it is
/// dropped, which turns the doubled-quote escape `''` into a single `'`.
/// Every other character, including backslashes, is copied unchanged.
///
/// The input is returned borrowed when it contains neither `'` nor `\`;
/// otherwise a freshly built owned string is returned.
pub(crate) fn decode_single_quoted_string(s: &str) -> Cow<'_, str> {
    let needs_rebuild = s.contains('\'') || s.contains('\\');
    if !needs_rebuild {
        return Cow::Borrowed(s);
    }

    let mut decoded = String::new();
    let mut rest = s.chars();
    while let Some(ch) = rest.next() {
        if ch == '\'' {
            // Swallow whatever follows the quote (normally its doubled twin).
            rest.next();
        }
        decoded.push(ch);
    }
    Cow::Owned(decoded)
}
57
/// Decode the body of a double-quoted SQL string literal.
///
/// A `"` in the input is emitted once and the character that follows it is
/// dropped, which turns the doubled-quote escape `""` into a single `"`.
/// Every other character is copied unchanged: single quotes need no escaping
/// inside a double-quoted literal, and backslashes are passed through verbatim.
///
/// The input is returned borrowed when it contains neither `"` nor `\`;
/// otherwise a freshly built owned string is returned.
pub(crate) fn decode_double_quoted_string(s: &str) -> Cow<'_, str> {
    if !s.contains('"') && !s.contains('\\') {
        s.into()
    } else {
        // The decoded text is never longer than the input.
        let mut r = String::with_capacity(s.len());
        let mut chars = s.chars();
        while let Some(c) = chars.next() {
            if c == '"' {
                // Skip the second half of the `""` escape. This must test the
                // double quote: testing `'` here would leave `""` undecoded and
                // eat the character following any apostrophe.
                chars.next();
            }
            r.push(c);
        }
        r.into()
    }
}
77
/// Wrapper whose `Display` output is the wrapped text as a single-quoted
/// literal, with every embedded `'` doubled.
pub(crate) struct SingleQuotedString<'a>(pub(crate) &'a str);

impl<'a> alloc::fmt::Display for SingleQuotedString<'a> {
    /// Writes `'`, the text with each `'` rendered as `''`, then a closing `'`.
    fn fmt(&self, f: &mut alloc::fmt::Formatter<'_>) -> alloc::fmt::Result {
        f.write_char('\'')?;
        // Splitting on the quote yields the runs between quotes; re-joining
        // them with `''` doubles every quote that was in the input.
        let mut runs = self.0.split('\'');
        if let Some(head) = runs.next() {
            f.write_str(head)?;
        }
        for run in runs {
            f.write_str("''")?;
            f.write_str(run)?;
        }
        f.write_char('\'')
    }
}
92
93impl<'a, 'b> Parser<'a, 'b> {
94    pub(crate) fn new(src: &'a str, issues: &'b mut Issues<'a>, options: &'b ParseOptions) -> Self {
95        let mut lexer = Lexer::new(src);
96        let (token, span) = lexer.next_token();
97        Self {
98            token,
99            span,
100            lexer,
101            issues,
102            arg: 0,
103            delimiter: Token::SemiColon,
104            options,
105            permit_compound_statements: false,
106        }
107    }
108
109    pub(crate) fn recover(
110        &mut self,
111        success: impl Fn(&Token<'a>) -> bool,
112        fail: impl Fn(&Token<'a>) -> bool,
113    ) -> Result<(), ParseError> {
114        let mut brackets = Vec::new();
115        loop {
116            match &self.token {
117                t if brackets.is_empty() && success(t) => return Ok(()),
118                Token::Eof => return Err(ParseError::Unrecovered),
119                t if t == &self.delimiter => return Err(ParseError::Unrecovered),
120                t if brackets.is_empty() && fail(t) => return Err(ParseError::Unrecovered),
121                Token::LParen => {
122                    brackets.push(Token::LParen);
123                    self.next();
124                }
125                Token::LBracket => {
126                    brackets.push(Token::LBracket);
127                    self.next();
128                }
129                Token::LBrace => {
130                    brackets.push(Token::LBrace);
131                    self.next();
132                }
133                Token::RBrace => {
134                    self.next();
135                    while let Some(v) = brackets.pop() {
136                        if v == Token::LBrace {
137                            break;
138                        }
139                    }
140                }
141                Token::RBracket => {
142                    self.next();
143                    while let Some(v) = brackets.pop() {
144                        if v == Token::LBracket {
145                            break;
146                        }
147                    }
148                }
149                Token::RParen => {
150                    self.next();
151                    while let Some(v) = brackets.pop() {
152                        if v == Token::LParen {
153                            break;
154                        }
155                    }
156                }
157                _ => self.next(),
158            }
159        }
160    }
161
162    pub(crate) fn recovered<T: Default>(
163        &mut self,
164        expected: &'static str,
165        end: &impl Fn(&Token<'a>) -> bool,
166        fun: impl FnOnce(&mut Self) -> Result<T, ParseError>,
167    ) -> Result<T, ParseError> {
168        let ans = match fun(self) {
169            Ok(v) => v,
170            Err(_) => {
171                self.recover(end, |_| false)?;
172                T::default()
173            }
174        };
175        if !end(&self.token) {
176            self.expected_error(expected);
177            self.recover(end, |_| false)?;
178        }
179        Ok(ans)
180    }
181
182    pub(crate) fn read_from_stdin_and_next(&mut self) -> (&'a str, Span) {
183        let stdin = self.lexer.read_from_stdin();
184        let (token, span) = self.lexer.next_token();
185        self.token = token;
186        self.span = span;
187        stdin
188    }
189
190    pub(crate) fn next(&mut self) {
191        let (token, span) = self.lexer.next_token();
192        self.token = token;
193        self.span = span;
194    }
195
196    pub(crate) fn expected_error(&mut self, name: &'static str) {
197        self.err(format!("Expected '{}' here", name), &self.span.span());
198    }
199
    /// Record an error with `message` at `span` by forwarding to `Issues::err`,
    /// returning the handle that call produces.
    pub(crate) fn err(
        &mut self,
        message: impl Into<Cow<'static, str>>,
        span: &impl Spanned,
    ) -> IssueHandle<'a, '_> {
        self.issues.err(message, span)
    }
207
    /// Record a warning with `message` at `span` by forwarding to `Issues::warn`,
    /// returning the handle that call produces.
    pub(crate) fn warn(
        &mut self,
        message: impl Into<Cow<'static, str>>,
        span: &impl Spanned,
    ) -> IssueHandle<'a, '_> {
        self.issues.warn(message, span)
    }
215
    /// Report an "Expected '<name>' here" error at the current token and return
    /// `Err(ParseError::Unrecovered)`.
    ///
    /// Generic over `T` so it can be used as the failing arm of any
    /// `Result<T, ParseError>`-returning expression.
    pub(crate) fn expected_failure<T>(&mut self, name: &'static str) -> Result<T, ParseError> {
        self.expected_error(name);
        Err(ParseError::Unrecovered)
    }
220
221    pub(crate) fn token_to_plain_identifier(
222        &mut self,
223        token: &Token<'a>,
224        span: Span,
225    ) -> Result<Identifier<'a>, ParseError> {
226        match &token {
227            Token::Ident(v, kw) => {
228                let v = *v;
229                if kw.reserved() {
230                    self.err(
231                        format!("'{}' is a reserved identifier use `{}`", v, v),
232                        &span,
233                    );
234                } else if kw != &Keyword::QUOTED_IDENTIFIER
235                    && self.options.warn_unquoted_identifiers
236                {
237                    self.warn(format!("identifiers should be quoted as `{}`", v), &span);
238                }
239                Ok(Identifier::new(v, span))
240            }
241            _ => self.expected_failure("identifier"),
242        }
243    }
244
245    pub(crate) fn consume_plain_identifier(&mut self) -> Result<Identifier<'a>, ParseError> {
246        match &self.token {
247            Token::Ident(v, kw) => {
248                let v = *v;
249                if kw.reserved() {
250                    self.err(
251                        format!("'{}' is a reserved identifier use `{}`", v, v),
252                        &self.span.span(),
253                    );
254                } else if kw != &Keyword::QUOTED_IDENTIFIER
255                    && self.options.warn_unquoted_identifiers
256                {
257                    self.err(
258                        format!("identifiers should be quoted as `{}`", v),
259                        &self.span.span(),
260                    );
261                } else if kw == &Keyword::QUOTED_IDENTIFIER && self.options.dialect.is_postgresql()
262                {
263                    self.err(
264                        "quoted identifiers not supported by postgresql",
265                        &self.span.span(),
266                    );
267                }
268                Ok(Identifier::new(v, self.consume()))
269            }
270            Token::DoubleQuotedString(v) if self.options.dialect.is_postgresql() => {
271                Ok(Identifier::new(v, self.consume()))
272            }
273            _ => self.expected_failure("identifier"),
274        }
275    }
276
277    pub(crate) fn consume_keyword(&mut self, keyword: Keyword) -> Result<Span, ParseError> {
278        match &self.token {
279            Token::Ident(v, kw) if kw == &keyword => {
280                if !v.chars().all(|c| c.is_ascii_uppercase())
281                    && self.options.warn_none_capital_keywords
282                {
283                    self.warn(
284                        format!(
285                            "keyword {} should be in ALL CAPS {}",
286                            v,
287                            v.to_ascii_uppercase()
288                        ),
289                        &self.span.span(),
290                    );
291                }
292                Ok(self.consume())
293            }
294            _ => self.expected_failure(keyword.name()),
295        }
296    }
297
298    pub(crate) fn consume_keywords(&mut self, keywords: &[Keyword]) -> Result<Span, ParseError> {
299        let mut span = self.consume_keyword(keywords[0])?;
300        for keyword in &keywords[1..] {
301            span = self.consume_keyword(*keyword)?.join_span(&span);
302        }
303        Ok(span)
304    }
305
306    pub(crate) fn skip_keyword(&mut self, keyword: Keyword) -> Option<Span> {
307        match &self.token {
308            Token::Ident(_, kw) if kw == &keyword => Some(self.consume_keyword(keyword).unwrap()),
309            _ => None,
310        }
311    }
312
313    pub(crate) fn consume_token(&mut self, token: Token) -> Result<Span, ParseError> {
314        if self.token != token {
315            self.expected_failure(token.name())
316        } else {
317            Ok(self.consume())
318        }
319    }
320
321    pub(crate) fn skip_token(&mut self, token: Token) -> Option<Span> {
322        if self.token != token {
323            None
324        } else {
325            Some(self.consume())
326        }
327    }
328
329    pub(crate) fn consume(&mut self) -> Span {
330        let span = self.span.clone();
331        self.next();
332        span
333    }
334
335    pub(crate) fn consume_string(&mut self) -> Result<SString<'a>, ParseError> {
336        let (mut a, mut b) = match &self.token {
337            Token::SingleQuotedString(v) => {
338                let v = *v;
339                let span = self.span.clone();
340                self.next();
341                (decode_single_quoted_string(v), span)
342            }
343            Token::DoubleQuotedString(v) => {
344                let v = *v;
345                let span = self.span.clone();
346                self.next();
347                (decode_double_quoted_string(v), span)
348            }
349            _ => self.expected_failure("string")?,
350        };
351        loop {
352            match self.token {
353                Token::SingleQuotedString(v) => {
354                    b = b.join_span(&self.span);
355                    a.to_mut().push_str(decode_single_quoted_string(v).as_ref());
356                    self.next();
357                }
358                Token::DoubleQuotedString(v) => {
359                    b = b.join_span(&self.span);
360                    a.to_mut().push_str(decode_double_quoted_string(v).as_ref());
361                    self.next();
362                }
363                _ => break,
364            }
365        }
366        Ok(SString::new(a, b))
367    }
368
369    pub(crate) fn consume_int<T: core::str::FromStr + Default>(
370        &mut self,
371    ) -> Result<(T, Span), ParseError> {
372        match &self.token {
373            Token::Integer(v) => {
374                let v = match v.parse() {
375                    Ok(v) => v,
376                    Err(_) => self.err_here("integer outside range").unwrap_or_default(),
377                };
378                let span = self.span.clone();
379                self.next();
380                Ok((v, span))
381            }
382            _ => self.expected_failure("integer"),
383        }
384    }
385
386    pub(crate) fn consume_float<T: core::str::FromStr + Default>(
387        &mut self,
388    ) -> Result<(T, Span), ParseError> {
389        match &self.token {
390            Token::Float(v) => {
391                let v = match v.parse() {
392                    Ok(v) => v,
393                    Err(_) => self.err_here("float outside range").unwrap_or_default(),
394                };
395                let span = self.span.clone();
396                self.next();
397                Ok((v, span))
398            }
399            _ => self.expected_failure("float"),
400        }
401    }
402
403    pub(crate) fn err_here<T>(
404        &mut self,
405        message: impl Into<Cow<'static, str>>,
406    ) -> Result<T, ParseError> {
407        self.err(message, &self.span.span());
408        Err(ParseError::Unrecovered)
409    }
410
411    pub(crate) fn ice<T>(&mut self, file: &'static str, line: u32) -> Result<T, ParseError> {
412        self.err_here(format!("Internal compiler error at {}:{}", file, line))
413    }
414
415    pub(crate) fn todo<T>(&mut self, file: &'static str, line: u32) -> Result<T, ParseError> {
416        self.err_here(format!("Not yet implemented at {}:{}", file, line))
417    }
418}