Skip to main content

squawk_parser/
lib.rs

1// via https://github.com/rust-lang/rust-analyzer/blob/d8887c0758bbd2d5f752d5bd405d4491e90e7ed6/crates/parser/src/lib.rs
2//
3// Permission is hereby granted, free of charge, to any
4// person obtaining a copy of this software and associated
5// documentation files (the "Software"), to deal in the
6// Software without restriction, including without
7// limitation the rights to use, copy, modify, merge,
8// publish, distribute, sublicense, and/or sell copies of
9// the Software, and to permit persons to whom the Software
10// is furnished to do so, subject to the following
11// conditions:
12//
13// The above copyright notice and this permission notice
14// shall be included in all copies or substantial portions
15// of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
18// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
19// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
20// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
21// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
24// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25// DEALINGS IN THE SOFTWARE.
26
27use drop_bomb::DropBomb;
28use event::Event;
29use grammar::OPERATOR_FIRST;
30use std::cell::Cell;
31use token_set::TokenSet;
32mod event;
33mod generated;
34mod grammar;
35mod input;
36mod lexed_str;
37mod output;
38mod shortcuts;
39mod syntax_kind;
40mod token_set;
41
42pub use crate::{
43    lexed_str::LexedStr,
44    // output::{Output, Step},
45    shortcuts::StrStep,
46    syntax_kind::SyntaxKind,
47};
48
49use crate::input::Input;
50pub use crate::output::Output;
51
52/// See [`Parser::start`].
53pub(crate) struct Marker {
54    pos: u32,
55    bomb: DropBomb,
56}
57
58impl Marker {
59    fn new(pos: u32) -> Marker {
60        Marker {
61            pos,
62            bomb: DropBomb::new("Marker must be either completed or abandoned"),
63        }
64    }
65
66    /// Finishes the syntax tree node and assigns `kind` to it,
67    /// and mark the create a `CompletedMarker` for possible future
68    /// operation like `.precede()` to deal with `forward_parent`.
69    pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker {
70        self.bomb.defuse();
71        let idx = self.pos as usize;
72        match &mut p.events[idx] {
73            Event::Start { kind: slot, .. } => {
74                *slot = kind;
75            }
76            _ => unreachable!(),
77        }
78        p.push_event(Event::Finish);
79        CompletedMarker::new(self.pos, kind)
80    }
81
82    /// Abandons the syntax tree node. All its children
83    /// are attached to its parent instead.
84    pub(crate) fn abandon(mut self, p: &mut Parser<'_>) {
85        self.bomb.defuse();
86        let idx = self.pos as usize;
87        if idx == p.events.len() - 1 {
88            match p.events.pop() {
89                Some(Event::Start {
90                    kind: SyntaxKind::TOMBSTONE,
91                    forward_parent: None,
92                }) => (),
93                _ => unreachable!(),
94            }
95        }
96    }
97}
98
99pub(crate) struct CompletedMarker {
100    pos: u32,
101    kind: SyntaxKind,
102}
103
104impl CompletedMarker {
105    fn new(pos: u32, kind: SyntaxKind) -> Self {
106        CompletedMarker { pos, kind }
107    }
108
109    /// This method allows to create a new node which starts
110    /// *before* the current one. That is, parser could start
111    /// node `A`, then complete it, and then after parsing the
112    /// whole `A`, decide that it should have started some node
113    /// `B` before starting `A`. `precede` allows to do exactly
114    /// that. See also docs about
115    /// [`Event::Start::forward_parent`](crate::event::Event::Start::forward_parent).
116    ///
117    /// Given completed events `[START, FINISH]` and its corresponding
118    /// `CompletedMarker(pos: 0, _)`.
119    /// Append a new `START` events as `[START, FINISH, NEWSTART]`,
120    /// then mark `NEWSTART` as `START`'s parent with saving its relative
121    /// distance to `NEWSTART` into `forward_parent`(=2 in this case);
122    pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker {
123        let new_pos = p.start();
124        let idx = self.pos as usize;
125        match &mut p.events[idx] {
126            Event::Start { forward_parent, .. } => {
127                *forward_parent = Some(new_pos.pos - self.pos);
128            }
129            _ => unreachable!(),
130        }
131        new_pos
132    }
133
134    /// Extends this completed marker *to the left* up to `m`.
135    pub(crate) fn extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker {
136        m.bomb.defuse();
137        let idx = m.pos as usize;
138        match &mut p.events[idx] {
139            Event::Start { forward_parent, .. } => {
140                *forward_parent = Some(self.pos - m.pos);
141            }
142            _ => unreachable!(),
143        }
144        self
145    }
146
147    pub(crate) fn kind(&self) -> SyntaxKind {
148        self.kind
149    }
150}
151
152pub fn parse(input: &Input) -> Output {
153    let mut p = Parser::new(input);
154    // 2. lex tokens to event vec via parser aka actually run the parser code,
155    // it calls the methods on the parser to create a vector of events
156    grammar::entry_point(&mut p);
157    let events = p.finish();
158    // 3. forward parents
159    event::process(events)
160}
161
/// Event-producing parser state shared by all grammar functions.
pub(crate) struct Parser<'t> {
    /// Token input that lookahead queries are answered from.
    inp: &'t Input,
    /// Index of the current token within `inp`.
    pos: usize,
    /// Events recorded so far; handed out by `finish`.
    events: Vec<Event>,
    /// Number of `nth` lookups since the last consumed token; reset to
    /// zero in `do_bump` and checked against `PARSER_STEP_LIMIT` in `nth`.
    steps: Cell<u32>,
}
168
/// Upper bound on lookahead calls made without consuming a token; once
/// reached, `Parser::nth` panics with "the parser seems stuck".
const PARSER_STEP_LIMIT: usize = 15_000_000;
170
/// Controls how strictly `Parser::at_composite2` matches a token pair.
enum TrivaBetween {
    /// The first token must additionally be joint (per `Input::is_joint`)
    /// with the second, so that e.g. `1 > > 2` is not read as `1 >> 2`.
    NotAllowed,
    /// Only the two token kinds are compared.
    Allowed,
}
175
/// Sign tokens that `Parser::op_len` strips from the end of an operator
/// which contains none of the `SPECIAL_OP_CHARS`.
const OPERATOR_SIGN: TokenSet = TokenSet::new(&[SyntaxKind::PLUS, SyntaxKind::MINUS]);
177
/// In order for an operator to end in `+` or `-`, it must contain one of the
/// following chars:
///
/// ```sql
/// ~ ! @ # % ^ & | ` ?
/// ```
///
/// `Parser::op_len` consults this set to decide whether trailing signs stay
/// part of the operator.
///
/// see: <https://www.postgresql.org/docs/18/sql-createoperator.html>
const SPECIAL_OP_CHARS: TokenSet = TokenSet::new(&[
    SyntaxKind::TILDE,
    SyntaxKind::BANG,
    SyntaxKind::AT,
    SyntaxKind::POUND,
    SyntaxKind::PERCENT,
    SyntaxKind::CARET,
    SyntaxKind::AMP,
    SyntaxKind::PIPE,
    SyntaxKind::BACKTICK,
    SyntaxKind::QUESTION,
]);
198
199impl<'t> Parser<'t> {
200    fn new(inp: &'t Input) -> Parser<'t> {
201        Parser {
202            inp,
203            pos: 0,
204            events: vec![],
205            steps: Cell::new(0),
206        }
207    }
208
209    /// Consume the next token if `kind` matches.
210    pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
211        if !self.at(kind) {
212            return false;
213        }
214        let n_raw_tokens = match kind {
215            SyntaxKind::COLON_EQ
216            | SyntaxKind::NEQ
217            | SyntaxKind::NEQB
218            | SyntaxKind::LTEQ
219            | SyntaxKind::FAT_ARROW
220            | SyntaxKind::GTEQ => 2,
221            SyntaxKind::SIMILAR_TO => {
222                let m = self.start();
223                self.bump(SyntaxKind::SIMILAR_KW);
224                self.bump(SyntaxKind::TO_KW);
225                m.complete(self, SyntaxKind::SIMILAR_TO);
226                return true;
227            }
228            SyntaxKind::AT_TIME_ZONE => {
229                let m = self.start();
230                self.bump(SyntaxKind::AT_KW);
231                self.bump(SyntaxKind::TIME_KW);
232                self.bump(SyntaxKind::ZONE_KW);
233                m.complete(self, SyntaxKind::AT_TIME_ZONE);
234                return true;
235            }
236            SyntaxKind::AT_LOCAL => {
237                let m = self.start();
238                self.bump(SyntaxKind::AT_KW);
239                self.bump(SyntaxKind::LOCAL_KW);
240                m.complete(self, SyntaxKind::AT_LOCAL);
241                return true;
242            }
243            SyntaxKind::IS_NOT_NORMALIZED => {
244                let m = self.start();
245                self.bump(SyntaxKind::IS_KW);
246                self.bump(SyntaxKind::NOT_KW);
247                if matches!(
248                    self.current(),
249                    SyntaxKind::NFC_KW
250                        | SyntaxKind::NFD_KW
251                        | SyntaxKind::NFKC_KW
252                        | SyntaxKind::NFKD_KW
253                ) {
254                    let fm = self.start();
255                    self.bump_any();
256                    fm.complete(self, SyntaxKind::UNICODE_NORMAL_FORM);
257                }
258                self.bump(SyntaxKind::NORMALIZED_KW);
259                m.complete(self, SyntaxKind::IS_NOT_NORMALIZED);
260                return true;
261            }
262            SyntaxKind::IS_NORMALIZED => {
263                let m = self.start();
264                self.bump(SyntaxKind::IS_KW);
265                if matches!(
266                    self.current(),
267                    SyntaxKind::NFC_KW
268                        | SyntaxKind::NFD_KW
269                        | SyntaxKind::NFKC_KW
270                        | SyntaxKind::NFKD_KW
271                ) {
272                    let fm = self.start();
273                    self.bump_any();
274                    fm.complete(self, SyntaxKind::UNICODE_NORMAL_FORM);
275                }
276                self.bump(SyntaxKind::NORMALIZED_KW);
277                m.complete(self, SyntaxKind::IS_NORMALIZED);
278                return true;
279            }
280            SyntaxKind::COLON_COLON => {
281                let m = self.start();
282                self.bump(SyntaxKind::COLON);
283                self.bump(SyntaxKind::COLON);
284                m.complete(self, SyntaxKind::COLON_COLON);
285                return true;
286            }
287            SyntaxKind::IS_JSON => {
288                let m = self.start();
289                self.bump(SyntaxKind::IS_KW);
290                self.bump(SyntaxKind::JSON_KW);
291                grammar::opt_json_keys_unique_clause(self);
292                m.complete(self, SyntaxKind::IS_JSON);
293                return true;
294            }
295            SyntaxKind::IS_NOT_JSON => {
296                let m = self.start();
297                self.bump(SyntaxKind::IS_KW);
298                self.bump(SyntaxKind::NOT_KW);
299                self.bump(SyntaxKind::JSON_KW);
300                grammar::opt_json_keys_unique_clause(self);
301                m.complete(self, SyntaxKind::IS_NOT_JSON);
302                return true;
303            }
304            SyntaxKind::IS_NOT_JSON_OBJECT => {
305                let m = self.start();
306                self.bump(SyntaxKind::IS_KW);
307                self.bump(SyntaxKind::NOT_KW);
308                self.bump(SyntaxKind::JSON_KW);
309                self.bump(SyntaxKind::OBJECT_KW);
310                grammar::opt_json_keys_unique_clause(self);
311                m.complete(self, SyntaxKind::IS_NOT_JSON_OBJECT);
312                return true;
313            }
314            SyntaxKind::IS_NOT_JSON_ARRAY => {
315                let m = self.start();
316                self.bump(SyntaxKind::IS_KW);
317                self.bump(SyntaxKind::NOT_KW);
318                self.bump(SyntaxKind::JSON_KW);
319                self.bump(SyntaxKind::ARRAY_KW);
320                grammar::opt_json_keys_unique_clause(self);
321                m.complete(self, SyntaxKind::IS_NOT_JSON_ARRAY);
322                return true;
323            }
324            SyntaxKind::IS_NOT_JSON_VALUE => {
325                let m = self.start();
326                self.bump(SyntaxKind::IS_KW);
327                self.bump(SyntaxKind::NOT_KW);
328                self.bump(SyntaxKind::JSON_KW);
329                self.bump(SyntaxKind::VALUE_KW);
330                grammar::opt_json_keys_unique_clause(self);
331                m.complete(self, SyntaxKind::IS_NOT_JSON_VALUE);
332                return true;
333            }
334            SyntaxKind::IS_NOT_JSON_SCALAR => {
335                let m = self.start();
336                self.bump(SyntaxKind::IS_KW);
337                self.bump(SyntaxKind::NOT_KW);
338                self.bump(SyntaxKind::JSON_KW);
339                self.bump(SyntaxKind::SCALAR_KW);
340                grammar::opt_json_keys_unique_clause(self);
341                m.complete(self, SyntaxKind::IS_NOT_JSON_SCALAR);
342                return true;
343            }
344            SyntaxKind::IS_JSON_OBJECT => {
345                let m = self.start();
346                self.bump(SyntaxKind::IS_KW);
347                self.bump(SyntaxKind::JSON_KW);
348                self.bump(SyntaxKind::OBJECT_KW);
349                grammar::opt_json_keys_unique_clause(self);
350                m.complete(self, SyntaxKind::IS_JSON_OBJECT);
351                return true;
352            }
353            SyntaxKind::IS_JSON_ARRAY => {
354                let m = self.start();
355                self.bump(SyntaxKind::IS_KW);
356                self.bump(SyntaxKind::JSON_KW);
357                self.bump(SyntaxKind::ARRAY_KW);
358                grammar::opt_json_keys_unique_clause(self);
359                m.complete(self, SyntaxKind::IS_JSON_ARRAY);
360                return true;
361            }
362            SyntaxKind::IS_JSON_VALUE => {
363                let m = self.start();
364                self.bump(SyntaxKind::IS_KW);
365                self.bump(SyntaxKind::JSON_KW);
366                self.bump(SyntaxKind::VALUE_KW);
367                grammar::opt_json_keys_unique_clause(self);
368                m.complete(self, SyntaxKind::IS_JSON_VALUE);
369                return true;
370            }
371            SyntaxKind::IS_JSON_SCALAR => {
372                let m = self.start();
373                self.bump(SyntaxKind::IS_KW);
374                self.bump(SyntaxKind::JSON_KW);
375                self.bump(SyntaxKind::SCALAR_KW);
376                grammar::opt_json_keys_unique_clause(self);
377                m.complete(self, SyntaxKind::IS_JSON_SCALAR);
378                return true;
379            }
380            SyntaxKind::NOT_SIMILAR_TO => {
381                let m = self.start();
382                self.bump(SyntaxKind::NOT_KW);
383                self.bump(SyntaxKind::SIMILAR_KW);
384                self.bump(SyntaxKind::TO_KW);
385                m.complete(self, SyntaxKind::NOT_SIMILAR_TO);
386                return true;
387            }
388            SyntaxKind::IS_NOT_DISTINCT_FROM => {
389                let m = self.start();
390                self.bump(SyntaxKind::IS_KW);
391                self.bump(SyntaxKind::NOT_KW);
392                self.bump(SyntaxKind::DISTINCT_KW);
393                self.bump(SyntaxKind::FROM_KW);
394                m.complete(self, SyntaxKind::IS_NOT_DISTINCT_FROM);
395                return true;
396            }
397            SyntaxKind::OPERATOR_CALL => {
398                let m = self.start();
399                self.bump(SyntaxKind::OPERATOR_KW);
400                self.bump(SyntaxKind::L_PAREN);
401
402                // database.
403                if self.eat(SyntaxKind::IDENT) {
404                    self.expect(SyntaxKind::DOT);
405                }
406                // schema.
407                if self.eat(SyntaxKind::IDENT) {
408                    self.expect(SyntaxKind::DOT);
409                }
410
411                // +, -, etc.
412                match grammar::current_operator(self) {
413                    Some(kind) => {
414                        self.bump(kind);
415                    }
416                    None => {
417                        self.error("expected operator");
418                    }
419                }
420
421                self.expect(SyntaxKind::R_PAREN);
422                m.complete(self, SyntaxKind::OPERATOR_CALL);
423                return true;
424            }
425            SyntaxKind::IS_DISTINCT_FROM => {
426                let m = self.start();
427                self.bump(SyntaxKind::IS_KW);
428                self.bump(SyntaxKind::DISTINCT_KW);
429                self.bump(SyntaxKind::FROM_KW);
430                m.complete(self, SyntaxKind::IS_DISTINCT_FROM);
431                return true;
432            }
433            SyntaxKind::NOT_LIKE => {
434                let m = self.start();
435                self.bump(SyntaxKind::NOT_KW);
436                self.bump(SyntaxKind::LIKE_KW);
437                m.complete(self, SyntaxKind::NOT_LIKE);
438                return true;
439            }
440            SyntaxKind::NOT_ILIKE => {
441                let m = self.start();
442                self.bump(SyntaxKind::NOT_KW);
443                self.bump(SyntaxKind::ILIKE_KW);
444                m.complete(self, SyntaxKind::NOT_ILIKE);
445                return true;
446            }
447            SyntaxKind::NOT_IN => {
448                let m = self.start();
449                self.bump(SyntaxKind::NOT_KW);
450                self.bump(SyntaxKind::IN_KW);
451                m.complete(self, SyntaxKind::NOT_IN);
452                return true;
453            }
454            SyntaxKind::IS_NOT => {
455                let m = self.start();
456                self.bump(SyntaxKind::IS_KW);
457                self.bump(SyntaxKind::NOT_KW);
458                m.complete(self, SyntaxKind::IS_NOT);
459                return true;
460            }
461            SyntaxKind::CUSTOM_OP => {
462                let m = self.start();
463                for _ in 0..self.op_len() {
464                    self.bump_any();
465                }
466                m.complete(self, SyntaxKind::CUSTOM_OP);
467                return true;
468            }
469            _ => 1,
470        };
471        self.do_bump(kind, n_raw_tokens);
472        true
473    }
474
475    fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, triva: TrivaBetween) -> bool {
476        let tokens_match =
477            self.inp.kind(self.pos + n) == k1 && self.inp.kind(self.pos + n + 1) == k2;
478        // We need to do this so we can say that:
479        // 1 > > 2, is not the same as 1 >> 2
480        match triva {
481            TrivaBetween::Allowed => tokens_match,
482            TrivaBetween::NotAllowed => {
483                return tokens_match
484                    && self.inp.is_joint(self.pos + n)
485                    && self.next_not_joined_op(n + 1);
486            }
487        }
488    }
489
490    fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
491        self.inp.kind(self.pos + n) == k1
492            && self.inp.kind(self.pos + n + 1) == k2
493            && self.inp.kind(self.pos + n + 2) == k3
494    }
495
496    fn at_composite4(
497        &self,
498        n: usize,
499        k1: SyntaxKind,
500        k2: SyntaxKind,
501        k3: SyntaxKind,
502        k4: SyntaxKind,
503    ) -> bool {
504        self.inp.kind(self.pos + n) == k1
505            && self.inp.kind(self.pos + n + 1) == k2
506            && self.inp.kind(self.pos + n + 2) == k3
507            && self.inp.kind(self.pos + n + 3) == k4
508    }
509
510    fn next_not_joined_op(&self, n: usize) -> bool {
511        // next isn't an operator so we know we're not joined to it
512        if !self.nth_at_ts(n + 1, OPERATOR_FIRST) {
513            return true;
514        }
515        // current kind isn't joined
516        if !self.inp.is_joint(self.pos + n) {
517            return true;
518        }
519        self.op_len() == n + 1
520    }
521
522    fn op_len(&self) -> usize {
523        if !self.at_ts(OPERATOR_FIRST) {
524            return 0;
525        }
526
527        let mut len = 1;
528        let mut has_special = self.at_ts(SPECIAL_OP_CHARS);
529        while self.inp.is_joint(self.pos + len - 1) && self.nth_at_ts(len, OPERATOR_FIRST) {
530            has_special |= self.nth_at_ts(len, SPECIAL_OP_CHARS);
531            len += 1;
532        }
533
534        // PostgreSQL skips trailing signs from ops if they don't contain a
535        // special char.
536        // This means `2*-3` parses as `2 * -3`.
537        if !has_special {
538            while len > 1 && self.nth_at_ts(len - 1, OPERATOR_SIGN) {
539                len -= 1;
540            }
541        }
542
543        len
544    }
545
546    /// Checks if the current token is in `kinds`.
547    pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool {
548        kinds.contains(self.current())
549    }
550
551    /// Starts a new node in the syntax tree. All nodes and tokens
552    /// consumed between the `start` and the corresponding `Marker::complete`
553    /// belong to the same node.
554    pub(crate) fn start(&mut self) -> Marker {
555        let pos = self.events.len() as u32;
556        self.push_event(Event::tombstone());
557        Marker::new(pos)
558    }
559
    /// Consume the next token, which must be `kind`.
    ///
    /// # Panics
    ///
    /// Panics if the parser isn't currently at `kind`, i.e. when
    /// `eat(kind)` returns `false`.
    pub(crate) fn bump(&mut self, kind: SyntaxKind) {
        assert!(self.eat(kind));
    }
564
565    /// Advances the parser by one token
566    pub(crate) fn bump_any(&mut self) {
567        let kind = self.nth(0);
568        if kind == SyntaxKind::EOF {
569            return;
570        }
571        self.do_bump(kind, 1);
572    }
573
574    /// Consume the next token if it is `kind` or emit an error
575    /// otherwise.
576    pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
577        if self.eat(kind) {
578            return true;
579        }
580        self.error(format!("expected {kind:?}"));
581        false
582    }
583
    /// Create an error node and consume the next token.
    ///
    /// Delegates to `err_recover` with an empty recovery set.
    pub(crate) fn err_and_bump(&mut self, message: &str) {
        self.err_recover(message, TokenSet::EMPTY);
    }
588
589    /// Create an error node and consume the next token.
590    pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) {
591        // TODO: maybe we actually want this?
592        // if matches!(self.current(), SyntaxKind::L_PAREN | SyntaxKind::R_PAREN) {
593        //     self.error(message);
594        //     return;
595        // }
596
597        if self.at_ts(recovery) {
598            self.error(message);
599            return;
600        }
601
602        let m = self.start();
603        self.error(message);
604        self.bump_any();
605        m.complete(self, SyntaxKind::ERROR);
606    }
607
608    fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
609        self.pos += n_raw_tokens as usize;
610        self.steps.set(0);
611        self.push_event(Event::Token { kind, n_raw_tokens });
612    }
613
    /// Appends `event` to the end of the event list.
    fn push_event(&mut self, event: Event) {
        self.events.push(event);
    }
617
    /// Consumes the parser and returns the events recorded so far.
    fn finish(self) -> Vec<Event> {
        self.events
    }
621
622    /// Emit error with the `message`
623    /// FIXME: this should be much more fancy and support
624    /// structured errors with spans and notes, like rustc
625    /// does.
626    pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
627        let msg = message.into();
628        self.push_event(Event::Error { msg });
629    }
630
    /// Checks if the current token is `kind`.
    ///
    /// Equivalent to `nth_at(0, kind)`, so composite kinds are recognised
    /// the same way as there.
    #[must_use]
    pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
        self.nth_at(0, kind)
    }
636
637    /// Checks if the nth token is in `kinds`.
638    #[must_use]
639    pub(crate) fn nth_at_ts(&self, n: usize, kinds: TokenSet) -> bool {
640        kinds.contains(self.nth(n))
641    }
642
643    #[must_use]
644    pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
645        match kind {
646            // =>
647            SyntaxKind::FAT_ARROW => self.at_composite2(
648                n,
649                SyntaxKind::EQ,
650                SyntaxKind::R_ANGLE,
651                TrivaBetween::NotAllowed,
652            ),
653            // :=
654            SyntaxKind::COLON_EQ => self.at_composite2(
655                n,
656                SyntaxKind::COLON,
657                SyntaxKind::EQ,
658                TrivaBetween::NotAllowed,
659            ),
660            // ::
661            SyntaxKind::COLON_COLON => self.at_composite2(
662                n,
663                SyntaxKind::COLON,
664                SyntaxKind::COLON,
665                TrivaBetween::NotAllowed,
666            ),
667            // !=
668            SyntaxKind::NEQ => self.at_composite2(
669                n,
670                SyntaxKind::BANG,
671                SyntaxKind::EQ,
672                TrivaBetween::NotAllowed,
673            ),
674            // <>
675            SyntaxKind::NEQB => self.at_composite2(
676                n,
677                SyntaxKind::L_ANGLE,
678                SyntaxKind::R_ANGLE,
679                TrivaBetween::NotAllowed,
680            ),
681            // is not
682            SyntaxKind::IS_NOT => self.at_composite2(
683                n,
684                SyntaxKind::IS_KW,
685                SyntaxKind::NOT_KW,
686                TrivaBetween::Allowed,
687            ),
688            // not like
689            SyntaxKind::NOT_LIKE => self.at_composite2(
690                n,
691                SyntaxKind::NOT_KW,
692                SyntaxKind::LIKE_KW,
693                TrivaBetween::Allowed,
694            ),
695            // not ilike
696            SyntaxKind::NOT_ILIKE => self.at_composite2(
697                n,
698                SyntaxKind::NOT_KW,
699                SyntaxKind::ILIKE_KW,
700                TrivaBetween::Allowed,
701            ),
702            // not in
703            SyntaxKind::NOT_IN => self.at_composite2(
704                n,
705                SyntaxKind::NOT_KW,
706                SyntaxKind::IN_KW,
707                TrivaBetween::Allowed,
708            ),
709            // at time zone
710            SyntaxKind::AT_TIME_ZONE => self.at_composite3(
711                n,
712                SyntaxKind::AT_KW,
713                SyntaxKind::TIME_KW,
714                SyntaxKind::ZONE_KW,
715            ),
716            // at local
717            SyntaxKind::AT_LOCAL => self.at_composite2(
718                n,
719                SyntaxKind::AT_KW,
720                SyntaxKind::LOCAL_KW,
721                TrivaBetween::Allowed,
722            ),
723            // is distinct from
724            SyntaxKind::IS_DISTINCT_FROM => self.at_composite3(
725                n,
726                SyntaxKind::IS_KW,
727                SyntaxKind::DISTINCT_KW,
728                SyntaxKind::FROM_KW,
729            ),
730            // is not distinct from
731            SyntaxKind::IS_NOT_DISTINCT_FROM => self.at_composite4(
732                n,
733                SyntaxKind::IS_KW,
734                SyntaxKind::NOT_KW,
735                SyntaxKind::DISTINCT_KW,
736                SyntaxKind::FROM_KW,
737            ),
738            // is normalized
739            SyntaxKind::IS_NORMALIZED => {
740                if self.at(SyntaxKind::IS_KW) {
741                    if matches!(
742                        self.nth(1),
743                        SyntaxKind::NFC_KW
744                            | SyntaxKind::NFD_KW
745                            | SyntaxKind::NFKC_KW
746                            | SyntaxKind::NFKD_KW
747                    ) {
748                        if self.nth_at(2, SyntaxKind::NORMALIZED_KW) {
749                            return true;
750                        }
751                    } else {
752                        if self.nth_at(1, SyntaxKind::NORMALIZED_KW) {
753                            return true;
754                        }
755                    }
756                }
757                return false;
758            }
759            // is not normalized
760            SyntaxKind::IS_NOT_NORMALIZED => {
761                if self.at(SyntaxKind::IS_KW) && self.nth_at(1, SyntaxKind::NOT_KW) {
762                    if matches!(
763                        self.nth(2),
764                        SyntaxKind::NFC_KW
765                            | SyntaxKind::NFD_KW
766                            | SyntaxKind::NFKC_KW
767                            | SyntaxKind::NFKD_KW
768                    ) {
769                        if self.nth_at(3, SyntaxKind::NORMALIZED_KW) {
770                            return true;
771                        }
772                    } else if self.nth_at(2, SyntaxKind::NORMALIZED_KW) {
773                        return true;
774                    }
775                }
776                return false;
777            }
778            SyntaxKind::NOT_SIMILAR_TO => self.at_composite3(
779                n,
780                SyntaxKind::NOT_KW,
781                SyntaxKind::SIMILAR_KW,
782                SyntaxKind::TO_KW,
783            ),
784            // similar to
785            SyntaxKind::SIMILAR_TO => self.at_composite2(
786                n,
787                SyntaxKind::SIMILAR_KW,
788                SyntaxKind::TO_KW,
789                TrivaBetween::Allowed,
790            ),
791            // https://www.postgresql.org/docs/17/sql-expressions.html#SQL-EXPRESSIONS-OPERATOR-CALLS
792            // TODO: is this right?
793            SyntaxKind::OPERATOR_CALL => self.at_composite2(
794                n,
795                SyntaxKind::OPERATOR_KW,
796                SyntaxKind::L_PAREN,
797                TrivaBetween::Allowed,
798            ),
799            // is json
800            SyntaxKind::IS_JSON => self.at_composite2(
801                n,
802                SyntaxKind::IS_KW,
803                SyntaxKind::JSON_KW,
804                TrivaBetween::Allowed,
805            ),
806            // is not json
807            SyntaxKind::IS_NOT_JSON => self.at_composite3(
808                n,
809                SyntaxKind::IS_KW,
810                SyntaxKind::NOT_KW,
811                SyntaxKind::JSON_KW,
812            ),
813            // is not json object
814            SyntaxKind::IS_NOT_JSON_OBJECT => self.at_composite4(
815                n,
816                SyntaxKind::IS_KW,
817                SyntaxKind::NOT_KW,
818                SyntaxKind::JSON_KW,
819                SyntaxKind::OBJECT_KW,
820            ),
821            // is not json array
822            SyntaxKind::IS_NOT_JSON_ARRAY => self.at_composite4(
823                n,
824                SyntaxKind::IS_KW,
825                SyntaxKind::NOT_KW,
826                SyntaxKind::JSON_KW,
827                SyntaxKind::ARRAY_KW,
828            ),
829            // is not json value
830            SyntaxKind::IS_NOT_JSON_VALUE => self.at_composite4(
831                n,
832                SyntaxKind::IS_KW,
833                SyntaxKind::NOT_KW,
834                SyntaxKind::JSON_KW,
835                SyntaxKind::VALUE_KW,
836            ),
837            // is not json scalar
838            SyntaxKind::IS_NOT_JSON_SCALAR => self.at_composite4(
839                n,
840                SyntaxKind::IS_KW,
841                SyntaxKind::NOT_KW,
842                SyntaxKind::JSON_KW,
843                SyntaxKind::SCALAR_KW,
844            ),
845            // is json object
846            SyntaxKind::IS_JSON_OBJECT => self.at_composite3(
847                n,
848                SyntaxKind::IS_KW,
849                SyntaxKind::JSON_KW,
850                SyntaxKind::OBJECT_KW,
851            ),
852            // is json array
853            SyntaxKind::IS_JSON_ARRAY => self.at_composite3(
854                n,
855                SyntaxKind::IS_KW,
856                SyntaxKind::JSON_KW,
857                SyntaxKind::ARRAY_KW,
858            ),
859            // is json value
860            SyntaxKind::IS_JSON_VALUE => self.at_composite3(
861                n,
862                SyntaxKind::IS_KW,
863                SyntaxKind::JSON_KW,
864                SyntaxKind::VALUE_KW,
865            ),
866            // is json scalar
867            SyntaxKind::IS_JSON_SCALAR => self.at_composite3(
868                n,
869                SyntaxKind::IS_KW,
870                SyntaxKind::JSON_KW,
871                SyntaxKind::SCALAR_KW,
872            ),
873            // <=
874            SyntaxKind::LTEQ => self.at_composite2(
875                n,
876                SyntaxKind::L_ANGLE,
877                SyntaxKind::EQ,
878                TrivaBetween::NotAllowed,
879            ),
880            // <=
881            SyntaxKind::GTEQ => self.at_composite2(
882                n,
883                SyntaxKind::R_ANGLE,
884                SyntaxKind::EQ,
885                TrivaBetween::NotAllowed,
886            ),
887            SyntaxKind::CUSTOM_OP => {
888                // TODO: is this right?
889                if self.at_ts(OPERATOR_FIRST) {
890                    return true;
891                }
892                return false;
893            }
894            // TODO: we probably shouldn't be using a _ for this but be explicit for each type?
895            _ => self.inp.kind(self.pos + n) == kind,
896        }
897    }
898
    /// Returns the kind of the current token.
    /// If parser has already reached the end of input,
    /// the special `EOF` kind is returned.
    ///
    /// Shorthand for `nth(0)`, so it counts as a lookahead step.
    #[must_use]
    pub(crate) fn current(&self) -> SyntaxKind {
        self.nth(0)
    }
906
907    /// Lookahead operation: returns the kind of the next nth
908    /// token.
909    #[must_use]
910    fn nth(&self, n: usize) -> SyntaxKind {
911        let steps = self.steps.get();
912        assert!(
913            (steps as usize) < PARSER_STEP_LIMIT,
914            "the parser seems stuck"
915        );
916        self.steps.set(steps + 1);
917
918        self.inp.kind(self.pos + n)
919    }
920}