Skip to main content

squawk_parser/
lib.rs

1// via https://github.com/rust-lang/rust-analyzer/blob/d8887c0758bbd2d5f752d5bd405d4491e90e7ed6/crates/parser/src/lib.rs
2//
3// Permission is hereby granted, free of charge, to any
4// person obtaining a copy of this software and associated
5// documentation files (the "Software"), to deal in the
6// Software without restriction, including without
7// limitation the rights to use, copy, modify, merge,
8// publish, distribute, sublicense, and/or sell copies of
9// the Software, and to permit persons to whom the Software
10// is furnished to do so, subject to the following
11// conditions:
12//
13// The above copyright notice and this permission notice
14// shall be included in all copies or substantial portions
15// of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
18// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
19// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
20// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
21// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
24// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25// DEALINGS IN THE SOFTWARE.
26
27use drop_bomb::DropBomb;
28use event::Event;
29use grammar::OPERATOR_FIRST;
30use std::cell::Cell;
31use token_set::TokenSet;
32mod event;
33mod generated;
34mod grammar;
35mod input;
36mod lexed_str;
37mod output;
38mod shortcuts;
39mod syntax_kind;
40mod token_set;
41
42pub use crate::{
43    lexed_str::LexedStr,
44    // output::{Output, Step},
45    shortcuts::StrStep,
46    syntax_kind::SyntaxKind,
47};
48
49use crate::input::Input;
50pub use crate::output::Output;
51
52/// See [`Parser::start`].
53pub(crate) struct Marker {
54    pos: u32,
55    bomb: DropBomb,
56}
57
58impl Marker {
59    fn new(pos: u32) -> Marker {
60        Marker {
61            pos,
62            bomb: DropBomb::new("Marker must be either completed or abandoned"),
63        }
64    }
65
66    /// Finishes the syntax tree node and assigns `kind` to it,
67    /// and mark the create a `CompletedMarker` for possible future
68    /// operation like `.precede()` to deal with `forward_parent`.
69    pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker {
70        self.bomb.defuse();
71        let idx = self.pos as usize;
72        match &mut p.events[idx] {
73            Event::Start { kind: slot, .. } => {
74                *slot = kind;
75            }
76            _ => unreachable!(),
77        }
78        p.push_event(Event::Finish);
79        CompletedMarker::new(self.pos, kind)
80    }
81
82    /// Abandons the syntax tree node. All its children
83    /// are attached to its parent instead.
84    pub(crate) fn abandon(mut self, p: &mut Parser<'_>) {
85        self.bomb.defuse();
86        let idx = self.pos as usize;
87        if idx == p.events.len() - 1 {
88            match p.events.pop() {
89                Some(Event::Start {
90                    kind: SyntaxKind::TOMBSTONE,
91                    forward_parent: None,
92                }) => (),
93                _ => unreachable!(),
94            }
95        }
96    }
97}
98
99pub(crate) struct CompletedMarker {
100    pos: u32,
101    kind: SyntaxKind,
102}
103
104impl CompletedMarker {
105    fn new(pos: u32, kind: SyntaxKind) -> Self {
106        CompletedMarker { pos, kind }
107    }
108
109    /// This method allows to create a new node which starts
110    /// *before* the current one. That is, parser could start
111    /// node `A`, then complete it, and then after parsing the
112    /// whole `A`, decide that it should have started some node
113    /// `B` before starting `A`. `precede` allows to do exactly
114    /// that. See also docs about
115    /// [`Event::Start::forward_parent`](crate::event::Event::Start::forward_parent).
116    ///
117    /// Given completed events `[START, FINISH]` and its corresponding
118    /// `CompletedMarker(pos: 0, _)`.
119    /// Append a new `START` events as `[START, FINISH, NEWSTART]`,
120    /// then mark `NEWSTART` as `START`'s parent with saving its relative
121    /// distance to `NEWSTART` into `forward_parent`(=2 in this case);
122    pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker {
123        let new_pos = p.start();
124        let idx = self.pos as usize;
125        match &mut p.events[idx] {
126            Event::Start { forward_parent, .. } => {
127                *forward_parent = Some(new_pos.pos - self.pos);
128            }
129            _ => unreachable!(),
130        }
131        new_pos
132    }
133
134    /// Extends this completed marker *to the left* up to `m`.
135    pub(crate) fn extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker {
136        m.bomb.defuse();
137        let idx = m.pos as usize;
138        match &mut p.events[idx] {
139            Event::Start { forward_parent, .. } => {
140                *forward_parent = Some(self.pos - m.pos);
141            }
142            _ => unreachable!(),
143        }
144        self
145    }
146
147    pub(crate) fn kind(&self) -> SyntaxKind {
148        self.kind
149    }
150}
151
152pub fn parse(input: &Input) -> Output {
153    let mut p = Parser::new(input);
154    // 2. lex tokens to event vec via parser aka actually run the parser code,
155    // it calls the methods on the parser to create a vector of events
156    grammar::entry_point(&mut p);
157    let events = p.finish();
158    // 3. forward parents
159    event::process(events)
160}
161
/// Parser state shared by the grammar functions: a cursor over the input
/// tokens plus the list of events recorded so far.
pub(crate) struct Parser<'t> {
    /// Token input being parsed.
    inp: &'t Input,
    /// Index into `inp` of the current token.
    pos: usize,
    /// Events recorded so far; returned by `finish`.
    events: Vec<Event>,
    /// Number of `nth` lookups since a token was last bumped. Incremented by
    /// `nth`, reset to zero by `do_bump`, and checked against
    /// `PARSER_STEP_LIMIT`.
    steps: Cell<u32>,
}
168
/// Maximum number of `Parser::nth` lookups allowed without a token being
/// bumped in between; once reached, `nth` panics with
/// "the parser seems stuck".
const PARSER_STEP_LIMIT: usize = 15_000_000;
170
/// Controls how strictly `Parser::at_composite2` matches a two-token
/// composite. (NOTE(review): the name is presumably a typo for
/// "TriviaBetween" — confirm before renaming.)
enum TrivaBetween {
    /// Besides matching kinds, the first token must be joint with the
    /// second, and the pair must not be joined to a following operator token.
    NotAllowed,
    /// Only the two token kinds have to match.
    Allowed,
}
175
176impl<'t> Parser<'t> {
177    fn new(inp: &'t Input) -> Parser<'t> {
178        Parser {
179            inp,
180            pos: 0,
181            events: vec![],
182            steps: Cell::new(0),
183        }
184    }
185
186    /// Consume the next token if `kind` matches.
187    pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
188        if !self.at(kind) {
189            return false;
190        }
191        let n_raw_tokens = match kind {
192            SyntaxKind::COLON_EQ
193            | SyntaxKind::NEQ
194            | SyntaxKind::NEQB
195            | SyntaxKind::LTEQ
196            | SyntaxKind::FAT_ARROW
197            | SyntaxKind::GTEQ => 2,
198            SyntaxKind::SIMILAR_TO => {
199                let m = self.start();
200                self.bump(SyntaxKind::SIMILAR_KW);
201                self.bump(SyntaxKind::TO_KW);
202                m.complete(self, SyntaxKind::SIMILAR_TO);
203                return true;
204            }
205            SyntaxKind::AT_TIME_ZONE => {
206                let m = self.start();
207                self.bump(SyntaxKind::AT_KW);
208                self.bump(SyntaxKind::TIME_KW);
209                self.bump(SyntaxKind::ZONE_KW);
210                m.complete(self, SyntaxKind::AT_TIME_ZONE);
211                return true;
212            }
213            SyntaxKind::AT_LOCAL => {
214                let m = self.start();
215                self.bump(SyntaxKind::AT_KW);
216                self.bump(SyntaxKind::LOCAL_KW);
217                m.complete(self, SyntaxKind::AT_LOCAL);
218                return true;
219            }
220            SyntaxKind::IS_NOT_NORMALIZED => {
221                let m = self.start();
222                self.bump(SyntaxKind::IS_KW);
223                self.bump(SyntaxKind::NOT_KW);
224                if matches!(
225                    self.current(),
226                    SyntaxKind::NFC_KW
227                        | SyntaxKind::NFD_KW
228                        | SyntaxKind::NFKC_KW
229                        | SyntaxKind::NFKD_KW
230                ) {
231                    let fm = self.start();
232                    self.bump_any();
233                    fm.complete(self, SyntaxKind::UNICODE_NORMAL_FORM);
234                }
235                self.bump(SyntaxKind::NORMALIZED_KW);
236                m.complete(self, SyntaxKind::IS_NOT_NORMALIZED);
237                return true;
238            }
239            SyntaxKind::IS_NORMALIZED => {
240                let m = self.start();
241                self.bump(SyntaxKind::IS_KW);
242                if matches!(
243                    self.current(),
244                    SyntaxKind::NFC_KW
245                        | SyntaxKind::NFD_KW
246                        | SyntaxKind::NFKC_KW
247                        | SyntaxKind::NFKD_KW
248                ) {
249                    let fm = self.start();
250                    self.bump_any();
251                    fm.complete(self, SyntaxKind::UNICODE_NORMAL_FORM);
252                }
253                self.bump(SyntaxKind::NORMALIZED_KW);
254                m.complete(self, SyntaxKind::IS_NORMALIZED);
255                return true;
256            }
257            SyntaxKind::COLON_COLON => {
258                let m = self.start();
259                self.bump(SyntaxKind::COLON);
260                self.bump(SyntaxKind::COLON);
261                m.complete(self, SyntaxKind::COLON_COLON);
262                return true;
263            }
264            SyntaxKind::IS_JSON => {
265                let m = self.start();
266                self.bump(SyntaxKind::IS_KW);
267                self.bump(SyntaxKind::JSON_KW);
268                grammar::opt_json_keys_unique_clause(self);
269                m.complete(self, SyntaxKind::IS_JSON);
270                return true;
271            }
272            SyntaxKind::IS_NOT_JSON => {
273                let m = self.start();
274                self.bump(SyntaxKind::IS_KW);
275                self.bump(SyntaxKind::NOT_KW);
276                self.bump(SyntaxKind::JSON_KW);
277                grammar::opt_json_keys_unique_clause(self);
278                m.complete(self, SyntaxKind::IS_NOT_JSON);
279                return true;
280            }
281            SyntaxKind::IS_NOT_JSON_OBJECT => {
282                let m = self.start();
283                self.bump(SyntaxKind::IS_KW);
284                self.bump(SyntaxKind::NOT_KW);
285                self.bump(SyntaxKind::JSON_KW);
286                self.bump(SyntaxKind::OBJECT_KW);
287                grammar::opt_json_keys_unique_clause(self);
288                m.complete(self, SyntaxKind::IS_NOT_JSON_OBJECT);
289                return true;
290            }
291            SyntaxKind::IS_NOT_JSON_ARRAY => {
292                let m = self.start();
293                self.bump(SyntaxKind::IS_KW);
294                self.bump(SyntaxKind::NOT_KW);
295                self.bump(SyntaxKind::JSON_KW);
296                self.bump(SyntaxKind::ARRAY_KW);
297                grammar::opt_json_keys_unique_clause(self);
298                m.complete(self, SyntaxKind::IS_NOT_JSON_ARRAY);
299                return true;
300            }
301            SyntaxKind::IS_NOT_JSON_VALUE => {
302                let m = self.start();
303                self.bump(SyntaxKind::IS_KW);
304                self.bump(SyntaxKind::NOT_KW);
305                self.bump(SyntaxKind::JSON_KW);
306                self.bump(SyntaxKind::VALUE_KW);
307                grammar::opt_json_keys_unique_clause(self);
308                m.complete(self, SyntaxKind::IS_NOT_JSON_VALUE);
309                return true;
310            }
311            SyntaxKind::IS_NOT_JSON_SCALAR => {
312                let m = self.start();
313                self.bump(SyntaxKind::IS_KW);
314                self.bump(SyntaxKind::NOT_KW);
315                self.bump(SyntaxKind::JSON_KW);
316                self.bump(SyntaxKind::SCALAR_KW);
317                grammar::opt_json_keys_unique_clause(self);
318                m.complete(self, SyntaxKind::IS_NOT_JSON_SCALAR);
319                return true;
320            }
321            SyntaxKind::IS_JSON_OBJECT => {
322                let m = self.start();
323                self.bump(SyntaxKind::IS_KW);
324                self.bump(SyntaxKind::JSON_KW);
325                self.bump(SyntaxKind::OBJECT_KW);
326                grammar::opt_json_keys_unique_clause(self);
327                m.complete(self, SyntaxKind::IS_JSON_OBJECT);
328                return true;
329            }
330            SyntaxKind::IS_JSON_ARRAY => {
331                let m = self.start();
332                self.bump(SyntaxKind::IS_KW);
333                self.bump(SyntaxKind::JSON_KW);
334                self.bump(SyntaxKind::ARRAY_KW);
335                grammar::opt_json_keys_unique_clause(self);
336                m.complete(self, SyntaxKind::IS_JSON_ARRAY);
337                return true;
338            }
339            SyntaxKind::IS_JSON_VALUE => {
340                let m = self.start();
341                self.bump(SyntaxKind::IS_KW);
342                self.bump(SyntaxKind::JSON_KW);
343                self.bump(SyntaxKind::VALUE_KW);
344                grammar::opt_json_keys_unique_clause(self);
345                m.complete(self, SyntaxKind::IS_JSON_VALUE);
346                return true;
347            }
348            SyntaxKind::IS_JSON_SCALAR => {
349                let m = self.start();
350                self.bump(SyntaxKind::IS_KW);
351                self.bump(SyntaxKind::JSON_KW);
352                self.bump(SyntaxKind::SCALAR_KW);
353                grammar::opt_json_keys_unique_clause(self);
354                m.complete(self, SyntaxKind::IS_JSON_SCALAR);
355                return true;
356            }
357            SyntaxKind::NOT_SIMILAR_TO => {
358                let m = self.start();
359                self.bump(SyntaxKind::NOT_KW);
360                self.bump(SyntaxKind::SIMILAR_KW);
361                self.bump(SyntaxKind::TO_KW);
362                m.complete(self, SyntaxKind::NOT_SIMILAR_TO);
363                return true;
364            }
365            SyntaxKind::IS_NOT_DISTINCT_FROM => {
366                let m = self.start();
367                self.bump(SyntaxKind::IS_KW);
368                self.bump(SyntaxKind::NOT_KW);
369                self.bump(SyntaxKind::DISTINCT_KW);
370                self.bump(SyntaxKind::FROM_KW);
371                m.complete(self, SyntaxKind::IS_NOT_DISTINCT_FROM);
372                return true;
373            }
374            SyntaxKind::OPERATOR_CALL => {
375                let m = self.start();
376                self.bump(SyntaxKind::OPERATOR_KW);
377                self.bump(SyntaxKind::L_PAREN);
378
379                // database.
380                if self.eat(SyntaxKind::IDENT) {
381                    self.expect(SyntaxKind::DOT);
382                }
383                // schema.
384                if self.eat(SyntaxKind::IDENT) {
385                    self.expect(SyntaxKind::DOT);
386                }
387
388                // +, -, etc.
389                match grammar::current_operator(self) {
390                    Some(kind) => {
391                        self.bump(kind);
392                    }
393                    None => {
394                        self.error("expected operator");
395                    }
396                }
397
398                self.expect(SyntaxKind::R_PAREN);
399                m.complete(self, SyntaxKind::OPERATOR_CALL);
400                return true;
401            }
402            SyntaxKind::IS_DISTINCT_FROM => {
403                let m = self.start();
404                self.bump(SyntaxKind::IS_KW);
405                self.bump(SyntaxKind::DISTINCT_KW);
406                self.bump(SyntaxKind::FROM_KW);
407                m.complete(self, SyntaxKind::IS_DISTINCT_FROM);
408                return true;
409            }
410            SyntaxKind::NOT_LIKE => {
411                let m = self.start();
412                self.bump(SyntaxKind::NOT_KW);
413                self.bump(SyntaxKind::LIKE_KW);
414                m.complete(self, SyntaxKind::NOT_LIKE);
415                return true;
416            }
417            SyntaxKind::NOT_ILIKE => {
418                let m = self.start();
419                self.bump(SyntaxKind::NOT_KW);
420                self.bump(SyntaxKind::ILIKE_KW);
421                m.complete(self, SyntaxKind::NOT_ILIKE);
422                return true;
423            }
424            SyntaxKind::NOT_IN => {
425                let m = self.start();
426                self.bump(SyntaxKind::NOT_KW);
427                self.bump(SyntaxKind::IN_KW);
428                m.complete(self, SyntaxKind::NOT_IN);
429                return true;
430            }
431            SyntaxKind::IS_NOT => {
432                let m = self.start();
433                self.bump(SyntaxKind::IS_KW);
434                self.bump(SyntaxKind::NOT_KW);
435                m.complete(self, SyntaxKind::IS_NOT);
436                return true;
437            }
438            SyntaxKind::CUSTOM_OP => {
439                let m = self.start();
440                while !self.at(SyntaxKind::EOF) {
441                    let is_joint = self.inp.is_joint(self.pos);
442                    if self.at_ts(OPERATOR_FIRST) {
443                        self.bump_any();
444                    } else {
445                        break;
446                    }
447                    if !is_joint {
448                        break;
449                    }
450                }
451                m.complete(self, SyntaxKind::CUSTOM_OP);
452                return true;
453            }
454            _ => 1,
455        };
456        self.do_bump(kind, n_raw_tokens);
457        true
458    }
459
460    fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, triva: TrivaBetween) -> bool {
461        let tokens_match =
462            self.inp.kind(self.pos + n) == k1 && self.inp.kind(self.pos + n + 1) == k2;
463        // We need to do this so we can say that:
464        // 1 > > 2, is not the same as 1 >> 2
465        match triva {
466            TrivaBetween::Allowed => tokens_match,
467            TrivaBetween::NotAllowed => {
468                return tokens_match
469                    && self.inp.is_joint(self.pos + n)
470                    && self.next_not_joined_op(n + 1);
471            }
472        }
473    }
474
475    fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
476        self.inp.kind(self.pos + n) == k1
477            && self.inp.kind(self.pos + n + 1) == k2
478            && self.inp.kind(self.pos + n + 2) == k3
479    }
480
481    fn at_composite4(
482        &self,
483        n: usize,
484        k1: SyntaxKind,
485        k2: SyntaxKind,
486        k3: SyntaxKind,
487        k4: SyntaxKind,
488    ) -> bool {
489        self.inp.kind(self.pos + n) == k1
490            && self.inp.kind(self.pos + n + 1) == k2
491            && self.inp.kind(self.pos + n + 2) == k3
492            && self.inp.kind(self.pos + n + 3) == k4
493    }
494
495    fn next_not_joined_op(&self, n: usize) -> bool {
496        let next = self.inp.kind(self.pos + n + 1);
497        // next isn't an operator so we know we're not joined to it
498        if !OPERATOR_FIRST.contains(next) {
499            return true;
500        }
501        // current kind isn't joined
502        if !self.inp.is_joint(self.pos + n) {
503            return true;
504        }
505        false
506    }
507
508    /// Checks if the current token is in `kinds`.
509    pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool {
510        kinds.contains(self.current())
511    }
512
513    /// Starts a new node in the syntax tree. All nodes and tokens
514    /// consumed between the `start` and the corresponding `Marker::complete`
515    /// belong to the same node.
516    pub(crate) fn start(&mut self) -> Marker {
517        let pos = self.events.len() as u32;
518        self.push_event(Event::tombstone());
519        Marker::new(pos)
520    }
521
    /// Consume the next token, which must be `kind`.
    ///
    /// Delegates to [`Parser::eat`], so composite kinds are handled the same
    /// way as there.
    ///
    /// # Panics
    ///
    /// Panics if the parser isn't currently at `kind`.
    pub(crate) fn bump(&mut self, kind: SyntaxKind) {
        assert!(self.eat(kind));
    }
526
527    /// Advances the parser by one token
528    pub(crate) fn bump_any(&mut self) {
529        let kind = self.nth(0);
530        if kind == SyntaxKind::EOF {
531            return;
532        }
533        self.do_bump(kind, 1);
534    }
535
    /// Steps over a `NUMERIC_NUMBER` token that needs to be split, recording
    /// a `NumericSplitHack` event for it.
    ///
    /// Returns `(ends_in_dot, marker)`. `ends_in_dot` is `true` when the
    /// numeric token is not joint with the token after it. In that case the
    /// `marker` passed in is handed back unchanged. Otherwise the `Start`
    /// event of the passed-in marker is turned into a `FIELD_EXPR` whose
    /// `forward_parent` points at a freshly started marker, the passed-in
    /// marker is defused, and the fresh marker is returned in its place.
    ///
    /// # Panics
    ///
    /// Panics if the parser isn't currently at `NUMERIC_NUMBER`.
    pub(crate) fn split_numeric(&mut self, mut marker: Marker) -> (bool, Marker) {
        assert!(self.at(SyntaxKind::NUMERIC_NUMBER));
        // we have parsed `<something>.`
        // `<something>`.0.1
        // here we need to insert an extra event
        //
        // `<something>`. 0. 1;
        // here we need to change the follow up parse, the return value will cause us to emulate a dot
        // the actual splitting happens later
        let ends_in_dot = !self.inp.is_joint(self.pos);
        if !ends_in_dot {
            // Start the replacement marker first so that its position is
            // known when linking the old `Start` event to it.
            let new_marker = self.start();
            let idx = marker.pos as usize;
            match &mut self.events[idx] {
                Event::Start {
                    forward_parent,
                    kind,
                } => {
                    *kind = SyntaxKind::FIELD_EXPR;
                    *forward_parent = Some(new_marker.pos - marker.pos);
                }
                _ => unreachable!(),
            }
            // The old marker is replaced rather than completed or abandoned,
            // so its bomb has to be defused by hand.
            marker.bomb.defuse();
            marker = new_marker;
        };
        // The token is skipped without going through `do_bump`: no `Token`
        // event is recorded and the step counter is not reset.
        self.pos += 1;
        self.push_event(Event::NumericSplitHack { ends_in_dot });
        (ends_in_dot, marker)
    }
567
568    /// Consume the next token if it is `kind` or emit an error
569    /// otherwise.
570    pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
571        if self.eat(kind) {
572            return true;
573        }
574        self.error(format!("expected {kind:?}"));
575        false
576    }
577
578    /// Create an error node and consume the next token.
579    pub(crate) fn err_and_bump(&mut self, message: &str) {
580        self.err_recover(message, TokenSet::EMPTY);
581    }
582
583    /// Create an error node and consume the next token.
584    pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) {
585        // TODO: maybe we actually want this?
586        // if matches!(self.current(), SyntaxKind::L_PAREN | SyntaxKind::R_PAREN) {
587        //     self.error(message);
588        //     return;
589        // }
590
591        if self.at_ts(recovery) {
592            self.error(message);
593            return;
594        }
595
596        let m = self.start();
597        self.error(message);
598        self.bump_any();
599        m.complete(self, SyntaxKind::ERROR);
600    }
601
602    fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
603        self.pos += n_raw_tokens as usize;
604        self.steps.set(0);
605        self.push_event(Event::Token { kind, n_raw_tokens });
606    }
607
608    fn push_event(&mut self, event: Event) {
609        self.events.push(event);
610    }
611
612    fn finish(self) -> Vec<Event> {
613        self.events
614    }
615
616    /// Emit error with the `message`
617    /// FIXME: this should be much more fancy and support
618    /// structured errors with spans and notes, like rustc
619    /// does.
620    pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
621        let msg = message.into();
622        self.push_event(Event::Error { msg });
623    }
624
    /// Checks if the current token is `kind`.
    ///
    /// Equivalent to `nth_at(0, kind)`, so composite kinds that span
    /// several raw tokens are recognised as well.
    #[must_use]
    pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
        self.nth_at(0, kind)
    }
630
631    /// Checks if the nth token is in `kinds`.
632    #[must_use]
633    pub(crate) fn nth_at_ts(&self, n: usize, kinds: TokenSet) -> bool {
634        kinds.contains(self.nth(n))
635    }
636
637    #[must_use]
638    pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
639        match kind {
640            // =>
641            SyntaxKind::FAT_ARROW => self.at_composite2(
642                n,
643                SyntaxKind::EQ,
644                SyntaxKind::R_ANGLE,
645                TrivaBetween::NotAllowed,
646            ),
647            // :=
648            SyntaxKind::COLON_EQ => self.at_composite2(
649                n,
650                SyntaxKind::COLON,
651                SyntaxKind::EQ,
652                TrivaBetween::NotAllowed,
653            ),
654            // ::
655            SyntaxKind::COLON_COLON => self.at_composite2(
656                n,
657                SyntaxKind::COLON,
658                SyntaxKind::COLON,
659                TrivaBetween::NotAllowed,
660            ),
661            // !=
662            SyntaxKind::NEQ => self.at_composite2(
663                n,
664                SyntaxKind::BANG,
665                SyntaxKind::EQ,
666                TrivaBetween::NotAllowed,
667            ),
668            // <>
669            SyntaxKind::NEQB => self.at_composite2(
670                n,
671                SyntaxKind::L_ANGLE,
672                SyntaxKind::R_ANGLE,
673                TrivaBetween::NotAllowed,
674            ),
675            // is not
676            SyntaxKind::IS_NOT => self.at_composite2(
677                n,
678                SyntaxKind::IS_KW,
679                SyntaxKind::NOT_KW,
680                TrivaBetween::Allowed,
681            ),
682            // not like
683            SyntaxKind::NOT_LIKE => self.at_composite2(
684                n,
685                SyntaxKind::NOT_KW,
686                SyntaxKind::LIKE_KW,
687                TrivaBetween::Allowed,
688            ),
689            // not ilike
690            SyntaxKind::NOT_ILIKE => self.at_composite2(
691                n,
692                SyntaxKind::NOT_KW,
693                SyntaxKind::ILIKE_KW,
694                TrivaBetween::Allowed,
695            ),
696            // not in
697            SyntaxKind::NOT_IN => self.at_composite2(
698                n,
699                SyntaxKind::NOT_KW,
700                SyntaxKind::IN_KW,
701                TrivaBetween::Allowed,
702            ),
703            // at time zone
704            SyntaxKind::AT_TIME_ZONE => self.at_composite3(
705                n,
706                SyntaxKind::AT_KW,
707                SyntaxKind::TIME_KW,
708                SyntaxKind::ZONE_KW,
709            ),
710            // at local
711            SyntaxKind::AT_LOCAL => self.at_composite2(
712                n,
713                SyntaxKind::AT_KW,
714                SyntaxKind::LOCAL_KW,
715                TrivaBetween::Allowed,
716            ),
717            // is distinct from
718            SyntaxKind::IS_DISTINCT_FROM => self.at_composite3(
719                n,
720                SyntaxKind::IS_KW,
721                SyntaxKind::DISTINCT_KW,
722                SyntaxKind::FROM_KW,
723            ),
724            // is not distinct from
725            SyntaxKind::IS_NOT_DISTINCT_FROM => self.at_composite4(
726                n,
727                SyntaxKind::IS_KW,
728                SyntaxKind::NOT_KW,
729                SyntaxKind::DISTINCT_KW,
730                SyntaxKind::FROM_KW,
731            ),
732            // is normalized
733            SyntaxKind::IS_NORMALIZED => {
734                if self.at(SyntaxKind::IS_KW) {
735                    if matches!(
736                        self.nth(1),
737                        SyntaxKind::NFC_KW
738                            | SyntaxKind::NFD_KW
739                            | SyntaxKind::NFKC_KW
740                            | SyntaxKind::NFKD_KW
741                    ) {
742                        if self.nth_at(2, SyntaxKind::NORMALIZED_KW) {
743                            return true;
744                        }
745                    } else {
746                        if self.nth_at(1, SyntaxKind::NORMALIZED_KW) {
747                            return true;
748                        }
749                    }
750                }
751                return false;
752            }
753            // is not normalized
754            SyntaxKind::IS_NOT_NORMALIZED => {
755                if self.at(SyntaxKind::IS_KW) && self.nth_at(1, SyntaxKind::NOT_KW) {
756                    if matches!(
757                        self.nth(2),
758                        SyntaxKind::NFC_KW
759                            | SyntaxKind::NFD_KW
760                            | SyntaxKind::NFKC_KW
761                            | SyntaxKind::NFKD_KW
762                    ) {
763                        if self.nth_at(3, SyntaxKind::NORMALIZED_KW) {
764                            return true;
765                        }
766                    } else if self.nth_at(2, SyntaxKind::NORMALIZED_KW) {
767                        return true;
768                    }
769                }
770                return false;
771            }
772            SyntaxKind::NOT_SIMILAR_TO => self.at_composite3(
773                n,
774                SyntaxKind::NOT_KW,
775                SyntaxKind::SIMILAR_KW,
776                SyntaxKind::TO_KW,
777            ),
778            // similar to
779            SyntaxKind::SIMILAR_TO => self.at_composite2(
780                n,
781                SyntaxKind::SIMILAR_KW,
782                SyntaxKind::TO_KW,
783                TrivaBetween::Allowed,
784            ),
785            // https://www.postgresql.org/docs/17/sql-expressions.html#SQL-EXPRESSIONS-OPERATOR-CALLS
786            // TODO: is this right?
787            SyntaxKind::OPERATOR_CALL => self.at_composite2(
788                n,
789                SyntaxKind::OPERATOR_KW,
790                SyntaxKind::L_PAREN,
791                TrivaBetween::Allowed,
792            ),
793            // is json
794            SyntaxKind::IS_JSON => self.at_composite2(
795                n,
796                SyntaxKind::IS_KW,
797                SyntaxKind::JSON_KW,
798                TrivaBetween::Allowed,
799            ),
800            // is not json
801            SyntaxKind::IS_NOT_JSON => self.at_composite3(
802                n,
803                SyntaxKind::IS_KW,
804                SyntaxKind::NOT_KW,
805                SyntaxKind::JSON_KW,
806            ),
807            // is not json object
808            SyntaxKind::IS_NOT_JSON_OBJECT => self.at_composite4(
809                n,
810                SyntaxKind::IS_KW,
811                SyntaxKind::NOT_KW,
812                SyntaxKind::JSON_KW,
813                SyntaxKind::OBJECT_KW,
814            ),
815            // is not json array
816            SyntaxKind::IS_NOT_JSON_ARRAY => self.at_composite4(
817                n,
818                SyntaxKind::IS_KW,
819                SyntaxKind::NOT_KW,
820                SyntaxKind::JSON_KW,
821                SyntaxKind::ARRAY_KW,
822            ),
823            // is not json value
824            SyntaxKind::IS_NOT_JSON_VALUE => self.at_composite4(
825                n,
826                SyntaxKind::IS_KW,
827                SyntaxKind::NOT_KW,
828                SyntaxKind::JSON_KW,
829                SyntaxKind::VALUE_KW,
830            ),
831            // is not json scalar
832            SyntaxKind::IS_NOT_JSON_SCALAR => self.at_composite4(
833                n,
834                SyntaxKind::IS_KW,
835                SyntaxKind::NOT_KW,
836                SyntaxKind::JSON_KW,
837                SyntaxKind::SCALAR_KW,
838            ),
839            // is json object
840            SyntaxKind::IS_JSON_OBJECT => self.at_composite3(
841                n,
842                SyntaxKind::IS_KW,
843                SyntaxKind::JSON_KW,
844                SyntaxKind::OBJECT_KW,
845            ),
846            // is json array
847            SyntaxKind::IS_JSON_ARRAY => self.at_composite3(
848                n,
849                SyntaxKind::IS_KW,
850                SyntaxKind::JSON_KW,
851                SyntaxKind::ARRAY_KW,
852            ),
853            // is json value
854            SyntaxKind::IS_JSON_VALUE => self.at_composite3(
855                n,
856                SyntaxKind::IS_KW,
857                SyntaxKind::JSON_KW,
858                SyntaxKind::VALUE_KW,
859            ),
860            // is json scalar
861            SyntaxKind::IS_JSON_SCALAR => self.at_composite3(
862                n,
863                SyntaxKind::IS_KW,
864                SyntaxKind::JSON_KW,
865                SyntaxKind::SCALAR_KW,
866            ),
867            // <=
868            SyntaxKind::LTEQ => self.at_composite2(
869                n,
870                SyntaxKind::L_ANGLE,
871                SyntaxKind::EQ,
872                TrivaBetween::NotAllowed,
873            ),
874            // <=
875            SyntaxKind::GTEQ => self.at_composite2(
876                n,
877                SyntaxKind::R_ANGLE,
878                SyntaxKind::EQ,
879                TrivaBetween::NotAllowed,
880            ),
881            SyntaxKind::CUSTOM_OP => {
882                // TODO: is this right?
883                if self.at_ts(OPERATOR_FIRST) {
884                    return true;
885                }
886                return false;
887            }
888            // TODO: we probably shouldn't be using a _ for this but be explicit for each type?
889            _ => self.inp.kind(self.pos + n) == kind,
890        }
891    }
892
    /// Returns the kind of the current token.
    /// If the parser has already reached the end of input,
    /// the special `EOF` kind is returned.
    ///
    /// Equivalent to `nth(0)`, so each call counts towards the step limit
    /// enforced there.
    #[must_use]
    pub(crate) fn current(&self) -> SyntaxKind {
        self.nth(0)
    }
900
901    /// Lookahead operation: returns the kind of the next nth
902    /// token.
903    #[must_use]
904    fn nth(&self, n: usize) -> SyntaxKind {
905        assert!(n <= 3);
906
907        let steps = self.steps.get();
908        assert!(
909            (steps as usize) < PARSER_STEP_LIMIT,
910            "the parser seems stuck"
911        );
912        self.steps.set(steps + 1);
913
914        self.inp.kind(self.pos + n)
915    }
916}