squawk_parser/
lib.rs

1// via https://github.com/rust-lang/rust-analyzer/blob/d8887c0758bbd2d5f752d5bd405d4491e90e7ed6/crates/parser/src/lib.rs
2//
3// Permission is hereby granted, free of charge, to any
4// person obtaining a copy of this software and associated
5// documentation files (the "Software"), to deal in the
6// Software without restriction, including without
7// limitation the rights to use, copy, modify, merge,
8// publish, distribute, sublicense, and/or sell copies of
9// the Software, and to permit persons to whom the Software
10// is furnished to do so, subject to the following
11// conditions:
12//
13// The above copyright notice and this permission notice
14// shall be included in all copies or substantial portions
15// of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
18// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
19// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
20// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
21// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
24// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25// DEALINGS IN THE SOFTWARE.
26
27use drop_bomb::DropBomb;
28use event::Event;
29use grammar::OPERATOR_FIRST;
30use std::cell::Cell;
31use token_set::TokenSet;
32mod event;
33mod generated;
34mod grammar;
35mod input;
36mod lexed_str;
37mod output;
38mod shortcuts;
39mod syntax_kind;
40mod token_set;
41
42pub use crate::{
43    lexed_str::LexedStr,
44    // output::{Output, Step},
45    shortcuts::StrStep,
46    syntax_kind::SyntaxKind,
47};
48
49use crate::input::Input;
50pub use crate::output::Output;
51
52/// See [`Parser::start`].
53pub(crate) struct Marker {
54    pos: u32,
55    bomb: DropBomb,
56}
57
58impl Marker {
59    fn new(pos: u32) -> Marker {
60        Marker {
61            pos,
62            bomb: DropBomb::new("Marker must be either completed or abandoned"),
63        }
64    }
65
66    /// Finishes the syntax tree node and assigns `kind` to it,
67    /// and mark the create a `CompletedMarker` for possible future
68    /// operation like `.precede()` to deal with `forward_parent`.
69    pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker {
70        self.bomb.defuse();
71        let idx = self.pos as usize;
72        match &mut p.events[idx] {
73            Event::Start { kind: slot, .. } => {
74                *slot = kind;
75            }
76            _ => unreachable!(),
77        }
78        p.push_event(Event::Finish);
79        CompletedMarker::new(self.pos, kind)
80    }
81
82    /// Abandons the syntax tree node. All its children
83    /// are attached to its parent instead.
84    pub(crate) fn abandon(mut self, p: &mut Parser<'_>) {
85        self.bomb.defuse();
86        let idx = self.pos as usize;
87        if idx == p.events.len() - 1 {
88            match p.events.pop() {
89                Some(Event::Start {
90                    kind: SyntaxKind::TOMBSTONE,
91                    forward_parent: None,
92                }) => (),
93                _ => unreachable!(),
94            }
95        }
96    }
97}
98
99pub(crate) struct CompletedMarker {
100    pos: u32,
101    kind: SyntaxKind,
102}
103
104impl CompletedMarker {
105    fn new(pos: u32, kind: SyntaxKind) -> Self {
106        CompletedMarker { pos, kind }
107    }
108
109    /// This method allows to create a new node which starts
110    /// *before* the current one. That is, parser could start
111    /// node `A`, then complete it, and then after parsing the
112    /// whole `A`, decide that it should have started some node
113    /// `B` before starting `A`. `precede` allows to do exactly
114    /// that. See also docs about
115    /// [`Event::Start::forward_parent`](crate::event::Event::Start::forward_parent).
116    ///
117    /// Given completed events `[START, FINISH]` and its corresponding
118    /// `CompletedMarker(pos: 0, _)`.
119    /// Append a new `START` events as `[START, FINISH, NEWSTART]`,
120    /// then mark `NEWSTART` as `START`'s parent with saving its relative
121    /// distance to `NEWSTART` into `forward_parent`(=2 in this case);
122    pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker {
123        let new_pos = p.start();
124        let idx = self.pos as usize;
125        match &mut p.events[idx] {
126            Event::Start { forward_parent, .. } => {
127                *forward_parent = Some(new_pos.pos - self.pos);
128            }
129            _ => unreachable!(),
130        }
131        new_pos
132    }
133
134    /// Extends this completed marker *to the left* up to `m`.
135    pub(crate) fn extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker {
136        m.bomb.defuse();
137        let idx = m.pos as usize;
138        match &mut p.events[idx] {
139            Event::Start { forward_parent, .. } => {
140                *forward_parent = Some(self.pos - m.pos);
141            }
142            _ => unreachable!(),
143        }
144        self
145    }
146
147    pub(crate) fn kind(&self) -> SyntaxKind {
148        self.kind
149    }
150}
151
152pub fn parse(input: &Input) -> Output {
153    let mut p = Parser::new(input);
154    // 2. lex tokens to event vec via parser aka actually run the parser code,
155    // it calls the methods on the parser to create a vector of events
156    grammar::entry_point(&mut p);
157    let events = p.finish();
158    // 3. forward parents
159    event::process(events)
160}
161
162pub(crate) struct Parser<'t> {
163    inp: &'t Input,
164    pos: usize,
165    events: Vec<Event>,
166    steps: Cell<u32>,
167}
168
/// Maximum number of `Parser::nth` lookups allowed without consuming a token.
/// Reaching it makes `nth` panic with "the parser seems stuck".
const PARSER_STEP_LIMIT: usize = 15_000_000;
170
/// Whether trivia may separate the two raw tokens of a two-token composite.
///
/// Punctuation such as `>=` must be written without anything in between
/// (`1 > > 2` is not `1 >> 2`), while keyword pairs such as `IS NOT` may be
/// separated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TrivaBetween {
    /// The two tokens must be directly adjacent.
    NotAllowed,
    /// The two tokens may have trivia between them.
    Allowed,
}
175
176impl<'t> Parser<'t> {
177    fn new(inp: &'t Input) -> Parser<'t> {
178        Parser {
179            inp,
180            pos: 0,
181            events: vec![],
182            steps: Cell::new(0),
183        }
184    }
185
186    /// Consume the next token if `kind` matches.
187    pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
188        if !self.at(kind) {
189            return false;
190        }
191        let n_raw_tokens = match kind {
192            SyntaxKind::COLON_EQ
193            | SyntaxKind::NEQ
194            | SyntaxKind::NEQB
195            | SyntaxKind::LTEQ
196            | SyntaxKind::FAT_ARROW
197            | SyntaxKind::GTEQ => 2,
198            SyntaxKind::SIMILAR_TO => {
199                let m = self.start();
200                self.bump(SyntaxKind::SIMILAR_KW);
201                self.bump(SyntaxKind::TO_KW);
202                m.complete(self, SyntaxKind::SIMILAR_TO);
203                return true;
204            }
205            SyntaxKind::AT_TIME_ZONE => {
206                let m = self.start();
207                self.bump(SyntaxKind::AT_KW);
208                self.bump(SyntaxKind::TIME_KW);
209                self.bump(SyntaxKind::ZONE_KW);
210                m.complete(self, SyntaxKind::AT_TIME_ZONE);
211                return true;
212            }
213            SyntaxKind::IS_NOT_NORMALIZED => {
214                let m = self.start();
215                self.bump(SyntaxKind::IS_KW);
216                self.bump(SyntaxKind::NOT_KW);
217                if matches!(
218                    self.current(),
219                    SyntaxKind::NFC_KW
220                        | SyntaxKind::NFD_KW
221                        | SyntaxKind::NFKC_KW
222                        | SyntaxKind::NFKD_KW
223                ) {
224                    self.bump_any();
225                }
226                self.bump(SyntaxKind::NORMALIZED_KW);
227                m.complete(self, SyntaxKind::IS_NOT_NORMALIZED);
228                return true;
229            }
230            SyntaxKind::IS_NORMALIZED => {
231                let m = self.start();
232                self.bump(SyntaxKind::IS_KW);
233                if matches!(
234                    self.current(),
235                    SyntaxKind::NFC_KW
236                        | SyntaxKind::NFD_KW
237                        | SyntaxKind::NFKC_KW
238                        | SyntaxKind::NFKD_KW
239                ) {
240                    self.bump_any();
241                }
242                self.bump(SyntaxKind::NORMALIZED_KW);
243                m.complete(self, SyntaxKind::IS_NORMALIZED);
244                return true;
245            }
246            SyntaxKind::COLON_COLON => {
247                let m = self.start();
248                self.bump(SyntaxKind::COLON);
249                self.bump(SyntaxKind::COLON);
250                m.complete(self, SyntaxKind::COLON_COLON);
251                return true;
252            }
253            SyntaxKind::IS_JSON => {
254                let m = self.start();
255                self.bump(SyntaxKind::IS_KW);
256                self.bump(SyntaxKind::JSON_KW);
257                grammar::opt_json_keys_unique_clause(self);
258                m.complete(self, SyntaxKind::IS_JSON);
259                return true;
260            }
261            SyntaxKind::IS_NOT_JSON => {
262                let m = self.start();
263                self.bump(SyntaxKind::IS_KW);
264                self.bump(SyntaxKind::NOT_KW);
265                self.bump(SyntaxKind::JSON_KW);
266                grammar::opt_json_keys_unique_clause(self);
267                m.complete(self, SyntaxKind::IS_NOT_JSON);
268                return true;
269            }
270            SyntaxKind::IS_NOT_JSON_OBJECT => {
271                let m = self.start();
272                self.bump(SyntaxKind::IS_KW);
273                self.bump(SyntaxKind::NOT_KW);
274                self.bump(SyntaxKind::JSON_KW);
275                self.bump(SyntaxKind::OBJECT_KW);
276                grammar::opt_json_keys_unique_clause(self);
277                m.complete(self, SyntaxKind::IS_NOT_JSON_OBJECT);
278                return true;
279            }
280            SyntaxKind::IS_NOT_JSON_ARRAY => {
281                let m = self.start();
282                self.bump(SyntaxKind::IS_KW);
283                self.bump(SyntaxKind::NOT_KW);
284                self.bump(SyntaxKind::JSON_KW);
285                self.bump(SyntaxKind::ARRAY_KW);
286                grammar::opt_json_keys_unique_clause(self);
287                m.complete(self, SyntaxKind::IS_NOT_JSON_ARRAY);
288                return true;
289            }
290            SyntaxKind::IS_NOT_JSON_VALUE => {
291                let m = self.start();
292                self.bump(SyntaxKind::IS_KW);
293                self.bump(SyntaxKind::NOT_KW);
294                self.bump(SyntaxKind::JSON_KW);
295                self.bump(SyntaxKind::VALUE_KW);
296                grammar::opt_json_keys_unique_clause(self);
297                m.complete(self, SyntaxKind::IS_NOT_JSON_VALUE);
298                return true;
299            }
300            SyntaxKind::IS_NOT_JSON_SCALAR => {
301                let m = self.start();
302                self.bump(SyntaxKind::IS_KW);
303                self.bump(SyntaxKind::NOT_KW);
304                self.bump(SyntaxKind::JSON_KW);
305                self.bump(SyntaxKind::SCALAR_KW);
306                grammar::opt_json_keys_unique_clause(self);
307                m.complete(self, SyntaxKind::IS_NOT_JSON_SCALAR);
308                return true;
309            }
310            SyntaxKind::IS_JSON_OBJECT => {
311                let m = self.start();
312                self.bump(SyntaxKind::IS_KW);
313                self.bump(SyntaxKind::JSON_KW);
314                self.bump(SyntaxKind::OBJECT_KW);
315                grammar::opt_json_keys_unique_clause(self);
316                m.complete(self, SyntaxKind::IS_JSON_OBJECT);
317                return true;
318            }
319            SyntaxKind::IS_JSON_ARRAY => {
320                let m = self.start();
321                self.bump(SyntaxKind::IS_KW);
322                self.bump(SyntaxKind::JSON_KW);
323                self.bump(SyntaxKind::ARRAY_KW);
324                grammar::opt_json_keys_unique_clause(self);
325                m.complete(self, SyntaxKind::IS_JSON_ARRAY);
326                return true;
327            }
328            SyntaxKind::IS_JSON_VALUE => {
329                let m = self.start();
330                self.bump(SyntaxKind::IS_KW);
331                self.bump(SyntaxKind::JSON_KW);
332                self.bump(SyntaxKind::VALUE_KW);
333                grammar::opt_json_keys_unique_clause(self);
334                m.complete(self, SyntaxKind::IS_JSON_VALUE);
335                return true;
336            }
337            SyntaxKind::IS_JSON_SCALAR => {
338                let m = self.start();
339                self.bump(SyntaxKind::IS_KW);
340                self.bump(SyntaxKind::JSON_KW);
341                self.bump(SyntaxKind::SCALAR_KW);
342                grammar::opt_json_keys_unique_clause(self);
343                m.complete(self, SyntaxKind::IS_JSON_SCALAR);
344                return true;
345            }
346            SyntaxKind::NOT_SIMILAR_TO => {
347                let m = self.start();
348                self.bump(SyntaxKind::NOT_KW);
349                self.bump(SyntaxKind::SIMILAR_KW);
350                self.bump(SyntaxKind::TO_KW);
351                m.complete(self, SyntaxKind::NOT_SIMILAR_TO);
352                return true;
353            }
354            SyntaxKind::IS_NOT_DISTINCT_FROM => {
355                let m = self.start();
356                self.bump(SyntaxKind::IS_KW);
357                self.bump(SyntaxKind::NOT_KW);
358                self.bump(SyntaxKind::DISTINCT_KW);
359                self.bump(SyntaxKind::FROM_KW);
360                m.complete(self, SyntaxKind::IS_NOT_DISTINCT_FROM);
361                return true;
362            }
363            SyntaxKind::OPERATOR_CALL => {
364                let m = self.start();
365                self.bump(SyntaxKind::OPERATOR_KW);
366                self.bump(SyntaxKind::L_PAREN);
367
368                // database.
369                if self.eat(SyntaxKind::IDENT) {
370                    self.expect(SyntaxKind::DOT);
371                }
372                // schema.
373                if self.eat(SyntaxKind::IDENT) {
374                    self.expect(SyntaxKind::DOT);
375                }
376
377                // +, -, etc.
378                match grammar::current_operator(self) {
379                    Some(kind) => {
380                        self.bump(kind);
381                    }
382                    None => {
383                        self.error("expected operator");
384                    }
385                }
386
387                self.expect(SyntaxKind::R_PAREN);
388                m.complete(self, SyntaxKind::OPERATOR_CALL);
389                return true;
390            }
391            SyntaxKind::IS_DISTINCT_FROM => {
392                let m = self.start();
393                self.bump(SyntaxKind::IS_KW);
394                self.bump(SyntaxKind::DISTINCT_KW);
395                self.bump(SyntaxKind::FROM_KW);
396                m.complete(self, SyntaxKind::IS_DISTINCT_FROM);
397                return true;
398            }
399            SyntaxKind::NOT_LIKE => {
400                let m = self.start();
401                self.bump(SyntaxKind::NOT_KW);
402                self.bump(SyntaxKind::LIKE_KW);
403                m.complete(self, SyntaxKind::NOT_LIKE);
404                return true;
405            }
406            SyntaxKind::NOT_ILIKE => {
407                let m = self.start();
408                self.bump(SyntaxKind::NOT_KW);
409                self.bump(SyntaxKind::ILIKE_KW);
410                m.complete(self, SyntaxKind::NOT_ILIKE);
411                return true;
412            }
413            SyntaxKind::NOT_IN => {
414                let m = self.start();
415                self.bump(SyntaxKind::NOT_KW);
416                self.bump(SyntaxKind::IN_KW);
417                m.complete(self, SyntaxKind::NOT_IN);
418                return true;
419            }
420            SyntaxKind::IS_NOT => {
421                let m = self.start();
422                self.bump(SyntaxKind::IS_KW);
423                self.bump(SyntaxKind::NOT_KW);
424                m.complete(self, SyntaxKind::IS_NOT);
425                return true;
426            }
427            // SyntaxKind::BYTE_STRING => {
428            //     let m = self.start();
429            //     self.bump(SyntaxKind::BYTE_STRING);
430            //     if self.eat(SyntaxKind::UESCAPE_KW) {
431            //         self.expect(SyntaxKind::STRING);
432            //     }
433            //     m.complete(self, SyntaxKind::BYTE_STRING);
434            //     return true;
435            // }
436            // SyntaxKind::IDENT => {
437            //     let m = self.start();
438            //     self.bump(SyntaxKind::IDENT);
439            //     if self.eat(SyntaxKind::UESCAPE_KW) {
440            //         self.expect(SyntaxKind::STRING);
441            //     }
442            //     m.complete(self, SyntaxKind::IDENT);
443            //     return true;
444            // }
445            SyntaxKind::CUSTOM_OP => {
446                let m = self.start();
447                while !self.at(SyntaxKind::EOF) {
448                    let is_joint = self.inp.is_joint(self.pos);
449                    if self.at_ts(OPERATOR_FIRST) {
450                        self.bump_any();
451                    } else {
452                        break;
453                    }
454                    if !is_joint {
455                        break;
456                    }
457                }
458                m.complete(self, SyntaxKind::CUSTOM_OP);
459                return true;
460            }
461            _ => 1,
462        };
463        self.do_bump(kind, n_raw_tokens);
464        true
465    }
466
467    fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, triva: TrivaBetween) -> bool {
468        let tokens_match =
469            self.inp.kind(self.pos + n) == k1 && self.inp.kind(self.pos + n + 1) == k2;
470        // We need to do this so we can say that:
471        // 1 > > 2, is not the same as 1 >> 2
472        match triva {
473            TrivaBetween::Allowed => tokens_match,
474            TrivaBetween::NotAllowed => {
475                return tokens_match
476                    && self.inp.is_joint(self.pos + n)
477                    && self.next_not_joined_op(n + 1);
478            }
479        }
480    }
481
482    fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
483        self.inp.kind(self.pos + n) == k1
484            && self.inp.kind(self.pos + n + 1) == k2
485            && self.inp.kind(self.pos + n + 2) == k3
486    }
487
488    fn at_composite4(
489        &self,
490        n: usize,
491        k1: SyntaxKind,
492        k2: SyntaxKind,
493        k3: SyntaxKind,
494        k4: SyntaxKind,
495    ) -> bool {
496        self.inp.kind(self.pos + n) == k1
497            && self.inp.kind(self.pos + n + 1) == k2
498            && self.inp.kind(self.pos + n + 2) == k3
499            && self.inp.kind(self.pos + n + 3) == k4
500    }
501
502    fn next_not_joined_op(&self, n: usize) -> bool {
503        let next = self.inp.kind(self.pos + n + 1);
504        // next isn't an operator so we know we're not joined to it
505        if !OPERATOR_FIRST.contains(next) {
506            return true;
507        }
508        // current kind isn't joined
509        if !self.inp.is_joint(self.pos + n) {
510            return true;
511        }
512        false
513    }
514
515    /// Checks if the current token is in `kinds`.
516    pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool {
517        kinds.contains(self.current())
518    }
519
520    /// Starts a new node in the syntax tree. All nodes and tokens
521    /// consumed between the `start` and the corresponding `Marker::complete`
522    /// belong to the same node.
523    pub(crate) fn start(&mut self) -> Marker {
524        let pos = self.events.len() as u32;
525        self.push_event(Event::tombstone());
526        Marker::new(pos)
527    }
528
529    /// Consume the next token. Panics if the parser isn't currently at `kind`.
530    pub(crate) fn bump(&mut self, kind: SyntaxKind) {
531        assert!(self.eat(kind));
532    }
533
534    /// Advances the parser by one token
535    pub(crate) fn bump_any(&mut self) {
536        let kind = self.nth(0);
537        if kind == SyntaxKind::EOF {
538            return;
539        }
540        self.do_bump(kind, 1);
541    }
542
543    /// Advances the parser by one token
544    pub(crate) fn split_float(&mut self, mut marker: Marker) -> (bool, Marker) {
545        assert!(self.at(SyntaxKind::FLOAT_NUMBER));
546        // we have parse `<something>.`
547        // `<something>`.0.1
548        // here we need to insert an extra event
549        //
550        // `<something>`. 0. 1;
551        // here we need to change the follow up parse, the return value will cause us to emulate a dot
552        // the actual splitting happens later
553        let ends_in_dot = !self.inp.is_joint(self.pos);
554        if !ends_in_dot {
555            let new_marker = self.start();
556            let idx = marker.pos as usize;
557            match &mut self.events[idx] {
558                Event::Start {
559                    forward_parent,
560                    kind,
561                } => {
562                    *kind = SyntaxKind::FIELD_EXPR;
563                    *forward_parent = Some(new_marker.pos - marker.pos);
564                }
565                _ => unreachable!(),
566            }
567            marker.bomb.defuse();
568            marker = new_marker;
569        };
570        self.pos += 1;
571        self.push_event(Event::FloatSplitHack { ends_in_dot });
572        (ends_in_dot, marker)
573    }
574
575    /// Consume the next token if it is `kind` or emit an error
576    /// otherwise.
577    pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
578        if self.eat(kind) {
579            return true;
580        }
581        self.error(format!("expected {kind:?}"));
582        false
583    }
584
585    /// Create an error node and consume the next token.
586    pub(crate) fn err_and_bump(&mut self, message: &str) {
587        self.err_recover(message, TokenSet::EMPTY);
588    }
589
590    /// Create an error node and consume the next token.
591    pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) {
592        // TODO: maybe we actually want this?
593        // if matches!(self.current(), SyntaxKind::L_PAREN | SyntaxKind::R_PAREN) {
594        //     self.error(message);
595        //     return;
596        // }
597
598        if self.at_ts(recovery) {
599            self.error(message);
600            return;
601        }
602
603        let m = self.start();
604        self.error(message);
605        self.bump_any();
606        m.complete(self, SyntaxKind::ERROR);
607    }
608
609    fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
610        self.pos += n_raw_tokens as usize;
611        self.steps.set(0);
612        self.push_event(Event::Token { kind, n_raw_tokens });
613    }
614
615    fn push_event(&mut self, event: Event) {
616        self.events.push(event);
617    }
618
619    fn finish(self) -> Vec<Event> {
620        self.events
621    }
622
623    /// Emit error with the `message`
624    /// FIXME: this should be much more fancy and support
625    /// structured errors with spans and notes, like rustc
626    /// does.
627    pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
628        let msg = message.into();
629        self.push_event(Event::Error { msg });
630    }
631
632    /// Checks if the current token is `kind`.
633    #[must_use]
634    pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
635        self.nth_at(0, kind)
636    }
637
638    /// Checks if the nth token is in `kinds`.
639    #[must_use]
640    pub(crate) fn nth_at_ts(&self, n: usize, kinds: TokenSet) -> bool {
641        kinds.contains(self.nth(n))
642    }
643
644    #[must_use]
645    pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
646        match kind {
647            // =>
648            SyntaxKind::FAT_ARROW => self.at_composite2(
649                n,
650                SyntaxKind::EQ,
651                SyntaxKind::R_ANGLE,
652                TrivaBetween::NotAllowed,
653            ),
654            // :=
655            SyntaxKind::COLON_EQ => self.at_composite2(
656                n,
657                SyntaxKind::COLON,
658                SyntaxKind::EQ,
659                TrivaBetween::NotAllowed,
660            ),
661            // ::
662            SyntaxKind::COLON_COLON => self.at_composite2(
663                n,
664                SyntaxKind::COLON,
665                SyntaxKind::COLON,
666                TrivaBetween::NotAllowed,
667            ),
668            // !=
669            SyntaxKind::NEQ => self.at_composite2(
670                n,
671                SyntaxKind::BANG,
672                SyntaxKind::EQ,
673                TrivaBetween::NotAllowed,
674            ),
675            // <>
676            SyntaxKind::NEQB => self.at_composite2(
677                n,
678                SyntaxKind::L_ANGLE,
679                SyntaxKind::R_ANGLE,
680                TrivaBetween::NotAllowed,
681            ),
682            // is not
683            SyntaxKind::IS_NOT => self.at_composite2(
684                n,
685                SyntaxKind::IS_KW,
686                SyntaxKind::NOT_KW,
687                TrivaBetween::Allowed,
688            ),
689            // not like
690            SyntaxKind::NOT_LIKE => self.at_composite2(
691                n,
692                SyntaxKind::NOT_KW,
693                SyntaxKind::LIKE_KW,
694                TrivaBetween::Allowed,
695            ),
696            // not ilike
697            SyntaxKind::NOT_ILIKE => self.at_composite2(
698                n,
699                SyntaxKind::NOT_KW,
700                SyntaxKind::ILIKE_KW,
701                TrivaBetween::Allowed,
702            ),
703            // not in
704            SyntaxKind::NOT_IN => self.at_composite2(
705                n,
706                SyntaxKind::NOT_KW,
707                SyntaxKind::IN_KW,
708                TrivaBetween::Allowed,
709            ),
710            // at time zone
711            SyntaxKind::AT_TIME_ZONE => self.at_composite3(
712                n,
713                SyntaxKind::AT_KW,
714                SyntaxKind::TIME_KW,
715                SyntaxKind::ZONE_KW,
716            ),
717            // is distinct from
718            SyntaxKind::IS_DISTINCT_FROM => self.at_composite3(
719                n,
720                SyntaxKind::IS_KW,
721                SyntaxKind::DISTINCT_KW,
722                SyntaxKind::FROM_KW,
723            ),
724            // is not distinct from
725            SyntaxKind::IS_NOT_DISTINCT_FROM => self.at_composite4(
726                n,
727                SyntaxKind::IS_KW,
728                SyntaxKind::NOT_KW,
729                SyntaxKind::DISTINCT_KW,
730                SyntaxKind::FROM_KW,
731            ),
732            // is normalized
733            SyntaxKind::IS_NORMALIZED => {
734                if self.at(SyntaxKind::IS_KW) {
735                    if matches!(
736                        self.nth(1),
737                        SyntaxKind::NFC_KW
738                            | SyntaxKind::NFD_KW
739                            | SyntaxKind::NFKC_KW
740                            | SyntaxKind::NFKD_KW
741                    ) {
742                        if self.nth_at(2, SyntaxKind::NORMALIZED_KW) {
743                            return true;
744                        }
745                    } else {
746                        if self.nth_at(1, SyntaxKind::NORMALIZED_KW) {
747                            return true;
748                        }
749                    }
750                }
751                return false;
752            }
753            // is not normalized
754            SyntaxKind::IS_NOT_NORMALIZED => {
755                if self.at(SyntaxKind::IS_KW) && self.nth_at(1, SyntaxKind::NOT_KW) {
756                    if matches!(
757                        self.nth(2),
758                        SyntaxKind::NFC_KW
759                            | SyntaxKind::NFD_KW
760                            | SyntaxKind::NFKC_KW
761                            | SyntaxKind::NFKD_KW
762                    ) {
763                        if self.nth_at(3, SyntaxKind::NOT_KW)
764                            && self.nth_at(4, SyntaxKind::NORMALIZED_KW)
765                        {
766                            return true;
767                        }
768                    } else {
769                        if self.nth_at(2, SyntaxKind::NOT_KW)
770                            && self.nth_at(3, SyntaxKind::NORMALIZED_KW)
771                        {
772                            return true;
773                        }
774                    }
775                }
776                return false;
777            }
778            SyntaxKind::NOT_SIMILAR_TO => self.at_composite3(
779                n,
780                SyntaxKind::NOT_KW,
781                SyntaxKind::SIMILAR_KW,
782                SyntaxKind::TO_KW,
783            ),
784            // similar to
785            SyntaxKind::SIMILAR_TO => self.at_composite2(
786                n,
787                SyntaxKind::SIMILAR_KW,
788                SyntaxKind::TO_KW,
789                TrivaBetween::Allowed,
790            ),
791            // https://www.postgresql.org/docs/17/sql-expressions.html#SQL-EXPRESSIONS-OPERATOR-CALLS
792            // TODO: is this right?
793            SyntaxKind::OPERATOR_CALL => self.at_composite2(
794                n,
795                SyntaxKind::OPERATOR_KW,
796                SyntaxKind::L_PAREN,
797                TrivaBetween::Allowed,
798            ),
799            // is json
800            SyntaxKind::IS_JSON => self.at_composite2(
801                n,
802                SyntaxKind::IS_KW,
803                SyntaxKind::JSON_KW,
804                TrivaBetween::Allowed,
805            ),
806            // is not json
807            SyntaxKind::IS_NOT_JSON => self.at_composite3(
808                n,
809                SyntaxKind::IS_KW,
810                SyntaxKind::NOT_KW,
811                SyntaxKind::JSON_KW,
812            ),
813            // is not json object
814            SyntaxKind::IS_NOT_JSON_OBJECT => self.at_composite4(
815                n,
816                SyntaxKind::IS_KW,
817                SyntaxKind::NOT_KW,
818                SyntaxKind::JSON_KW,
819                SyntaxKind::OBJECT_KW,
820            ),
821            // is not json array
822            SyntaxKind::IS_NOT_JSON_ARRAY => self.at_composite4(
823                n,
824                SyntaxKind::IS_KW,
825                SyntaxKind::NOT_KW,
826                SyntaxKind::JSON_KW,
827                SyntaxKind::ARRAY_KW,
828            ),
829            // is not json value
830            SyntaxKind::IS_NOT_JSON_VALUE => self.at_composite4(
831                n,
832                SyntaxKind::IS_KW,
833                SyntaxKind::NOT_KW,
834                SyntaxKind::JSON_KW,
835                SyntaxKind::VALUE_KW,
836            ),
837            // is not json scalar
838            SyntaxKind::IS_NOT_JSON_SCALAR => self.at_composite4(
839                n,
840                SyntaxKind::IS_KW,
841                SyntaxKind::NOT_KW,
842                SyntaxKind::JSON_KW,
843                SyntaxKind::SCALAR_KW,
844            ),
845            // is json object
846            SyntaxKind::IS_JSON_OBJECT => self.at_composite3(
847                n,
848                SyntaxKind::IS_KW,
849                SyntaxKind::JSON_KW,
850                SyntaxKind::OBJECT_KW,
851            ),
852            // is json array
853            SyntaxKind::IS_JSON_ARRAY => self.at_composite3(
854                n,
855                SyntaxKind::IS_KW,
856                SyntaxKind::JSON_KW,
857                SyntaxKind::ARRAY_KW,
858            ),
859            // is json value
860            SyntaxKind::IS_JSON_VALUE => self.at_composite3(
861                n,
862                SyntaxKind::IS_KW,
863                SyntaxKind::JSON_KW,
864                SyntaxKind::VALUE_KW,
865            ),
866            // is json scalar
867            SyntaxKind::IS_JSON_SCALAR => self.at_composite3(
868                n,
869                SyntaxKind::IS_KW,
870                SyntaxKind::JSON_KW,
871                SyntaxKind::SCALAR_KW,
872            ),
873            // <=
874            SyntaxKind::LTEQ => self.at_composite2(
875                n,
876                SyntaxKind::L_ANGLE,
877                SyntaxKind::EQ,
878                TrivaBetween::NotAllowed,
879            ),
880            // <=
881            SyntaxKind::GTEQ => self.at_composite2(
882                n,
883                SyntaxKind::R_ANGLE,
884                SyntaxKind::EQ,
885                TrivaBetween::NotAllowed,
886            ),
887            SyntaxKind::CUSTOM_OP => {
888                // TODO: is this right?
889                if self.at_ts(OPERATOR_FIRST) {
890                    return true;
891                }
892                return false;
893            }
894            // TODO: we probably shouldn't be using a _ for this but be explicit for each type?
895            _ => self.inp.kind(self.pos + n) == kind,
896        }
897    }
898
899    /// Returns the kind of the current token.
900    /// If parser has already reached the end of input,
901    /// the special `EOF` kind is returned.
902    #[must_use]
903    pub(crate) fn current(&self) -> SyntaxKind {
904        self.nth(0)
905    }
906
907    /// Lookahead operation: returns the kind of the next nth
908    /// token.
909    #[must_use]
910    fn nth(&self, n: usize) -> SyntaxKind {
911        assert!(n <= 3);
912
913        let steps = self.steps.get();
914        assert!(
915            (steps as usize) < PARSER_STEP_LIMIT,
916            "the parser seems stuck"
917        );
918        self.steps.set(steps + 1);
919
920        self.inp.kind(self.pos + n)
921    }
922}