// postgrest_query_parser/lexer.rs

1use std::{iter::Enumerate, ops::Range};
2
3use peekmore::{PeekMore, PeekMoreIterator};
4
5pub struct Lexer<T>
6where
7    T: Iterator<Item = char>,
8{
9    tokens: PeekMoreIterator<Tokenizer<T>>,
10    previous: Option<TokenType>,
11}
12
13impl<T> Lexer<T>
14where
15    T: Iterator<Item = char>,
16{
17    fn current_is_char_and_next_is_not_char(&mut self, token_type: TokenType) -> bool {
18        let next = self.tokens.peek().map(|x| &x.token_type);
19
20        (token_type == TokenType::Char
21            && next != Some(&TokenType::Char)
22            && next != Some(&TokenType::Minus))
23            || self.next_will_be_arrow()
24    }
25
26    fn current_is_double_colon_and_next_is_double_colon(&mut self, token_type: TokenType) -> bool {
27        token_type == TokenType::DoubleColon
28            && self.tokens.peek().map(|x| &x.token_type) == Some(&TokenType::DoubleColon)
29    }
30
31    fn current_is_double_colon_and_previous_was_double_colon(
32        &mut self,
33        token_type: TokenType,
34    ) -> bool {
35        token_type == TokenType::DoubleColon && self.previous == Some(TokenType::DoubleColon)
36    }
37
38    fn current_is_minus_and_next_angle_bracket_close(&mut self, token_type: TokenType) -> bool {
39        token_type == TokenType::Minus && self.previous == Some(TokenType::AngleBracketsClose)
40    }
41
42    fn current_is_angle_bracket_close_and_previous_was_minus(
43        &mut self,
44        token_type: TokenType,
45    ) -> bool {
46        token_type == TokenType::AngleBracketsClose && self.previous == Some(TokenType::Minus)
47    }
48
49    fn set_previous(&mut self, token_type: TokenType) {
50        self.previous = Some(token_type);
51    }
52
53    fn next_will_be_arrow(&mut self) -> bool {
54        (self.tokens.peek().map(|x| &x.token_type)) == Some(&TokenType::Minus)
55            && (self.tokens.peek_nth(1).map(|x| &x.token_type))
56                == Some(&TokenType::AngleBracketsClose)
57    }
58}
59
/// Returns the half-open range that covers exactly the single position `pos`.
fn one_range(pos: usize) -> Range<usize> {
    pos..(pos + 1)
}
63
64fn as_single_item(token_type: TokenType, pos: usize) -> Option<Span> {
65    match token_type {
66        TokenType::Equal => Some(Span {
67            span_type: SpanType::Equal,
68            range: one_range(pos),
69        }),
70        TokenType::Ampersand => Some(Span {
71            span_type: SpanType::And,
72            range: one_range(pos),
73        }),
74        TokenType::Comma => Some(Span {
75            span_type: SpanType::Separator,
76            range: one_range(pos),
77        }),
78        TokenType::Questionmark => Some(Span {
79            span_type: SpanType::QueryStart,
80            range: one_range(pos),
81        }),
82        TokenType::Dot => Some(Span {
83            span_type: SpanType::PathSeparator,
84            range: one_range(pos),
85        }),
86        TokenType::RoundBracketsOpen => Some(Span {
87            span_type: SpanType::CaptureStart,
88            range: one_range(pos),
89        }),
90        TokenType::RoundBracketsClose => Some(Span {
91            span_type: SpanType::CaptureEnd,
92            range: one_range(pos),
93        }),
94        TokenType::CurlyBracketsOpen => Some(Span {
95            span_type: SpanType::ListStart,
96            range: one_range(pos),
97        }),
98        TokenType::CurlyBracketsClose => Some(Span {
99            span_type: SpanType::ListEnd,
100            range: one_range(pos),
101        }),
102        TokenType::Space => Some(Span {
103            span_type: SpanType::Empty,
104            range: one_range(pos),
105        }),
106        TokenType::DoubleColon => Some(Span {
107            span_type: SpanType::Alias,
108            range: one_range(pos),
109        }),
110        _ => None,
111    }
112}
113
114impl<T> Iterator for Lexer<T>
115where
116    T: Iterator<Item = char>,
117{
118    type Item = Span;
119
120    fn next(&mut self) -> Option<Self::Item> {
121        let mut start = None;
122        while let Some(token) = self.tokens.next() {
123            // if start.is_none() {
124            //     start = Some(token.pos)
125            // };
126
127            start = start.or(Some(token.pos));
128
129            let start_pos = start.expect("start is always set");
130
131            if self.current_is_double_colon_and_next_is_double_colon(token.token_type) {
132                self.set_previous(token.token_type);
133                continue;
134            }
135
136            if self.current_is_double_colon_and_previous_was_double_colon(token.token_type) {
137                let range = start_pos..(token.pos + 1);
138
139                self.set_previous(token.token_type);
140                return Some(Span {
141                    span_type: SpanType::Cast,
142                    range,
143                });
144            }
145
146            if self.current_is_minus_and_next_angle_bracket_close(token.token_type) {
147                self.set_previous(token.token_type);
148                continue;
149            }
150
151            if self.current_is_angle_bracket_close_and_previous_was_minus(token.token_type) {
152                if self.tokens.peek().map(|x| &x.token_type) == Some(&TokenType::AngleBracketsClose)
153                {
154                    // lets roll forward
155                    let token = self.tokens.next().unwrap();
156                    let range = start_pos..(token.pos + 1);
157                    self.set_previous(token.token_type);
158                    return Some(Span {
159                        span_type: SpanType::BinaryArrow,
160                        range,
161                    });
162                }
163                let range = start_pos..(token.pos + 1);
164
165                self.set_previous(token.token_type);
166                return Some(Span {
167                    span_type: SpanType::Arrow,
168                    range,
169                });
170            }
171
172            if let Some(span) = as_single_item(token.token_type, start_pos) {
173                self.set_previous(token.token_type);
174                return Some(span);
175            }
176
177            if self.current_is_char_and_next_is_not_char(token.token_type) {
178                let range = start_pos..(token.pos + 1);
179
180                self.set_previous(token.token_type);
181                return Some(Span {
182                    span_type: SpanType::String,
183                    range,
184                });
185            }
186
187            self.set_previous(token.token_type);
188        }
189
190        None
191    }
192}
193
194#[derive(Debug, PartialEq)]
195pub struct Span {
196    pub span_type: SpanType,
197    pub range: Range<usize>,
198}
199
/// The kinds of [`Span`] the lexer can emit.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum SpanType {
    /// A run of ordinary characters.
    String,
    /// A single `:`.
    Alias,
    /// A `::` pair.
    Cast,
    /// `=`
    Equal,
    /// `&`
    And,
    /// `,`
    Separator,
    /// `.`
    PathSeparator,
    /// `?`
    QueryStart,
    /// `(`
    CaptureStart,
    /// `)`
    CaptureEnd,
    /// `{`
    ListStart,
    /// `}`
    ListEnd,
    /// A single space.
    Empty,
    /// `->>`
    BinaryArrow,
    /// `->`
    Arrow,
}
218
219impl<T> Lexer<T>
220where
221    T: Iterator<Item = char>,
222{
223    pub fn new(input: T) -> Lexer<T> {
224        Lexer {
225            tokens: Tokenizer::new(input).peekmore(),
226            previous: None,
227        }
228    }
229}
230
/// Turns a stream of characters into a stream of single-character tokens.
#[derive(Debug)]
pub struct Tokenizer<T>
where
    T: Iterator<Item = char>,
{
    /// The input characters paired with their running index.
    input: Enumerate<T>,
}
238
239impl<T> Iterator for Tokenizer<T>
240where
241    T: Iterator<Item = char>,
242{
243    type Item = Token;
244
245    fn next(&mut self) -> Option<Self::Item> {
246        if let Some((pos, ch)) = self.input.next() {
247            return Some(Token {
248                token_type: TokenType::from(ch),
249                pos,
250            });
251        }
252
253        None
254    }
255}
256
257impl<T> Tokenizer<T>
258where
259    T: Iterator<Item = char>,
260{
261    pub fn new(input: T) -> Tokenizer<T> {
262        Tokenizer {
263            input: input.enumerate(),
264        }
265    }
266}
267
268#[derive(Debug)]
269pub struct Token {
270    pub token_type: TokenType,
271    pub pos: usize,
272}
273
/// Classification of a single input character.
///
/// Every character that has no dedicated variant is a [`TokenType::Char`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenType {
    Questionmark,
    Equal,
    Comma,
    Dot,
    /// A single `:` character, despite the name.
    DoubleColon,
    Minus,
    RoundBracketsOpen,
    RoundBracketsClose,
    SquareBracketsOpen,
    SquareBracketsClose,
    AngleBracketsOpen,
    AngleBracketsClose,
    CurlyBracketsOpen,
    CurlyBracketsClose,
    Ampersand,
    Space,
    /// Any other character.
    Char,
}
294
295impl From<char> for TokenType {
296    fn from(ch: char) -> TokenType {
297        use TokenType::*;
298
299        match ch {
300            '?' => Questionmark,
301            '=' => Equal,
302            ',' => Comma,
303            '.' => Dot,
304            ':' => DoubleColon,
305            '-' => Minus,
306            '>' => AngleBracketsClose,
307            '(' => RoundBracketsOpen,
308            ')' => RoundBracketsClose,
309            '[' => SquareBracketsOpen,
310            ']' => SquareBracketsClose,
311            '{' => CurlyBracketsOpen,
312            '}' => CurlyBracketsClose,
313            '&' => Ampersand,
314            ' ' => Space,
315            _ => Char,
316        }
317    }
318}
319
// PostgREST tests: https://github.com/PostgREST/postgrest/blob/main/test/spec/Feature/Query/QuerySpec.hs
321
// A plain `select` with two comma-separated columns.
#[test]
fn simple_select() {
    use SpanType::*;

    let input = "select=first_name,age";

    let expected = vec![
        (String, "select"),
        (Equal, "="),
        (String, "first_name"),
        (Separator, ","),
        (String, "age"),
    ];

    let out: Vec<_> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    assert_eq!(expected, out);
}
343
// A query string with a dotted filter and a second `&`-joined parameter.
#[test]
fn simple_query() {
    use SpanType::*;

    let input = "?id=not.eq.5&order=id";

    let expected = vec![
        (QueryStart, "?"),
        (String, "id"),
        (Equal, "="),
        (String, "not"),
        (PathSeparator, "."),
        (String, "eq"),
        (PathSeparator, "."),
        (String, "5"),
        (And, "&"),
        (String, "order"),
        (Equal, "="),
        (String, "id"),
    ];

    let out: Vec<_> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    assert_eq!(expected, out);
}
373
// An `or` group with nested parentheses and spaces after the separators.
#[test]
fn or_statement_query() {
    use SpanType::*;

    let input = "?or=(text_search_vector.phfts(german).Art%20Spass, text_search_vector.phfts(french).amusant, text_search_vector.fts(english).impossible)";

    let expected = vec![
        (QueryStart, "?"),
        (String, "or"),
        (Equal, "="),
        (CaptureStart, "("),
        (String, "text_search_vector"),
        (PathSeparator, "."),
        (String, "phfts"),
        (CaptureStart, "("),
        (String, "german"),
        (CaptureEnd, ")"),
        (PathSeparator, "."),
        (String, "Art%20Spass"),
        (Separator, ","),
        (Empty, " "),
        (String, "text_search_vector"),
        (PathSeparator, "."),
        (String, "phfts"),
        (CaptureStart, "("),
        (String, "french"),
        (CaptureEnd, ")"),
        (PathSeparator, "."),
        (String, "amusant"),
        (Separator, ","),
        (Empty, " "),
        (String, "text_search_vector"),
        (PathSeparator, "."),
        (String, "fts"),
        (CaptureStart, "("),
        (String, "english"),
        (CaptureEnd, ")"),
        (PathSeparator, "."),
        (String, "impossible"),
        (CaptureEnd, ")"),
    ];

    let out: Vec<_> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    assert_eq!(expected, out);
}
424
// Nested embedded resources in `select` plus a dotted filter path.
#[test]
fn nested_statement_query() {
    use SpanType::*;

    let input = "?select=id,projects(id,tasks(id,name))&projects.tasks.name=like.Design*";

    let expected = vec![
        (QueryStart, "?"),
        (String, "select"),
        (Equal, "="),
        (String, "id"),
        (Separator, ","),
        (String, "projects"),
        (CaptureStart, "("),
        (String, "id"),
        (Separator, ","),
        (String, "tasks"),
        (CaptureStart, "("),
        (String, "id"),
        (Separator, ","),
        (String, "name"),
        (CaptureEnd, ")"),
        (CaptureEnd, ")"),
        (And, "&"),
        (String, "projects"),
        (PathSeparator, "."),
        (String, "tasks"),
        (PathSeparator, "."),
        (String, "name"),
        (Equal, "="),
        (String, "like"),
        (PathSeparator, "."),
        (String, "Design*"),
    ];

    let out: Vec<_> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    assert_eq!(expected, out);
}
468
// A `{...}` list literal as the filter value.
#[test]
fn statement_with_list_query() {
    use SpanType::*;

    let input = "?select=id&arr_data=cd.{1,2,4}";

    let expected = vec![
        (QueryStart, "?"),
        (String, "select"),
        (Equal, "="),
        (String, "id"),
        (And, "&"),
        (String, "arr_data"),
        (Equal, "="),
        (String, "cd"),
        (PathSeparator, "."),
        (ListStart, "{"),
        (String, "1"),
        (Separator, ","),
        (String, "2"),
        (Separator, ","),
        (String, "4"),
        (ListEnd, "}"),
    ];

    let out: Vec<_> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    assert_eq!(expected, out);
}
502
// A single `:` yields an alias, a `::` pair yields a cast.
#[test]
fn typecast_statement_query() {
    use SpanType::*;

    let input = "select=clientId:id,oid_col::int,oid_array_col::_int4";

    let expected = vec![
        (String, "select"),
        (Equal, "="),
        (String, "clientId"),
        (Alias, ":"),
        (String, "id"),
        (Separator, ","),
        (String, "oid_col"),
        (Cast, "::"),
        (String, "int"),
        (Separator, ","),
        (String, "oid_array_col"),
        (Cast, "::"),
        (String, "_int4"),
    ];

    let out: Vec<_> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    assert_eq!(expected, out);
}
533
// Percent-encoded quotes are ordinary characters; punctuation inside them
// is still split into its own spans.
#[test]
fn statement_with_escaped_characters_query() {
    use SpanType::*;

    let input = "?select=%22:arr-%3Eow::cast%22,%22(inside,parens)%22,%22a.dotted.column%22,%22%20%20col%20%20w%20%20space%20%20%22&%22*id*%22=eq.1";

    let expected = vec![
        (QueryStart, "?"),
        (String, "select"),
        (Equal, "="),
        (String, "%22"),
        (Alias, ":"),
        (String, "arr-%3Eow"),
        (Cast, "::"),
        (String, "cast%22"),
        (Separator, ","),
        (String, "%22"),
        (CaptureStart, "("),
        (String, "inside"),
        (Separator, ","),
        (String, "parens"),
        (CaptureEnd, ")"),
        (String, "%22"),
        (Separator, ","),
        (String, "%22a"),
        (PathSeparator, "."),
        (String, "dotted"),
        (PathSeparator, "."),
        (String, "column%22"),
        (Separator, ","),
        (String, "%22%20%20col%20%20w%20%20space%20%20%22"),
        (And, "&"),
        (String, "%22*id*%22"),
        (Equal, "="),
        (String, "eq"),
        (PathSeparator, "."),
        (String, "1"),
    ];

    let out: Vec<_> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    assert_eq!(expected, out);
}
581
// `->` and `->>` are each emitted as a single multi-character span.
#[test]
fn statement_with_json_query() {
    use SpanType::*;

    let input = "select=id,json_data->>blood_type,json_data->phones";

    let expected = vec![
        (String, "select"),
        (Equal, "="),
        (String, "id"),
        (Separator, ","),
        (String, "json_data"),
        (BinaryArrow, "->>"),
        (String, "blood_type"),
        (Separator, ","),
        (String, "json_data"),
        (Arrow, "->"),
        (String, "phones"),
    ];

    let out: Vec<_> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    assert_eq!(expected, out);
}