1use std::{iter::Enumerate, ops::Range};
2
3use peekmore::{PeekMore, PeekMoreIterator};
4
5pub struct Lexer<T>
6where
7 T: Iterator<Item = char>,
8{
9 tokens: PeekMoreIterator<Tokenizer<T>>,
10 previous: Option<TokenType>,
11}
12
13impl<T> Lexer<T>
14where
15 T: Iterator<Item = char>,
16{
17 fn current_is_char_and_next_is_not_char(&mut self, token_type: TokenType) -> bool {
18 let next = self.tokens.peek().map(|x| &x.token_type);
19
20 (token_type == TokenType::Char
21 && next != Some(&TokenType::Char)
22 && next != Some(&TokenType::Minus))
23 || self.next_will_be_arrow()
24 }
25
26 fn current_is_double_colon_and_next_is_double_colon(&mut self, token_type: TokenType) -> bool {
27 token_type == TokenType::DoubleColon
28 && self.tokens.peek().map(|x| &x.token_type) == Some(&TokenType::DoubleColon)
29 }
30
31 fn current_is_double_colon_and_previous_was_double_colon(
32 &mut self,
33 token_type: TokenType,
34 ) -> bool {
35 token_type == TokenType::DoubleColon && self.previous == Some(TokenType::DoubleColon)
36 }
37
38 fn current_is_minus_and_next_angle_bracket_close(&mut self, token_type: TokenType) -> bool {
39 token_type == TokenType::Minus && self.previous == Some(TokenType::AngleBracketsClose)
40 }
41
42 fn current_is_angle_bracket_close_and_previous_was_minus(
43 &mut self,
44 token_type: TokenType,
45 ) -> bool {
46 token_type == TokenType::AngleBracketsClose && self.previous == Some(TokenType::Minus)
47 }
48
49 fn set_previous(&mut self, token_type: TokenType) {
50 self.previous = Some(token_type);
51 }
52
53 fn next_will_be_arrow(&mut self) -> bool {
54 (self.tokens.peek().map(|x| &x.token_type)) == Some(&TokenType::Minus)
55 && (self.tokens.peek_nth(1).map(|x| &x.token_type))
56 == Some(&TokenType::AngleBracketsClose)
57 }
58}
59
/// Returns the half-open range that covers exactly the position `pos`.
fn one_range(pos: usize) -> Range<usize> {
    Range {
        start: pos,
        end: pos + 1,
    }
}
63
64fn as_single_item(token_type: TokenType, pos: usize) -> Option<Span> {
65 match token_type {
66 TokenType::Equal => Some(Span {
67 span_type: SpanType::Equal,
68 range: one_range(pos),
69 }),
70 TokenType::Ampersand => Some(Span {
71 span_type: SpanType::And,
72 range: one_range(pos),
73 }),
74 TokenType::Comma => Some(Span {
75 span_type: SpanType::Separator,
76 range: one_range(pos),
77 }),
78 TokenType::Questionmark => Some(Span {
79 span_type: SpanType::QueryStart,
80 range: one_range(pos),
81 }),
82 TokenType::Dot => Some(Span {
83 span_type: SpanType::PathSeparator,
84 range: one_range(pos),
85 }),
86 TokenType::RoundBracketsOpen => Some(Span {
87 span_type: SpanType::CaptureStart,
88 range: one_range(pos),
89 }),
90 TokenType::RoundBracketsClose => Some(Span {
91 span_type: SpanType::CaptureEnd,
92 range: one_range(pos),
93 }),
94 TokenType::CurlyBracketsOpen => Some(Span {
95 span_type: SpanType::ListStart,
96 range: one_range(pos),
97 }),
98 TokenType::CurlyBracketsClose => Some(Span {
99 span_type: SpanType::ListEnd,
100 range: one_range(pos),
101 }),
102 TokenType::Space => Some(Span {
103 span_type: SpanType::Empty,
104 range: one_range(pos),
105 }),
106 TokenType::DoubleColon => Some(Span {
107 span_type: SpanType::Alias,
108 range: one_range(pos),
109 }),
110 _ => None,
111 }
112}
113
114impl<T> Iterator for Lexer<T>
115where
116 T: Iterator<Item = char>,
117{
118 type Item = Span;
119
120 fn next(&mut self) -> Option<Self::Item> {
121 let mut start = None;
122 while let Some(token) = self.tokens.next() {
123 start = start.or(Some(token.pos));
128
129 let start_pos = start.expect("start is always set");
130
131 if self.current_is_double_colon_and_next_is_double_colon(token.token_type) {
132 self.set_previous(token.token_type);
133 continue;
134 }
135
136 if self.current_is_double_colon_and_previous_was_double_colon(token.token_type) {
137 let range = start_pos..(token.pos + 1);
138
139 self.set_previous(token.token_type);
140 return Some(Span {
141 span_type: SpanType::Cast,
142 range,
143 });
144 }
145
146 if self.current_is_minus_and_next_angle_bracket_close(token.token_type) {
147 self.set_previous(token.token_type);
148 continue;
149 }
150
151 if self.current_is_angle_bracket_close_and_previous_was_minus(token.token_type) {
152 if self.tokens.peek().map(|x| &x.token_type) == Some(&TokenType::AngleBracketsClose)
153 {
154 let token = self.tokens.next().unwrap();
156 let range = start_pos..(token.pos + 1);
157 self.set_previous(token.token_type);
158 return Some(Span {
159 span_type: SpanType::BinaryArrow,
160 range,
161 });
162 }
163 let range = start_pos..(token.pos + 1);
164
165 self.set_previous(token.token_type);
166 return Some(Span {
167 span_type: SpanType::Arrow,
168 range,
169 });
170 }
171
172 if let Some(span) = as_single_item(token.token_type, start_pos) {
173 self.set_previous(token.token_type);
174 return Some(span);
175 }
176
177 if self.current_is_char_and_next_is_not_char(token.token_type) {
178 let range = start_pos..(token.pos + 1);
179
180 self.set_previous(token.token_type);
181 return Some(Span {
182 span_type: SpanType::String,
183 range,
184 });
185 }
186
187 self.set_previous(token.token_type);
188 }
189
190 None
191 }
192}
193
194#[derive(Debug, PartialEq)]
195pub struct Span {
196 pub span_type: SpanType,
197 pub range: Range<usize>,
198}
199
/// The kinds of [`Span`] the lexer can emit.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum SpanType {
    /// A run of characters that have no special meaning to the lexer.
    String,
    /// A single `:`.
    Alias,
    /// Two consecutive colons, `::`.
    Cast,
    /// `=`
    Equal,
    /// `&`
    And,
    /// `,`
    Separator,
    /// `.`
    PathSeparator,
    /// `?`
    QueryStart,
    /// `(`
    CaptureStart,
    /// `)`
    CaptureEnd,
    /// `{`
    ListStart,
    /// `}`
    ListEnd,
    /// A single space character.
    Empty,
    /// `->>`
    BinaryArrow,
    /// `->`
    Arrow,
}
218
219impl<T> Lexer<T>
220where
221 T: Iterator<Item = char>,
222{
223 pub fn new(input: T) -> Lexer<T> {
224 Lexer {
225 tokens: Tokenizer::new(input).peekmore(),
226 previous: None,
227 }
228 }
229}
230
/// Turns a stream of characters into a stream of positioned tokens.
///
/// The struct itself places no bound on `T`; the `impl` blocks that need
/// `T: Iterator<Item = char>` declare it themselves.
#[derive(Debug)]
pub struct Tokenizer<T> {
    /// The input paired with a running zero-based index for each item.
    input: Enumerate<T>,
}
238
239impl<T> Iterator for Tokenizer<T>
240where
241 T: Iterator<Item = char>,
242{
243 type Item = Token;
244
245 fn next(&mut self) -> Option<Self::Item> {
246 if let Some((pos, ch)) = self.input.next() {
247 return Some(Token {
248 token_type: TokenType::from(ch),
249 pos,
250 });
251 }
252
253 None
254 }
255}
256
257impl<T> Tokenizer<T>
258where
259 T: Iterator<Item = char>,
260{
261 pub fn new(input: T) -> Tokenizer<T> {
262 Tokenizer {
263 input: input.enumerate(),
264 }
265 }
266}
267
268#[derive(Debug)]
269pub struct Token {
270 pub token_type: TokenType,
271 pub pos: usize,
272}
273
/// Classification of a single input character.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenType {
    /// `?`
    Questionmark,
    /// `=`
    Equal,
    /// `,`
    Comma,
    /// `.`
    Dot,
    /// A single `:` character.
    DoubleColon,
    /// `-`
    Minus,
    /// `(`
    RoundBracketsOpen,
    /// `)`
    RoundBracketsClose,
    /// `[`
    SquareBracketsOpen,
    /// `]`
    SquareBracketsClose,
    /// Presumably `<`; verify against the `From<char>` conversion.
    AngleBracketsOpen,
    /// `>`
    AngleBracketsClose,
    /// `{`
    CurlyBracketsOpen,
    /// `}`
    CurlyBracketsClose,
    /// `&`
    Ampersand,
    /// A space character.
    Space,
    /// Any character without a dedicated variant.
    Char,
}
294
295impl From<char> for TokenType {
296 fn from(ch: char) -> TokenType {
297 use TokenType::*;
298
299 match ch {
300 '?' => Questionmark,
301 '=' => Equal,
302 ',' => Comma,
303 '.' => Dot,
304 ':' => DoubleColon,
305 '-' => Minus,
306 '>' => AngleBracketsClose,
307 '(' => RoundBracketsOpen,
308 ')' => RoundBracketsClose,
309 '[' => SquareBracketsOpen,
310 ']' => SquareBracketsClose,
311 '{' => CurlyBracketsOpen,
312 '}' => CurlyBracketsClose,
313 '&' => Ampersand,
314 ' ' => Space,
315 _ => Char,
316 }
317 }
318}
319
/// A plain `select` list is split into strings around `=` and `,`.
#[test]
fn simple_select() {
    use SpanType::*;

    let input = "select=first_name,age";

    let actual: Vec<(SpanType, &str)> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    let expected = vec![
        (String, "select"),
        (Equal, "="),
        (String, "first_name"),
        (Separator, ","),
        (String, "age"),
    ];

    assert_eq!(expected, actual);
}
343
/// A query with a dotted filter and a second `&`-joined parameter.
#[test]
fn simple_query() {
    use SpanType::*;

    let input = "?id=not.eq.5&order=id";

    let actual: Vec<(SpanType, &str)> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    let expected = vec![
        (QueryStart, "?"),
        (String, "id"),
        (Equal, "="),
        (String, "not"),
        (PathSeparator, "."),
        (String, "eq"),
        (PathSeparator, "."),
        (String, "5"),
        (And, "&"),
        (String, "order"),
        (Equal, "="),
        (String, "id"),
    ];

    assert_eq!(expected, actual);
}
373
/// An `or` group with nested parentheses, spaces after commas and a
/// percent-encoded value.
#[test]
fn or_statement_query() {
    use SpanType::*;

    let input = "?or=(text_search_vector.phfts(german).Art%20Spass, text_search_vector.phfts(french).amusant, text_search_vector.fts(english).impossible)";

    let actual: Vec<(SpanType, &str)> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    let expected = vec![
        (QueryStart, "?"),
        (String, "or"),
        (Equal, "="),
        (CaptureStart, "("),
        (String, "text_search_vector"),
        (PathSeparator, "."),
        (String, "phfts"),
        (CaptureStart, "("),
        (String, "german"),
        (CaptureEnd, ")"),
        (PathSeparator, "."),
        (String, "Art%20Spass"),
        (Separator, ","),
        (Empty, " "),
        (String, "text_search_vector"),
        (PathSeparator, "."),
        (String, "phfts"),
        (CaptureStart, "("),
        (String, "french"),
        (CaptureEnd, ")"),
        (PathSeparator, "."),
        (String, "amusant"),
        (Separator, ","),
        (Empty, " "),
        (String, "text_search_vector"),
        (PathSeparator, "."),
        (String, "fts"),
        (CaptureStart, "("),
        (String, "english"),
        (CaptureEnd, ")"),
        (PathSeparator, "."),
        (String, "impossible"),
        (CaptureEnd, ")"),
    ];

    assert_eq!(expected, actual);
}
424
/// Nested capture groups inside `select`, followed by a dotted filter.
#[test]
fn nested_statement_query() {
    use SpanType::*;

    let input = "?select=id,projects(id,tasks(id,name))&projects.tasks.name=like.Design*";

    let actual: Vec<(SpanType, &str)> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    let expected = vec![
        (QueryStart, "?"),
        (String, "select"),
        (Equal, "="),
        (String, "id"),
        (Separator, ","),
        (String, "projects"),
        (CaptureStart, "("),
        (String, "id"),
        (Separator, ","),
        (String, "tasks"),
        (CaptureStart, "("),
        (String, "id"),
        (Separator, ","),
        (String, "name"),
        (CaptureEnd, ")"),
        (CaptureEnd, ")"),
        (And, "&"),
        (String, "projects"),
        (PathSeparator, "."),
        (String, "tasks"),
        (PathSeparator, "."),
        (String, "name"),
        (Equal, "="),
        (String, "like"),
        (PathSeparator, "."),
        (String, "Design*"),
    ];

    assert_eq!(expected, actual);
}
468
/// A curly-brace list is delimited by `ListStart` / `ListEnd` spans.
#[test]
fn statement_with_list_query() {
    use SpanType::*;

    let input = "?select=id&arr_data=cd.{1,2,4}";

    let actual: Vec<(SpanType, &str)> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    let expected = vec![
        (QueryStart, "?"),
        (String, "select"),
        (Equal, "="),
        (String, "id"),
        (And, "&"),
        (String, "arr_data"),
        (Equal, "="),
        (String, "cd"),
        (PathSeparator, "."),
        (ListStart, "{"),
        (String, "1"),
        (Separator, ","),
        (String, "2"),
        (Separator, ","),
        (String, "4"),
        (ListEnd, "}"),
    ];

    assert_eq!(expected, actual);
}
502
/// A single `:` lexes as `Alias`, while `::` lexes as one `Cast` span.
#[test]
fn typecast_statement_query() {
    use SpanType::*;

    let input = "select=clientId:id,oid_col::int,oid_array_col::_int4";

    let actual: Vec<(SpanType, &str)> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    let expected = vec![
        (String, "select"),
        (Equal, "="),
        (String, "clientId"),
        (Alias, ":"),
        (String, "id"),
        (Separator, ","),
        (String, "oid_col"),
        (Cast, "::"),
        (String, "int"),
        (Separator, ","),
        (String, "oid_array_col"),
        (Cast, "::"),
        (String, "_int4"),
    ];

    assert_eq!(expected, actual);
}
533
/// Percent-encoded characters stay inside string spans; a `-` that is not
/// followed by `>` is kept as part of the surrounding string.
#[test]
fn statement_with_escaped_characters_query() {
    use SpanType::*;

    let input = "?select=%22:arr-%3Eow::cast%22,%22(inside,parens)%22,%22a.dotted.column%22,%22%20%20col%20%20w%20%20space%20%20%22&%22*id*%22=eq.1";

    let actual: Vec<(SpanType, &str)> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    let expected = vec![
        (QueryStart, "?"),
        (String, "select"),
        (Equal, "="),
        (String, "%22"),
        (Alias, ":"),
        (String, "arr-%3Eow"),
        (Cast, "::"),
        (String, "cast%22"),
        (Separator, ","),
        (String, "%22"),
        (CaptureStart, "("),
        (String, "inside"),
        (Separator, ","),
        (String, "parens"),
        (CaptureEnd, ")"),
        (String, "%22"),
        (Separator, ","),
        (String, "%22a"),
        (PathSeparator, "."),
        (String, "dotted"),
        (PathSeparator, "."),
        (String, "column%22"),
        (Separator, ","),
        (String, "%22%20%20col%20%20w%20%20space%20%20%22"),
        (And, "&"),
        (String, "%22*id*%22"),
        (Equal, "="),
        (String, "eq"),
        (PathSeparator, "."),
        (String, "1"),
    ];

    assert_eq!(expected, actual);
}
581
/// `->` lexes as `Arrow` and `->>` as a single `BinaryArrow` span.
#[test]
fn statement_with_json_query() {
    use SpanType::*;

    let input = "select=id,json_data->>blood_type,json_data->phones";

    let actual: Vec<(SpanType, &str)> = Lexer::new(input.chars())
        .map(|span| (span.span_type, &input[span.range]))
        .collect();

    let expected = vec![
        (String, "select"),
        (Equal, "="),
        (String, "id"),
        (Separator, ","),
        (String, "json_data"),
        (BinaryArrow, "->>"),
        (String, "blood_type"),
        (Separator, ","),
        (String, "json_data"),
        (Arrow, "->"),
        (String, "phones"),
    ];

    assert_eq!(expected, actual);
}