1use std::borrow::Cow;
2use unicode_categories::UnicodeCategories;
3use winnow::ascii::{digit0, digit1, till_line_ending, Caseless};
4use winnow::combinator::{alt, dispatch, eof, fail, opt, peek, terminated};
5use winnow::error::ContextError;
6use winnow::error::ParserError;
7use winnow::prelude::*;
8use winnow::token::{any, one_of, rest, take, take_until, take_while};
9use winnow::Result;
10
11use crate::{Dialect, FormatOptions};
12
13pub(crate) fn tokenize<'a>(
14 mut input: &'a str,
15 named_placeholders: bool,
16 options: &FormatOptions,
17) -> Vec<Token<'a>> {
18 let mut tokens: Vec<Token> = Vec::new();
19
20 let mut last_non_whitespace_token = None;
21 let mut last_reserved_token = None;
22 let mut last_reserved_top_level_token = None;
23
24 if let Ok(Some(result)) = opt(get_whitespace_token).parse_next(&mut input) {
25 tokens.push(result);
26 }
27
28 while let Ok(mut result) = get_next_token(
30 &mut input,
31 last_non_whitespace_token.clone(),
32 last_reserved_token.clone(),
33 last_reserved_top_level_token.clone(),
34 named_placeholders,
35 options.dialect,
36 ) {
37 match result.kind {
38 TokenKind::Reserved => {
39 last_reserved_token = Some(result.clone());
40 }
41 TokenKind::ReservedTopLevel => {
42 last_reserved_top_level_token = Some(result.clone());
43 }
44 TokenKind::Join => {
45 if options.joins_as_top_level {
46 result.kind = TokenKind::ReservedTopLevel;
47 } else {
48 result.kind = TokenKind::ReservedNewline;
49 }
50 }
51 _ => {}
52 }
53
54 if result.kind != TokenKind::Whitespace {
55 last_non_whitespace_token = Some(result.clone());
56 }
57
58 tokens.push(result);
59
60 if let Ok(Some(result)) = opt(get_whitespace_token).parse_next(&mut input) {
61 tokens.push(result);
62 }
63 }
64 tokens
65}
66
/// One lexical token produced by the tokenizer.
#[derive(Debug, Clone)]
pub(crate) struct Token<'a> {
    /// Classification assigned by the parser that recognised the token.
    pub kind: TokenKind,
    /// The token text, borrowed from the tokenized input.
    pub value: &'a str,
    /// Placeholder identity (name or index). The parsers visible in this part
    /// of the file only ever set it on placeholder tokens; it is `None`
    /// everywhere else, and also for a bare `?` placeholder.
    pub key: Option<PlaceholderKind<'a>>,
    /// Usually the same slice as `value`. Top-level tokens whose text starts
    /// with `CREATE` or `SELECT` carry just that keyword here instead.
    pub alias: &'a str,
}
76
/// Classification of a [`Token`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) enum TokenKind {
    /// `::` or `[]`.
    TypeSpecifier,
    /// A run of one or more whitespace characters.
    Whitespace,
    /// A quoted string or quoted identifier.
    String,
    /// A reserved word without special layout handling.
    Reserved,
    /// A reserved word that opens a top-level clause (`SELECT`, `FROM`, ...).
    ReservedTopLevel,
    /// Top-level keywords such as `UNION`, `INTERSECT`, `BEGIN`, `WITH`.
    ReservedTopLevelNoIndent,
    /// Keywords such as `AND`, `OR`, `WHEN`, `ELSE`.
    ReservedNewline,
    /// `DO NOTHING`, `DO UPDATE SET`, and `SET` following `UPDATE`.
    ReservedNewlineAfter,
    Operator,
    /// `(`, `CASE`, and `[` for the PostgreSQL dialect.
    OpenParen,
    /// `)`, `END`, and `]` for the PostgreSQL dialect.
    CloseParen,
    /// `# ...` or `-- ...` up to the end of the line.
    LineComment,
    /// `/* ... */`.
    BlockComment,
    Number,
    /// `?`, `?N`, `$N`, or a named placeholder introduced by `@`, `:` or `$`.
    Placeholder,
    Word,
    /// Emitted by the join parser; `tokenize` rewrites it to
    /// `ReservedTopLevel` or `ReservedNewline` before storing the token.
    Join,
}
97
/// Identity of a placeholder token.
#[derive(Debug, Clone)]
pub(crate) enum PlaceholderKind<'a> {
    /// A named placeholder; holds the name without its sigil or quotes.
    Named(Cow<'a, str>),
    /// An index that is already zero-based (written `?N`).
    ZeroIndexed(usize),
    /// An index that is one-based in the source (written `$N`).
    OneIndexed(usize),
}

impl<'a> PlaceholderKind<'a> {
    /// Returns the placeholder name, or `""` for indexed placeholders.
    pub fn named(&'a self) -> &'a str {
        match self {
            PlaceholderKind::Named(val) => val.as_ref(),
            _ => "",
        }
    }

    /// Returns the zero-based parameter index, or `None` when there is none.
    ///
    /// `OneIndexed(0)` (source text `$0`) has no zero-based equivalent and
    /// yields `None`; the previous `val - 1` underflowed, which panics in
    /// debug builds and wraps to `usize::MAX` in release builds.
    pub fn indexed(&self) -> Option<usize> {
        match self {
            PlaceholderKind::ZeroIndexed(val) => Some(*val),
            PlaceholderKind::OneIndexed(val) => val.checked_sub(1),
            PlaceholderKind::Named(_) => None,
        }
    }
}
121
122fn get_next_token<'a>(
123 input: &mut &'a str,
124 previous_token: Option<Token<'a>>,
125 last_reserved_token: Option<Token<'a>>,
126 last_reserved_top_level_token: Option<Token<'a>>,
127 named_placeholders: bool,
128 dialect: Dialect,
129) -> Result<Token<'a>> {
130 alt((
131 get_comment_token,
132 |input: &mut _| get_type_specifier_token(input, previous_token.clone()),
133 |input: &mut _| get_string_token(input, dialect),
134 |input: &mut _| get_open_paren_token(input, dialect),
135 |input: &mut _| get_close_paren_token(input, dialect),
136 get_number_token,
137 |input: &mut _| {
138 get_reserved_word_token(
139 input,
140 previous_token.clone(),
141 last_reserved_token.clone(),
142 last_reserved_top_level_token.clone(),
143 )
144 },
145 get_operator_token,
146 |input: &mut _| get_placeholder_token(input, named_placeholders, dialect),
147 get_word_token,
148 get_any_other_char,
149 ))
150 .parse_next(input)
151}
152fn get_type_specifier_token<'i>(
153 input: &mut &'i str,
154 previous_token: Option<Token<'i>>,
155) -> Result<Token<'i>> {
156 if previous_token.is_some_and(|token| {
157 ![
158 TokenKind::CloseParen,
159 TokenKind::Placeholder,
160 TokenKind::Reserved,
161 TokenKind::String,
162 TokenKind::Number,
163 TokenKind::TypeSpecifier,
164 TokenKind::Word,
165 ]
166 .contains(&token.kind)
167 }) {
168 fail.parse_next(input)
169 } else {
170 alt(("::", "[]")).parse_next(input).map(|token| Token {
171 kind: TokenKind::TypeSpecifier,
172 value: token,
173 key: None,
174 alias: token,
175 })
176 }
177}
178fn get_whitespace_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
179 take_while(1.., char::is_whitespace)
180 .parse_next(input)
181 .map(|token| Token {
182 kind: TokenKind::Whitespace,
183 value: token,
184 key: None,
185 alias: token,
186 })
187}
188
189fn get_comment_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
190 dispatch! {any;
191 '#' => till_line_ending.value(TokenKind::LineComment),
192 '-' => ('-', till_line_ending).value(TokenKind::LineComment),
193 '/' => ('*', alt((take_until(0.., "*/"), rest)), opt(take(2usize))).value(TokenKind::BlockComment),
194 _ => fail,
195 }
196 .with_taken()
197 .parse_next(input)
198 .map(|(kind, token)| Token {
199 kind,
200 value: token,
201 key: None,
202 alias: token,
203 })
204}
205
206pub fn take_till_escaping<'a>(
207 desired: char,
208 escapes: &'static [char],
209) -> impl Parser<&'a str, &'a str, ContextError> {
210 move |input: &mut &'a str| {
211 let mut chars = input.char_indices().peekable();
212 loop {
213 let item = chars.next();
214 let next = chars.peek().map(|item| item.1);
215 match item {
216 Some((byte_pos, item)) => {
217 if escapes.contains(&item) && next.map(|n| n == desired).unwrap_or(false) {
219 chars.next();
221 continue;
222 }
223
224 if item == desired {
225 return Ok(input.next_slice(byte_pos));
226 }
227 }
228 None => {
229 return rest.parse_next(input);
230 }
231 }
232 }
233 }
234}
235
236fn get_string_token<'i>(input: &mut &'i str, dialect: Dialect) -> Result<Token<'i>> {
244 dispatch! {any;
245 '`' => (take_till_escaping('`', &['`']), any).void(),
246 '[' if dialect == Dialect::SQLServer => (take_till_escaping(']', &[']']), any).void(),
247 '"' => (take_till_escaping('"', &['"', '\\']), any).void(),
248 '\'' => (take_till_escaping('\'', &['\'', '\\']), any).void(),
249 'N' => ('\'', take_till_escaping('\'', &['\'', '\\']), any).void(),
250 'E' => ('\'', take_till_escaping('\'', &['\'', '\\']), any).void(),
251 'x' => ('\'', take_till_escaping('\'', &[]), any).void(),
252 'X' => ('\'', take_till_escaping('\'', &[]), any).void(),
253 _ => fail,
254 }
255 .take()
256 .parse_next(input)
257 .map(|token| Token {
258 kind: TokenKind::String,
259 value: token,
260 key: None,
261 alias: token,
262 })
263}
264
265fn get_placeholder_string_token<'i>(input: &mut &'i str, dialect: Dialect) -> Result<Token<'i>> {
267 dispatch! {any;
268 '`'=>( take_till_escaping('`', &['`']), any).void(),
269 '[' if dialect == Dialect::SQLServer =>( take_till_escaping(']', &[']']), any).void(),
270 '"'=>( take_till_escaping('"', &['\\']), any).void(),
271 '\''=>( take_till_escaping('\'', &['\\']), any).void(),
272 'N' =>('\'', take_till_escaping('\'', &['\\']), any).void(),
273 _ => fail,
274 }
275 .take()
276 .parse_next(input)
277 .map(|token| Token {
278 kind: TokenKind::String,
279 value: token,
280 key: None,
281 alias: token,
282 })
283}
284
285fn get_open_paren_token<'i>(input: &mut &'i str, dialect: Dialect) -> Result<Token<'i>> {
286 let case = terminated(Caseless("CASE"), end_of_word);
287 let open_paren = if dialect == Dialect::PostgreSql {
288 ("(", "[", case)
289 } else {
290 ("(", "(", case)
291 };
292
293 alt(open_paren).parse_next(input).map(|token| Token {
294 kind: TokenKind::OpenParen,
295 value: token,
296 key: None,
297 alias: token,
298 })
299}
300
301fn get_close_paren_token<'i>(input: &mut &'i str, dialect: Dialect) -> Result<Token<'i>> {
302 let end = terminated(Caseless("END"), end_of_word);
303 let close_paren = if dialect == Dialect::PostgreSql {
304 (")", "]", end)
305 } else {
306 (")", ")", end)
307 };
308 alt(close_paren).parse_next(input).map(|token| Token {
309 kind: TokenKind::CloseParen,
310 value: token,
311 key: None,
312 alias: token,
313 })
314}
315
316fn get_placeholder_token<'i>(
317 input: &mut &'i str,
318 named_placeholders: bool,
319 dialect: Dialect,
320) -> Result<Token<'i>> {
321 if named_placeholders {
325 alt((
326 get_ident_named_placeholder_token,
327 |input: &mut _| get_string_named_placeholder_token(input, dialect),
328 get_indexed_placeholder_token,
329 ))
330 .parse_next(input)
331 } else {
332 alt((
333 get_indexed_placeholder_token,
334 get_ident_named_placeholder_token,
335 |input: &mut _| get_string_named_placeholder_token(input, dialect),
336 ))
337 .parse_next(input)
338 }
339}
340
341fn get_indexed_placeholder_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
342 alt(((one_of(('?', '$')), digit1).take(), "?"))
343 .parse_next(input)
344 .map(|token| Token {
345 kind: TokenKind::Placeholder,
346 value: token,
347 key: if token.len() > 1 {
348 if let Ok(index) = token[1..].parse::<usize>() {
349 Some(if token.starts_with('$') {
350 PlaceholderKind::OneIndexed(index)
351 } else {
352 PlaceholderKind::ZeroIndexed(index)
353 })
354 } else {
355 None
356 }
357 } else {
358 None
359 },
360 alias: token,
361 })
362}
363
364fn get_ident_named_placeholder_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
365 (
366 one_of(('@', ':', '$')),
367 take_while(1.., |item: char| {
368 item.is_alphanumeric() || item == '.' || item == '_' || item == '$'
369 }),
370 )
371 .take()
372 .parse_next(input)
373 .map(|token| {
374 let index = Cow::Borrowed(&token[1..]);
375 Token {
376 kind: TokenKind::Placeholder,
377 value: token,
378 key: Some(PlaceholderKind::Named(index)),
379 alias: token,
380 }
381 })
382}
383
384fn get_string_named_placeholder_token<'i>(
385 input: &mut &'i str,
386 dialect: Dialect,
387) -> Result<Token<'i>> {
388 (one_of(('@', ':')), |input: &mut _| {
389 get_placeholder_string_token(input, dialect)
390 })
391 .take()
392 .parse_next(input)
393 .map(|token| {
394 let index =
395 get_escaped_placeholder_key(&token[2..token.len() - 1], &token[token.len() - 1..]);
396 Token {
397 kind: TokenKind::Placeholder,
398 value: token,
399 key: Some(PlaceholderKind::Named(index)),
400 alias: token,
401 }
402 })
403}
404
/// Removes the backslash from every `\<quote_char>` sequence in `key`.
///
/// Returns the input borrowed when it contains nothing to unescape, so the
/// common case of a plain placeholder name does not allocate.
fn get_escaped_placeholder_key<'a>(key: &'a str, quote_char: &str) -> Cow<'a, str> {
    // Without any backslash there cannot be an escape sequence.
    if !key.contains('\\') {
        return Cow::Borrowed(key);
    }

    let escaped_quote = format!("\\{}", quote_char);
    if key.contains(escaped_quote.as_str()) {
        Cow::Owned(key.replace(escaped_quote.as_str(), quote_char))
    } else {
        Cow::Borrowed(key)
    }
}
408
409fn get_number_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
410 (opt("-"), alt((scientific_notation, decimal_number, digit1)))
411 .take()
412 .parse_next(input)
413 .map(|token| Token {
414 kind: TokenKind::Number,
415 value: token,
416 key: None,
417 alias: token,
418 })
419}
420
421fn decimal_number<'i>(input: &mut &'i str) -> Result<&'i str> {
422 (digit1, ".", digit0).take().parse_next(input)
423}
424
425fn scientific_notation<'i>(input: &mut &'i str) -> Result<&'i str> {
426 (
427 alt((decimal_number, digit1)),
428 "e",
429 opt(one_of(('-', '+'))),
430 digit1,
431 )
432 .take()
433 .parse_next(input)
434}
435
436fn get_reserved_word_token<'a>(
437 input: &mut &'a str,
438 previous_token: Option<Token<'a>>,
439 last_reserved_token: Option<Token<'a>>,
440 last_reserved_top_level_token: Option<Token<'a>>,
441) -> Result<Token<'a>> {
442 if let Some(token) = previous_token {
445 if token.value == "." {
446 return Err(ParserError::from_input(input));
447 }
448 }
449
450 if !('a'..='z', 'A'..='Z', '$').contains_token(input.chars().next().unwrap_or('\0')) {
451 return Err(ParserError::from_input(input));
452 }
453
454 alt((
455 get_top_level_reserved_token(last_reserved_top_level_token),
456 get_newline_after_reserved_token(),
457 get_newline_reserved_token(last_reserved_token),
458 get_join_token(),
459 get_top_level_reserved_token_no_indent,
460 get_plain_reserved_token,
461 ))
462 .parse_next(input)
463}
464
/// Returns the first `words` whitespace-separated words of `input`, joined
/// by single spaces and converted to ASCII upper case.
fn get_uc_words(input: &str, words: usize) -> String {
    let mut joined = String::new();
    for (position, word) in input.split_whitespace().take(words).enumerate() {
        if position > 0 {
            joined.push(' ');
        }
        joined.push_str(word);
    }
    joined.make_ascii_uppercase();
    joined
}
474
475fn finalize<'a>(input: &mut &'a str, token: &str) -> &'a str {
476 let final_word = token.split_whitespace().last().unwrap_or(token);
477 let input_end_pos = input.to_ascii_uppercase().find(final_word).unwrap_or(0) + final_word.len();
478 input.next_slice(input_end_pos)
479}
480
481fn get_top_level_reserved_token<'a>(
482 last_reserved_top_level_token: Option<Token<'a>>,
483) -> impl Parser<&'a str, Token<'a>, ContextError> {
484 move |input: &mut &'a str| {
485 let uc_input: String = get_uc_words(input, 4);
486 let mut uc_input = uc_input.as_str();
487
488 let first_char = peek(any).parse_next(input)?.to_ascii_uppercase();
490
491 let result: Result<&str> = match first_char {
493 'A' => alt((
494 terminated("ADD", end_of_word),
495 terminated("AFTER", end_of_word),
496 terminated("ALTER COLUMN", end_of_word),
497 terminated("ALTER TABLE", end_of_word),
498 ))
499 .parse_next(&mut uc_input),
500
501 'C' => terminated(
502 (
503 "CREATE ",
504 opt(alt((
505 "UNLOGGED ",
506 (
507 alt(("GLOBAL ", "LOCAL ")),
508 opt(alt(("TEMPORARY ", "TEMP "))),
509 )
510 .take(),
511 ))),
512 "TABLE",
513 )
514 .take(),
515 end_of_word,
516 )
517 .parse_next(&mut uc_input),
518
519 'D' => terminated("DELETE FROM", end_of_word).parse_next(&mut uc_input),
520
521 'E' => terminated("EXCEPT", end_of_word).parse_next(&mut uc_input),
522
523 'F' => alt((
524 terminated("FETCH FIRST", end_of_word),
525 terminated("FROM", end_of_word),
526 terminated(
527 (
528 "FOR ",
529 alt(("UPDATE", "NO KEY UPDATE", "SHARE", "KEY SHARE")),
530 )
531 .take(),
532 end_of_word,
533 ),
534 ))
535 .parse_next(&mut uc_input),
536
537 'G' => alt((
538 terminated("GROUP BY", end_of_word),
539 terminated("GO", end_of_word),
540 ))
541 .parse_next(&mut uc_input),
542
543 'H' => terminated("HAVING", end_of_word).parse_next(&mut uc_input),
544
545 'I' => alt((
546 terminated("INSERT INTO", end_of_word),
547 terminated("INSERT", end_of_word),
548 ))
549 .parse_next(&mut uc_input),
550
551 'L' => terminated("LIMIT", end_of_word).parse_next(&mut uc_input),
552
553 'M' => alt((
554 terminated("MODIFY", end_of_word),
555 terminated("MERGE INTO", end_of_word),
556 ))
557 .parse_next(&mut uc_input),
558
559 'O' => alt((
560 terminated("ORDER BY", end_of_word),
561 terminated("ON CONFLICT", end_of_word),
562 ))
563 .parse_next(&mut uc_input),
564
565 'P' => terminated("PARTITION BY", end_of_word).parse_next(&mut uc_input),
566
567 'R' => terminated("RETURNING", end_of_word).parse_next(&mut uc_input),
568
569 'S' => alt((
570 terminated("SELECT DISTINCT", end_of_word),
571 terminated("SELECT ALL", end_of_word),
572 terminated("SELECT", end_of_word),
573 terminated("SET CURRENT SCHEMA", end_of_word),
574 terminated("SET SCHEMA", end_of_word),
575 terminated("SET", end_of_word),
576 ))
577 .parse_next(&mut uc_input),
578
579 'U' => alt((
580 terminated("UPDATE", end_of_word),
581 terminated("USING", end_of_word),
582 ))
583 .parse_next(&mut uc_input),
584
585 'V' => terminated("VALUES", end_of_word).parse_next(&mut uc_input),
586
587 'W' => alt((
588 terminated("WHERE", end_of_word),
589 terminated("WINDOW", end_of_word),
590 ))
591 .parse_next(&mut uc_input),
592
593 _ => Err(ParserError::from_input(&uc_input)),
595 };
596
597 if let Ok(token) = result {
598 let token = finalize(input, token);
599
600 let kind = match (
601 token,
602 last_reserved_top_level_token.as_ref().map(|v| v.alias),
603 ) {
604 ("EXCEPT", Some("SELECT")) =>
605 {
607 TokenKind::Reserved
608 }
609 ("SET", Some("UPDATE")) => TokenKind::ReservedNewlineAfter,
610 ("USING", v) if v != Some("MERGE INTO") && v != Some("DELETE FROM") => {
611 TokenKind::Reserved
612 }
613 _ => TokenKind::ReservedTopLevel,
614 };
615
616 let alias = if token.starts_with("CREATE") {
617 "CREATE"
618 } else if token.starts_with("SELECT") {
619 "SELECT"
620 } else {
621 token
622 };
623
624 Ok(Token {
625 kind,
626 value: token,
627 key: None,
628 alias,
629 })
630 } else {
631 Err(ParserError::from_input(input))
632 }
633 }
634}
635
636fn get_join_token<'a>() -> impl Parser<&'a str, Token<'a>, ContextError> {
637 move |input: &mut &'a str| {
638 let uc_input: String = get_uc_words(input, 3);
639 let mut uc_input = uc_input.as_str();
640
641 let standard_joins = alt((
643 terminated("JOIN", end_of_word),
644 terminated("INNER JOIN", end_of_word),
645 terminated("LEFT JOIN", end_of_word),
646 terminated("RIGHT JOIN", end_of_word),
647 terminated("FULL JOIN", end_of_word),
648 terminated("CROSS JOIN", end_of_word),
649 terminated("LEFT OUTER JOIN", end_of_word),
650 terminated("RIGHT OUTER JOIN", end_of_word),
651 terminated("FULL OUTER JOIN", end_of_word),
652 ));
653
654 let specific_joins = alt((
656 terminated("INNER ANY JOIN", end_of_word),
657 terminated("LEFT ANY JOIN", end_of_word),
658 terminated("RIGHT ANY JOIN", end_of_word),
659 terminated("ANY JOIN", end_of_word),
660 terminated("SEMI JOIN", end_of_word),
661 terminated("LEFT SEMI JOIN", end_of_word),
662 terminated("RIGHT SEMI JOIN", end_of_word),
663 terminated("LEFT ANTI JOIN", end_of_word),
664 terminated("RIGHT ANTI JOIN", end_of_word),
665 ));
666
667 let special_joins = alt((
669 terminated("ASOF JOIN", end_of_word),
670 terminated("LEFT ASOF JOIN", end_of_word),
671 terminated("PASTE JOIN", end_of_word),
672 terminated("GLOBAL INNER JOIN", end_of_word),
673 terminated("GLOBAL LEFT JOIN", end_of_word),
674 terminated("GLOBAL RIGHT JOIN", end_of_word),
675 terminated("GLOBAL FULL JOIN", end_of_word),
676 ));
677
678 let result: Result<&str> =
680 alt((standard_joins, specific_joins, special_joins)).parse_next(&mut uc_input);
681
682 if let Ok(token) = result {
683 let final_word = token.split(' ').next_back().unwrap();
684 let input_end_pos =
685 input.to_ascii_uppercase().find(final_word).unwrap() + final_word.len();
686 let token = input.next_slice(input_end_pos);
687 let kind = TokenKind::Join;
688 Ok(Token {
689 kind,
690 value: token,
691 key: None,
692 alias: token,
693 })
694 } else {
695 Err(ParserError::from_input(input))
696 }
697 }
698}
699
700fn get_newline_after_reserved_token<'a>() -> impl Parser<&'a str, Token<'a>, ContextError> {
701 move |input: &mut &'a str| {
702 let uc_input: String = get_uc_words(input, 3);
703 let mut uc_input = uc_input.as_str();
704
705 let mut on_conflict = alt((
706 terminated("DO NOTHING", end_of_word),
707 terminated("DO UPDATE SET", end_of_word),
708 ));
709
710 let result: Result<&str> = on_conflict.parse_next(&mut uc_input);
711
712 if let Ok(token) = result {
713 let value = finalize(input, token);
714 Ok(Token {
715 kind: TokenKind::ReservedNewlineAfter,
716 value,
717 key: None,
718 alias: value,
719 })
720 } else {
721 Err(ParserError::from_input(input))
722 }
723 }
724}
725
726fn get_newline_reserved_token<'a>(
727 last_reserved_token: Option<Token<'a>>,
728) -> impl Parser<&'a str, Token<'a>, ContextError> {
729 move |input: &mut &'a str| {
730 let uc_input: String = get_uc_words(input, 3);
731 let mut uc_input = uc_input.as_str();
732
733 let operators = alt((
738 terminated("CROSS APPLY", end_of_word),
739 terminated("OUTER APPLY", end_of_word),
740 terminated("AND", end_of_word),
741 terminated("OR", end_of_word),
742 terminated("XOR", end_of_word),
743 terminated("WHEN", end_of_word),
744 terminated("ELSE", end_of_word),
745 ));
746
747 let alter_table_actions = alt((
748 terminated("ADD", end_of_word),
749 terminated("DROP", end_of_word),
750 terminated("ALTER", end_of_word),
751 terminated("VALIDATE", end_of_word),
752 terminated("ENABLE", end_of_word),
753 terminated("DISABLE", end_of_word),
754 ));
755
756 let result: Result<&str> = alt((operators, alter_table_actions)).parse_next(&mut uc_input);
758
759 if let Ok(token) = result {
760 let token = finalize(input, token);
761 let kind = if token == "AND"
762 && last_reserved_token.is_some()
763 && last_reserved_token.as_ref().unwrap().value == "BETWEEN"
764 {
765 TokenKind::Reserved
767 } else {
768 TokenKind::ReservedNewline
769 };
770 Ok(Token {
771 kind,
772 value: token,
773 key: None,
774 alias: token,
775 })
776 } else {
777 Err(ParserError::from_input(input))
778 }
779 }
780}
781
782fn get_top_level_reserved_token_no_indent<'i>(input: &mut &'i str) -> Result<Token<'i>> {
783 let uc_input = get_uc_words(input, 2);
784 let mut uc_input = uc_input.as_str();
785
786 let result: Result<&str> = alt((
787 terminated("BEGIN", end_of_word),
788 terminated("DECLARE", end_of_word),
789 terminated("INTERSECT ALL", end_of_word),
790 terminated("INTERSECT", end_of_word),
791 terminated("MINUS", end_of_word),
792 terminated("UNION ALL", end_of_word),
793 terminated("UNION", end_of_word),
794 terminated("WITH", end_of_word),
795 terminated("$$", end_of_word),
796 ))
797 .parse_next(&mut uc_input);
798 if let Ok(token) = result {
799 let value = finalize(input, token);
800 Ok(Token {
801 kind: TokenKind::ReservedTopLevelNoIndent,
802 value,
803 key: None,
804 alias: value,
805 })
806 } else {
807 Err(ParserError::from_input(input))
808 }
809}
810fn get_plain_reserved_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
811 alt((get_plain_reserved_two_token, get_plain_reserved_one_token)).parse_next(input)
812}
813fn get_plain_reserved_one_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
814 let uc_input = get_uc_words(input, 1);
815 let mut uc_input = uc_input.as_str();
816
817 let first_char = peek(any).parse_next(input)?.to_ascii_uppercase();
818
819 let result: Result<&str> = match first_char {
820 'A' => alt((
821 terminated("ACCESSIBLE", end_of_word),
822 terminated("ACTION", end_of_word),
823 terminated("AGAINST", end_of_word),
824 terminated("AGGREGATE", end_of_word),
825 terminated("ALGORITHM", end_of_word),
826 terminated("ALL", end_of_word),
827 terminated("ALTER", end_of_word),
828 terminated("ANALYSE", end_of_word),
829 terminated("ANALYZE", end_of_word),
830 terminated("AS", end_of_word),
831 terminated("ASC", end_of_word),
832 terminated("AUTOCOMMIT", end_of_word),
833 terminated("AUTO_INCREMENT", end_of_word),
834 ))
835 .parse_next(&mut uc_input),
836
837 'B' => alt((
838 terminated("BACKUP", end_of_word),
839 terminated("BETWEEN", end_of_word),
840 terminated("BINLOG", end_of_word),
841 terminated("BOTH", end_of_word),
842 ))
843 .parse_next(&mut uc_input),
844
845 'C' => alt((
846 terminated("CASCADE", end_of_word),
847 terminated("CASE", end_of_word),
848 terminated("CHANGE", end_of_word),
849 terminated("CHANGED", end_of_word),
850 terminated("CHARSET", end_of_word),
851 terminated("CHECK", end_of_word),
852 terminated("CHECKSUM", end_of_word),
853 terminated("COLLATE", end_of_word),
854 terminated("COLLATION", end_of_word),
855 terminated("COLUMN", end_of_word),
856 terminated("COLUMNS", end_of_word),
857 terminated("COMMENT", end_of_word),
858 terminated("COMMIT", end_of_word),
859 terminated("COMMITTED", end_of_word),
860 terminated("COMPRESSED", end_of_word),
861 terminated("CONCURRENT", end_of_word),
862 terminated("CONSTRAINT", end_of_word),
863 terminated("CONTAINS", end_of_word),
864 alt((
865 terminated("CONVERT", end_of_word),
866 terminated("CREATE", end_of_word),
867 terminated("CROSS", end_of_word),
868 terminated("CURRENT_TIMESTAMP", end_of_word),
869 )),
870 ))
871 .parse_next(&mut uc_input),
872
873 'D' => alt((
874 terminated("DATABASE", end_of_word),
875 terminated("DATABASES", end_of_word),
876 terminated("DAY", end_of_word),
877 terminated("DAY_HOUR", end_of_word),
878 terminated("DAY_MINUTE", end_of_word),
879 terminated("DAY_SECOND", end_of_word),
880 terminated("DEFAULT", end_of_word),
881 terminated("DEFINER", end_of_word),
882 terminated("DELAYED", end_of_word),
883 terminated("DELETE", end_of_word),
884 terminated("DESC", end_of_word),
885 terminated("DESCRIBE", end_of_word),
886 terminated("DETERMINISTIC", end_of_word),
887 terminated("DISTINCT", end_of_word),
888 terminated("DISTINCTROW", end_of_word),
889 terminated("DIV", end_of_word),
890 terminated("DO", end_of_word),
891 terminated("DROP", end_of_word),
892 terminated("DUMPFILE", end_of_word),
893 terminated("DUPLICATE", end_of_word),
894 terminated("DYNAMIC", end_of_word),
895 ))
896 .parse_next(&mut uc_input),
897
898 'E' => alt((
899 terminated("ELSE", end_of_word),
900 terminated("ENCLOSED", end_of_word),
901 terminated("END", end_of_word),
902 terminated("ENGINE", end_of_word),
903 terminated("ENGINES", end_of_word),
904 terminated("ENGINE_TYPE", end_of_word),
905 terminated("ESCAPE", end_of_word),
906 terminated("ESCAPED", end_of_word),
907 terminated("EVENTS", end_of_word),
908 terminated("EXEC", end_of_word),
909 terminated("EXECUTE", end_of_word),
910 terminated("EXISTS", end_of_word),
911 terminated("EXPLAIN", end_of_word),
912 terminated("EXTENDED", end_of_word),
913 ))
914 .parse_next(&mut uc_input),
915
916 'F' => alt((
917 terminated("FAST", end_of_word),
918 terminated("FETCH", end_of_word),
919 terminated("FIELDS", end_of_word),
920 terminated("FILE", end_of_word),
921 terminated("FIRST", end_of_word),
922 terminated("FIXED", end_of_word),
923 terminated("FLUSH", end_of_word),
924 terminated("FOR", end_of_word),
925 terminated("FORCE", end_of_word),
926 terminated("FOREIGN", end_of_word),
927 terminated("FULL", end_of_word),
928 terminated("FULLTEXT", end_of_word),
929 terminated("FUNCTION", end_of_word),
930 ))
931 .parse_next(&mut uc_input),
932
933 'G' => alt((
934 terminated("GLOBAL", end_of_word),
935 terminated("GRANT", end_of_word),
936 terminated("GRANTS", end_of_word),
937 terminated("GROUP_CONCAT", end_of_word),
938 ))
939 .parse_next(&mut uc_input),
940
941 'H' => alt((
942 terminated("HEAP", end_of_word),
943 terminated("HIGH_PRIORITY", end_of_word),
944 terminated("HOSTS", end_of_word),
945 terminated("HOUR", end_of_word),
946 terminated("HOUR_MINUTE", end_of_word),
947 terminated("HOUR_SECOND", end_of_word),
948 ))
949 .parse_next(&mut uc_input),
950
951 'I' => alt((
952 terminated("IDENTIFIED", end_of_word),
953 terminated("IF", end_of_word),
954 terminated("IFNULL", end_of_word),
955 terminated("IGNORE", end_of_word),
956 terminated("IN", end_of_word),
957 terminated("INDEX", end_of_word),
958 terminated("INDEXES", end_of_word),
959 terminated("INFILE", end_of_word),
960 terminated("INSERT", end_of_word),
961 terminated("INSERT_ID", end_of_word),
962 terminated("INSERT_METHOD", end_of_word),
963 terminated("INTERVAL", end_of_word),
964 terminated("INTO", end_of_word),
965 terminated("INVOKER", end_of_word),
966 terminated("IS", end_of_word),
967 terminated("ISOLATION", end_of_word),
968 ))
969 .parse_next(&mut uc_input),
970
971 'K' => alt((
972 terminated("KEY", end_of_word),
973 terminated("KEYS", end_of_word),
974 terminated("KILL", end_of_word),
975 ))
976 .parse_next(&mut uc_input),
977
978 'L' => alt((
979 terminated("LAST_INSERT_ID", end_of_word),
980 terminated("LEADING", end_of_word),
981 terminated("LEVEL", end_of_word),
982 terminated("LIKE", end_of_word),
983 terminated("LINEAR", end_of_word),
984 terminated("LINES", end_of_word),
985 terminated("LOAD", end_of_word),
986 terminated("LOCAL", end_of_word),
987 terminated("LOCK", end_of_word),
988 terminated("LOCKS", end_of_word),
989 terminated("LOGS", end_of_word),
990 terminated("LOW_PRIORITY", end_of_word),
991 ))
992 .parse_next(&mut uc_input),
993
994 'M' => alt((
995 terminated("MARIA", end_of_word),
996 terminated("MASTER", end_of_word),
997 terminated("MASTER_CONNECT_RETRY", end_of_word),
998 terminated("MASTER_HOST", end_of_word),
999 terminated("MASTER_LOG_FILE", end_of_word),
1000 terminated("MATCH", end_of_word),
1001 terminated("MAX_CONNECTIONS_PER_HOUR", end_of_word),
1002 terminated("MAX_QUERIES_PER_HOUR", end_of_word),
1003 terminated("MAX_ROWS", end_of_word),
1004 terminated("MAX_UPDATES_PER_HOUR", end_of_word),
1005 terminated("MAX_USER_CONNECTIONS", end_of_word),
1006 terminated("MEDIUM", end_of_word),
1007 terminated("MERGE", end_of_word),
1008 terminated("MINUTE", end_of_word),
1009 terminated("MINUTE_SECOND", end_of_word),
1010 terminated("MIN_ROWS", end_of_word),
1011 terminated("MODE", end_of_word),
1012 terminated("MODIFY", end_of_word),
1013 terminated("MONTH", end_of_word),
1014 terminated("MRG_MYISAM", end_of_word),
1015 terminated("MYISAM", end_of_word),
1016 ))
1017 .parse_next(&mut uc_input),
1018
1019 'N' => alt((
1020 terminated("NAMES", end_of_word),
1021 terminated("NATURAL", end_of_word),
1022 terminated("NOT", end_of_word),
1023 terminated("NOW()", end_of_word),
1024 terminated("NULL", end_of_word),
1025 ))
1026 .parse_next(&mut uc_input),
1027
1028 'O' => alt((
1029 terminated("OFFSET", end_of_word),
1030 terminated("ON", end_of_word),
1031 terminated("ONLY", end_of_word),
1032 terminated("OPEN", end_of_word),
1033 terminated("OPTIMIZE", end_of_word),
1034 terminated("OPTION", end_of_word),
1035 terminated("OPTIONALLY", end_of_word),
1036 terminated("OUTFILE", end_of_word),
1037 ))
1038 .parse_next(&mut uc_input),
1039
1040 'P' => alt((
1041 terminated("PACK_KEYS", end_of_word),
1042 terminated("PAGE", end_of_word),
1043 terminated("PARTIAL", end_of_word),
1044 terminated("PARTITION", end_of_word),
1045 terminated("PARTITIONS", end_of_word),
1046 terminated("PASSWORD", end_of_word),
1047 terminated("PRIMARY", end_of_word),
1048 terminated("PRIVILEGES", end_of_word),
1049 terminated("PROCEDURE", end_of_word),
1050 terminated("PROCESS", end_of_word),
1051 terminated("PROCESSLIST", end_of_word),
1052 terminated("PURGE", end_of_word),
1053 ))
1054 .parse_next(&mut uc_input),
1055
1056 'Q' => terminated("QUICK", end_of_word).parse_next(&mut uc_input),
1057
1058 'R' => alt((
1059 terminated("RAID0", end_of_word),
1060 terminated("RAID_CHUNKS", end_of_word),
1061 terminated("RAID_CHUNKSIZE", end_of_word),
1062 terminated("RAID_TYPE", end_of_word),
1063 terminated("RANGE", end_of_word),
1064 terminated("READ", end_of_word),
1065 terminated("READ_ONLY", end_of_word),
1066 terminated("READ_WRITE", end_of_word),
1067 terminated("REFERENCES", end_of_word),
1068 terminated("REGEXP", end_of_word),
1069 terminated("RELOAD", end_of_word),
1070 terminated("RENAME", end_of_word),
1071 terminated("REPAIR", end_of_word),
1072 terminated("REPEATABLE", end_of_word),
1073 terminated("REPLACE", end_of_word),
1074 terminated("REPLICATION", end_of_word),
1075 terminated("RESET", end_of_word),
1076 alt((
1077 terminated("RESTORE", end_of_word),
1078 terminated("RESTRICT", end_of_word),
1079 terminated("RETURN", end_of_word),
1080 terminated("RETURNS", end_of_word),
1081 terminated("REVOKE", end_of_word),
1082 terminated("RLIKE", end_of_word),
1083 terminated("ROLLBACK", end_of_word),
1084 terminated("ROW", end_of_word),
1085 terminated("ROWS", end_of_word),
1086 terminated("ROW_FORMAT", end_of_word),
1087 )),
1088 ))
1089 .parse_next(&mut uc_input),
1090
1091 'S' => alt((
1092 terminated("SECOND", end_of_word),
1093 terminated("SECURITY", end_of_word),
1094 terminated("SEPARATOR", end_of_word),
1095 terminated("SERIALIZABLE", end_of_word),
1096 terminated("SESSION", end_of_word),
1097 terminated("SHARE", end_of_word),
1098 terminated("SHOW", end_of_word),
1099 terminated("SHUTDOWN", end_of_word),
1100 terminated("SLAVE", end_of_word),
1101 terminated("SONAME", end_of_word),
1102 terminated("SOUNDS", end_of_word),
1103 terminated("SQL", end_of_word),
1104 terminated("SQL_AUTO_IS_NULL", end_of_word),
1105 terminated("SQL_BIG_RESULT", end_of_word),
1106 terminated("SQL_BIG_SELECTS", end_of_word),
1107 terminated("SQL_BIG_TABLES", end_of_word),
1108 terminated("SQL_BUFFER_RESULT", end_of_word),
1109 terminated("SQL_CACHE", end_of_word),
1110 alt((
1111 terminated("SQL_CALC_FOUND_ROWS", end_of_word),
1112 terminated("SQL_LOG_BIN", end_of_word),
1113 terminated("SQL_LOG_OFF", end_of_word),
1114 terminated("SQL_LOG_UPDATE", end_of_word),
1115 terminated("SQL_LOW_PRIORITY_UPDATES", end_of_word),
1116 terminated("SQL_MAX_JOIN_SIZE", end_of_word),
1117 terminated("SQL_NO_CACHE", end_of_word),
1118 terminated("SQL_QUOTE_SHOW_CREATE", end_of_word),
1119 terminated("SQL_BIG_RESULT", end_of_word),
1120 terminated("SQL_BIG_SELECTS", end_of_word),
1121 terminated("SQL_BIG_TABLES", end_of_word),
1122 terminated("SQL_BUFFER_RESULT", end_of_word),
1123 terminated("SQL_CACHE", end_of_word),
1124 terminated("SQL_CALC_FOUND_ROWS", end_of_word),
1125 terminated("SQL_LOG_BIN", end_of_word),
1126 terminated("SQL_LOG_OFF", end_of_word),
1127 terminated("SQL_LOG_UPDATE", end_of_word),
1128 terminated("SQL_LOW_PRIORITY_UPDATES", end_of_word),
1129 terminated("SQL_MAX_JOIN_SIZE", end_of_word),
1130 alt((
1131 terminated("SQL_NO_CACHE", end_of_word),
1132 terminated("SQL_QUOTE_SHOW_CREATE", end_of_word),
1133 terminated("SQL_SAFE_UPDATES", end_of_word),
1134 terminated("SQL_SELECT_LIMIT", end_of_word),
1135 terminated("SQL_SLAVE_SKIP_COUNTER", end_of_word),
1136 terminated("SQL_SMALL_RESULT", end_of_word),
1137 terminated("SQL_WARNINGS", end_of_word),
1138 terminated("START", end_of_word),
1139 terminated("STARTING", end_of_word),
1140 terminated("STATUS", end_of_word),
1141 terminated("STOP", end_of_word),
1142 terminated("STORAGE", end_of_word),
1143 terminated("STRAIGHT_JOIN", end_of_word),
1144 terminated("STRING", end_of_word),
1145 terminated("STRIPED", end_of_word),
1146 terminated("SUPER", end_of_word),
1147 )),
1148 )),
1149 ))
1150 .parse_next(&mut uc_input),
1151
1152 'T' => alt((
1153 terminated("TABLE", end_of_word),
1154 terminated("TABLES", end_of_word),
1155 terminated("TEMPORARY", end_of_word),
1156 terminated("TERMINATED", end_of_word),
1157 terminated("THEN", end_of_word),
1158 terminated("TO", end_of_word),
1159 terminated("TRAILING", end_of_word),
1160 terminated("TRANSACTIONAL", end_of_word),
1161 terminated("TRUE", end_of_word),
1162 terminated("TRUNCATE", end_of_word),
1163 terminated("TYPE", end_of_word),
1164 terminated("TYPES", end_of_word),
1165 terminated("TBLPROPERTIES", end_of_word),
1166 ))
1167 .parse_next(&mut uc_input),
1168
1169 'U' => alt((
1170 terminated("UNCOMMITTED", end_of_word),
1171 terminated("UNIQUE", end_of_word),
1172 terminated("UNLOCK", end_of_word),
1173 terminated("UNSIGNED", end_of_word),
1174 terminated("USAGE", end_of_word),
1175 terminated("USE", end_of_word),
1176 ))
1177 .parse_next(&mut uc_input),
1178
1179 'V' => alt((
1180 terminated("VARIABLES", end_of_word),
1181 terminated("VIEW", end_of_word),
1182 ))
1183 .parse_next(&mut uc_input),
1184
1185 'W' => alt((
1186 terminated("WHEN", end_of_word),
1187 terminated("WORK", end_of_word),
1188 terminated("WRITE", end_of_word),
1189 ))
1190 .parse_next(&mut uc_input),
1191
1192 'Y' => alt((terminated("YEAR_MONTH", end_of_word),)).parse_next(&mut uc_input),
1193 _ => Err(ParserError::from_input(&uc_input)),
1195 };
1196 if let Ok(token) = result {
1197 let input_end_pos = token.len();
1198 let token = input.next_slice(input_end_pos);
1199 Ok(Token {
1200 kind: TokenKind::Reserved,
1201 value: token,
1202 key: None,
1203 alias: token,
1204 })
1205 } else {
1206 Err(ParserError::from_input(input))
1207 }
1208}
1209
1210fn get_plain_reserved_two_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
1211 let uc_input = get_uc_words(input, 2);
1212 let mut uc_input = uc_input.as_str();
1213 let result: Result<&str> = alt((
1214 terminated("CHARACTER SET", end_of_word),
1215 terminated("ON CONFLICT", end_of_word),
1216 terminated("ON CONSTRAINT", end_of_word),
1217 terminated("ON DELETE", end_of_word),
1218 terminated("ON UPDATE", end_of_word),
1219 terminated("DISTINCT FROM", end_of_word),
1220 terminated("PARTITIONED BY", end_of_word),
1221 ))
1222 .parse_next(&mut uc_input);
1223 if let Ok(token) = result {
1224 let value = finalize(input, token);
1225 Ok(Token {
1226 kind: TokenKind::Reserved,
1227 value,
1228 key: None,
1229 alias: value,
1230 })
1231 } else {
1232 Err(ParserError::from_input(input))
1233 }
1234}
1235
1236fn get_word_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
1237 take_while(1.., is_word_character)
1238 .parse_next(input)
1239 .map(|token| Token {
1240 kind: TokenKind::Word,
1241 value: token,
1242 key: None,
1243 alias: token,
1244 })
1245}
1246
1247fn get_operator_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
1248 let allowed_operators = (
1250 '!', '<', '>', '=', '|', ':', '-', '~', '*', '&', '@', '^', '?', '#', '/', '%',
1251 );
1252
1253 take_while(2..=5, allowed_operators)
1254 .map(|token: &str| Token {
1255 kind: TokenKind::Operator,
1256 value: token,
1257 key: None,
1258 alias: token,
1259 })
1260 .parse_next(input)
1261}
1262fn get_any_other_char<'i>(input: &mut &'i str) -> Result<Token<'i>> {
1263 one_of(|token| token != '\n' && token != '\r')
1264 .take()
1265 .parse_next(input)
1266 .map(|token| Token {
1267 kind: TokenKind::Operator,
1268 value: token,
1269 key: None,
1270 alias: token,
1271 })
1272}
1273
1274fn end_of_word<'i>(input: &mut &'i str) -> Result<&'i str> {
1275 peek(alt((
1276 eof,
1277 one_of(|val: char| !is_word_character(val)).take(),
1278 )))
1279 .parse_next(input)
1280}
1281
1282fn is_word_character(item: char) -> bool {
1283 item.is_alphanumeric() || item.is_mark() || item.is_punctuation_connector()
1284}