1use crate::{
4 dom::{self, FromSyntax},
5 syntax::{SyntaxKind, SyntaxKind::*, SyntaxNode},
6 util::{allowed_chars, check_escape},
7};
8use logos::{Lexer, Logos};
9use rowan::{GreenNode, GreenNodeBuilder, TextRange, TextSize};
10use std::convert::TryInto;
11
12#[macro_use]
13mod macros;
14
/// A parse error with the text range it applies to.
///
/// Errors are accumulated in [`Parse::errors`] instead of aborting the parse.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Error {
    /// The source span the error covers; zero-length ranges are used to
    /// point at a single offending character.
    pub range: TextRange,

    /// Human-readable description of the problem.
    pub message: String,
}
24
25impl core::fmt::Display for Error {
26 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27 write!(f, "{} ({:?})", &self.message, &self.range)
28 }
29}
30impl std::error::Error for Error {}
31
32pub fn parse(source: &str) -> Parse {
44 Parser::new(source).parse()
45}
46
/// Recursive-descent parser that lexes with [`logos`] and builds a
/// [`rowan`] green tree, collecting errors instead of failing fast.
pub(crate) struct Parser<'p> {
    // When true, WHITESPACE tokens are added to the tree automatically in
    // `step`; toggled off where whitespace is significant (e.g. `[[`).
    skip_whitespace: bool,
    // Enables key-pattern extensions (globs, bracketed parts) in
    // `parse_key`/`parse_ident`; set by `parse_key_only`.
    key_pattern_syntax: bool,
    // One-token lookahead; `None` means `step` must be called to refill.
    current_token: Option<SyntaxKind>,

    // Bitmask of token kinds that `error` should NOT consume as ERROR
    // tokens, letting the caller recover with them (see `whitelisted`).
    error_whitelist: u16,

    lexer: Lexer<'p, SyntaxKind>,
    builder: GreenNodeBuilder<'p>,
    errors: Vec<Error>,
}
76
impl Parser<'_> {
    /// Parses the entire source as a single key, with key-pattern syntax
    /// (globs and `[...]` parts) enabled.
    ///
    /// Consumes the parser and returns the resulting tree plus any errors.
    pub(crate) fn parse_key_only(mut self) -> Parse {
        self.key_pattern_syntax = true;
        // The result is ignored: errors are recorded in `self.errors`.
        let _ = with_node!(self.builder, KEY, self.parse_key());

        Parse {
            green_node: self.builder.finish(),
            errors: self.errors,
        }
    }
}
92
/// Result type used throughout the parser; the error carries no payload
/// because diagnostics are accumulated in `Parser::errors` instead.
type ParserResult<T> = Result<T, ()>;
96
97impl<'p> Parser<'p> {
102 pub(crate) fn new(source: &'p str) -> Self {
103 Parser {
104 current_token: None,
105 skip_whitespace: true,
106 key_pattern_syntax: false,
107 error_whitelist: 0,
108 lexer: SyntaxKind::lexer(source),
109 builder: Default::default(),
110 errors: Default::default(),
111 }
112 }
113
    /// Parses the full source under a ROOT node, consuming the parser.
    ///
    /// A tree is always produced; failures only add to `self.errors`.
    fn parse(mut self) -> Parse {
        let _ = with_node!(self.builder, ROOT, self.parse_root());

        Parse {
            green_node: self.builder.finish(),
            errors: self.errors,
        }
    }
122
123 fn error(&mut self, message: &str) -> ParserResult<()> {
124 let span = self.lexer.span();
125
126 let err = Error {
127 range: TextRange::new(
128 TextSize::from(span.start as u32),
129 TextSize::from(span.end as u32),
130 ),
131 message: message.into(),
132 };
133
134 let same_error = self
135 .errors
136 .last()
137 .map(|e| e.range == err.range)
138 .unwrap_or(false);
139
140 if !same_error {
141 self.add_error(&Error {
142 range: TextRange::new(
143 TextSize::from(span.start as u32),
144 TextSize::from(span.end as u32),
145 ),
146 message: message.into(),
147 });
148 if let Some(t) = self.current_token {
149 if !self.whitelisted(t) {
150 self.token_as(ERROR).ok();
151 }
152 }
153 } else {
154 self.token_as(ERROR).ok();
155 }
156
157 Err(())
158 }
159
160 fn report_error(&mut self, message: &str) -> ParserResult<()> {
162 let span = self.lexer.span();
163 self.add_error(&Error {
164 range: TextRange::new(
165 TextSize::from(span.start as u32),
166 TextSize::from(span.end as u32),
167 ),
168 message: message.into(),
169 });
170 Err(())
171 }
172
173 fn add_error(&mut self, e: &Error) {
174 if let Some(last_err) = self.errors.last_mut() {
175 if last_err == e {
176 return;
177 }
178 }
179
180 self.errors.push(e.clone());
181 }
182
    /// Marks `token` so `error` will not consume it as an ERROR token.
    // NOTE(review): the enum discriminant is used directly as a bitmask;
    // this assumes the kinds ever whitelisted (NEWLINE, COMMA in this file)
    // occupy disjoint bits — TODO confirm against SyntaxKind's discriminants.
    #[inline]
    fn whitelist_token(&mut self, token: SyntaxKind) {
        self.error_whitelist |= token as u16;
    }

    /// Removes `token` from the error whitelist.
    #[inline]
    fn blacklist_token(&mut self, token: SyntaxKind) {
        self.error_whitelist &= !(token as u16);
    }

    /// Whether `token` is currently whitelisted (see `whitelist_token`).
    #[inline]
    fn whitelisted(&self, token: SyntaxKind) -> bool {
        self.error_whitelist & token as u16 != 0
    }
197
    /// Adds a token with the given kind and text directly to the tree,
    /// without consulting or advancing the lexer.
    fn insert_token(&mut self, kind: SyntaxKind, s: &str) {
        self.builder.token(kind.into(), s)
    }
201
202 fn must_token_or(&mut self, kind: SyntaxKind, message: &str) -> ParserResult<()> {
203 match self.get_token() {
204 Ok(t) => {
205 if kind == t {
206 self.token()
207 } else {
208 self.error(message)
209 }
210 }
211 Err(_) => {
212 self.add_error(&Error {
213 range: TextRange::new(
214 self.lexer.span().start.try_into().unwrap(),
215 self.lexer.span().end.try_into().unwrap(),
216 ),
217 message: "unexpected EOF".into(),
218 });
219 Err(())
220 }
221 }
222 }
223
224 fn add_token(&mut self) -> ParserResult<()> {
226 match self.get_token() {
227 Err(_) => Err(()),
228 Ok(token) => {
229 self.builder.token(token.into(), self.lexer.slice());
230 self.current_token = None;
231 Ok(())
232 }
233 }
234 }
235
236 fn token(&mut self) -> ParserResult<()> {
237 match self.get_token() {
238 Err(_) => Err(()),
239 Ok(token) => self.token_as(token),
240 }
241 }
242
    /// Consumes the current token into the tree re-labelled as `kind`,
    /// then advances to the next significant token.
    fn token_as(&mut self, kind: SyntaxKind) -> ParserResult<()> {
        self.token_as_no_step(kind)?;
        self.step();
        Ok(())
    }
252
253 fn token_as_no_step(&mut self, kind: SyntaxKind) -> ParserResult<()> {
254 match self.get_token() {
255 Err(_) => return Err(()),
256 Ok(_) => {
257 self.builder.token(kind.into(), self.lexer.slice());
258 }
259 }
260
261 Ok(())
262 }
263
    /// Advances the lexer to the next significant token and stores it in
    /// `self.current_token` (left as `None` at end of input).
    ///
    /// Comments, lexer ERROR tokens, and — while `skip_whitespace` is set —
    /// whitespace are added to the tree immediately and skipped over;
    /// invalid characters and unexpected tokens are recorded as errors.
    fn step(&mut self) {
        self.current_token = None;
        while let Some(token) = self.lexer.next() {
            match token {
                COMMENT => {
                    // Validate the comment's characters; each offending
                    // index becomes a zero-length error range.
                    match allowed_chars::comment(self.lexer.slice()) {
                        Ok(_) => {}
                        Err(err_indices) => {
                            for e in err_indices {
                                self.add_error(&Error {
                                    range: TextRange::new(
                                        (self.lexer.span().start + e).try_into().unwrap(),
                                        (self.lexer.span().start + e).try_into().unwrap(),
                                    ),
                                    message: "invalid character in comment".into(),
                                });
                            }
                        }
                    };

                    self.insert_token(token, self.lexer.slice());
                }
                WHITESPACE => {
                    if self.skip_whitespace {
                        self.insert_token(token, self.lexer.slice());
                    } else {
                        // Whitespace is significant here (e.g. inside `[[`),
                        // so surface it as the current token.
                        self.current_token = Some(token);
                        break;
                    }
                }
                ERROR => {
                    // Keep the bad text in the tree (lossless) and record
                    // the error, then continue scanning.
                    self.insert_token(token, self.lexer.slice());
                    let span = self.lexer.span();
                    self.add_error(&Error {
                        range: TextRange::new(
                            span.start.try_into().unwrap(),
                            span.end.try_into().unwrap(),
                        ),
                        message: "unexpected token".into(),
                    })
                }
                _ => {
                    self.current_token = Some(token);
                    break;
                }
            }
        }
    }
312
313 fn get_token(&mut self) -> ParserResult<SyntaxKind> {
314 if self.current_token.is_none() {
315 self.step();
316 }
317
318 self.current_token.ok_or(())
319 }
320
    /// Parses top-level items — entries, `[table]` headers, and `[[array]]`
    /// headers — until the lexer is exhausted.
    fn parse_root(&mut self) -> ParserResult<()> {
        // Set after an item: the next item then requires a NEWLINE first.
        let mut not_newline = false;

        // Whether an ENTRY node is currently open in the builder; table
        // headers become children of ROOT, entries get their own node.
        let mut entry_started = false;

        while let Ok(token) = self.get_token() {
            match token {
                BRACKET_START => {
                    if entry_started {
                        self.builder.finish_node();
                        entry_started = false;
                    }

                    if not_newline {
                        let _ = self.error("expected new line");
                        continue;
                    }

                    not_newline = true;

                    // A second `[` immediately following means `[[`,
                    // i.e. a table array header.
                    if self.lexer.remainder().starts_with('[') {
                        let _ = whitelisted!(
                            self,
                            NEWLINE,
                            with_node!(
                                self.builder,
                                TABLE_ARRAY_HEADER,
                                self.parse_table_array_header()
                            )
                        );
                    } else {
                        let _ = whitelisted!(
                            self,
                            NEWLINE,
                            with_node!(self.builder, TABLE_HEADER, self.parse_table_header())
                        );
                    }
                }
                NEWLINE => {
                    not_newline = false;
                    if entry_started {
                        self.builder.finish_node();
                        entry_started = false;
                    }
                    let _ = self.token();
                }
                _ => {
                    if not_newline {
                        let _ = self.error("expected new line");
                        continue;
                    }
                    if entry_started {
                        self.builder.finish_node();
                    }
                    not_newline = true;
                    self.builder.start_node(ENTRY.into());
                    entry_started = true;
                    // NEWLINE is whitelisted so a broken entry still ends
                    // at the line boundary and parsing resumes there.
                    let _ = whitelisted!(self, NEWLINE, self.parse_entry());
                }
            }
        }
        // Close a trailing ENTRY left open at end of input.
        if entry_started {
            self.builder.finish_node();
        }

        Ok(())
    }
391
    /// Parses a `[table.header]`: opening bracket, dotted key, closing bracket.
    fn parse_table_header(&mut self) -> ParserResult<()> {
        self.must_token_or(BRACKET_START, r#"expected "[""#)?;
        // Key errors are recovered from so the closing bracket is still sought.
        let _ = with_node!(self.builder, KEY, self.parse_key());
        self.must_token_or(BRACKET_END, r#"expected "]""#)?;

        Ok(())
    }
399
400 fn parse_table_array_header(&mut self) -> ParserResult<()> {
401 self.skip_whitespace = false;
402 self.must_token_or(BRACKET_START, r#"expected "[[""#)?;
403 self.must_token_or(BRACKET_START, r#"expected "[[""#)?;
404 self.skip_whitespace = true;
405 let _ = with_node!(self.builder, KEY, self.parse_key());
406 self.skip_whitespace = false;
407 let _ = self.must_token_or(BRACKET_END, r#"expected "]]""#);
408
409 let token = self.get_token()?;
412 match token {
413 BRACKET_END => {
414 self.token_as_no_step(token)?;
415 }
416 _ => {
417 self.error(r#"expected "]]"#)?;
418 }
419 }
420 self.skip_whitespace = true;
421
422 self.step();
423
424 Ok(())
425 }
426
    /// Parses a single `key = value` entry.
    fn parse_entry(&mut self) -> ParserResult<()> {
        with_node!(self.builder, KEY, self.parse_key())?;
        self.must_token_or(EQ, r#"expected "=""#)?;
        with_node!(self.builder, VALUE, self.parse_value())?;

        Ok(())
    }
434
    /// Parses a (possibly dotted) key.
    ///
    /// With `key_pattern_syntax` enabled, bracketed `[...]` parts are also
    /// accepted and stray trailing identifiers are rejected.
    fn parse_key(&mut self) -> ParserResult<()> {
        if self.parse_ident().is_err() {
            return self.report_error("expected identifier");
        }

        // True when the previous token was `.` and another identifier is
        // therefore required before the key can end.
        let mut after_period = false;
        loop {
            let t = match self.get_token() {
                Ok(token) => token,
                Err(_) => {
                    if !after_period {
                        // End of input right after a complete part is fine.
                        return Ok(());
                    }
                    return self.error("unexpected end of input");
                }
            };

            match t {
                PERIOD => {
                    if after_period {
                        // Two periods in a row, e.g. `a..b`.
                        return self.error(r#"unexpected ".""#);
                    } else {
                        self.token()?;
                        after_period = true;
                    }
                }
                BRACKET_START if self.key_pattern_syntax => {
                    self.step();

                    match self.parse_ident() {
                        Ok(_) => {}
                        Err(_) => return self.error("expected identifier"),
                    }

                    let token = self.get_token()?;

                    if !matches!(token, BRACKET_END) {
                        self.error(r#"expected "]""#)?;
                    }
                    self.step();
                    after_period = false;
                }
                _ => {
                    if after_period {
                        match self.parse_ident() {
                            Ok(_) => {}
                            Err(_) => return self.report_error("expected identifier"),
                        }
                        after_period = false;
                    } else if self.key_pattern_syntax {
                        return self.error("unexpected identifier");
                    } else {
                        // Any other token ends the key; the caller decides
                        // whether it is valid there (e.g. `=` or `]`).
                        break;
                    }
                }
            };
        }

        Ok(())
    }
495
    /// Parses a single key part (identifier).
    ///
    /// Several lexer kinds are re-labelled as IDENT here, because bare keys
    /// can look like numbers or booleans (the BOOL and INTEGER_* arms).
    fn parse_ident(&mut self) -> ParserResult<()> {
        let t = self.get_token()?;
        match t {
            IDENT => self.token(),
            IDENT_WITH_GLOB => {
                if self.key_pattern_syntax {
                    self.token_as(IDENT)
                } else {
                    // Glob characters are only valid in key-pattern mode.
                    self.error("expected identifier")
                }
            }
            INTEGER_HEX | INTEGER_BIN | INTEGER_OCT => self.token_as(IDENT),
            INTEGER => {
                if self.lexer.slice().starts_with('+') {
                    // A leading `+` cannot start a bare key.
                    Err(())
                } else {
                    self.token_as(IDENT)
                }
            }
            STRING_LITERAL => {
                // Character validation adds zero-length error ranges, but
                // the token is still consumed as an identifier.
                match allowed_chars::string_literal(self.lexer.slice()) {
                    Ok(_) => {}
                    Err(err_indices) => {
                        for e in err_indices {
                            self.add_error(&Error {
                                range: TextRange::new(
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                ),
                                message: "invalid control character in string literal".into(),
                            });
                        }
                    }
                };

                self.token_as(IDENT)
            }
            STRING => {
                match allowed_chars::string(self.lexer.slice()) {
                    Ok(_) => {}
                    Err(err_indices) => {
                        for e in err_indices {
                            self.add_error(&Error {
                                range: TextRange::new(
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                ),
                                message: "invalid character in string".into(),
                            });
                        }
                    }
                };

                match check_escape(self.lexer.slice()) {
                    Ok(_) => self.token_as(IDENT),
                    Err(err_indices) => {
                        for e in err_indices {
                            self.add_error(&Error {
                                range: TextRange::new(
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                ),
                                message: "invalid escape sequence".into(),
                            });
                        }

                        // Escape errors are reported but the token is still
                        // consumed as an identifier.
                        self.token_as(IDENT)
                    }
                }
            }
            FLOAT => {
                if self.lexer.slice().starts_with('0') {
                    self.error("zero-padded numbers are not allowed")
                } else if self.lexer.slice().starts_with('+') {
                    Err(())
                } else {
                    // Something like `1.2` used as a key is really the dotted
                    // key `1.2`: split it into IDENT / PERIOD / IDENT tokens.
                    for (i, s) in self.lexer.slice().split('.').enumerate() {
                        if i != 0 {
                            self.insert_token(PERIOD, ".");
                        }

                        self.insert_token(IDENT, s);
                    }
                    self.step();
                    Ok(())
                }
            }
            BOOL => self.token_as(IDENT),
            _ => self.error("expected identifier"),
        }
    }
590
    /// Parses a value (right-hand side of `=`, or an array element).
    ///
    /// Some lexer kinds are re-labelled here: an INTEGER containing a
    /// non-leading `-` becomes a DATE, one containing `:` becomes a TIME.
    fn parse_value(&mut self) -> ParserResult<()> {
        let t = match self.get_token() {
            Ok(t) => t,
            Err(_) => return self.error("expected value"),
        };

        match t {
            BOOL | DATE_TIME_OFFSET | DATE_TIME_LOCAL | DATE | TIME => self.token(),
            INTEGER => {
                // A `-` that is not a sign means this is a date, not a number.
                if !self.lexer.slice().starts_with('-') && self.lexer.slice().contains('-') {
                    return self.token_as(DATE);
                }

                if self.lexer.slice().contains(':') {
                    return self.token_as(TIME);
                }

                // Reject zero-padded integers such as `042` / `+042` / `-042`
                // while still allowing `0`, `+0`, and `-0` themselves.
                if (self.lexer.slice().starts_with('0') && self.lexer.slice() != "0")
                    || (self.lexer.slice().starts_with("+0") && self.lexer.slice() != "+0")
                    || (self.lexer.slice().starts_with("-0") && self.lexer.slice() != "-0")
                {
                    self.error("zero-padded integers are not allowed")
                } else if !check_underscores(self.lexer.slice(), 10) {
                    self.error("invalid underscores")
                } else {
                    self.token()
                }
            }
            INTEGER_BIN => {
                if !check_underscores(self.lexer.slice(), 2) {
                    self.error("invalid underscores")
                } else {
                    self.token()
                }
            }
            INTEGER_HEX => {
                if !check_underscores(self.lexer.slice(), 16) {
                    self.error("invalid underscores")
                } else {
                    self.token()
                }
            }
            INTEGER_OCT => {
                if !check_underscores(self.lexer.slice(), 8) {
                    self.error("invalid underscores")
                } else {
                    self.token()
                }
            }
            FLOAT => {
                if self.lexer.slice().contains(':') {
                    return self.token_as(TIME);
                }

                // Only the integer part (before `.`, or before `e` for
                // exponent-only floats) is checked for zero padding.
                let int_slice = if self.lexer.slice().contains('.') {
                    self.lexer.slice().split('.').next().unwrap()
                } else {
                    self.lexer.slice().split('e').next().unwrap()
                };

                if (int_slice.starts_with('0') && int_slice != "0")
                    || (int_slice.starts_with("+0") && int_slice != "+0")
                    || (int_slice.starts_with("-0") && int_slice != "-0")
                {
                    self.error("zero-padded numbers are not allowed")
                } else if !check_underscores(self.lexer.slice(), 10) {
                    self.error("invalid underscores")
                } else {
                    self.token()
                }
            }
            STRING_LITERAL => {
                // Character validation adds zero-length errors but never
                // prevents the token from being consumed.
                match allowed_chars::string_literal(self.lexer.slice()) {
                    Ok(_) => {}
                    Err(err_indices) => {
                        for e in err_indices {
                            self.add_error(&Error {
                                range: TextRange::new(
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                ),
                                message: "invalid control character in string literal".into(),
                            });
                        }
                    }
                };
                self.token()
            }
            MULTI_LINE_STRING_LITERAL => {
                match allowed_chars::multi_line_string_literal(self.lexer.slice()) {
                    Ok(_) => {}
                    Err(err_indices) => {
                        for e in err_indices {
                            self.add_error(&Error {
                                range: TextRange::new(
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                ),
                                message: "invalid character in string".into(),
                            });
                        }
                    }
                };
                self.token()
            }
            STRING => {
                match allowed_chars::string(self.lexer.slice()) {
                    Ok(_) => {}
                    Err(err_indices) => {
                        for e in err_indices {
                            self.add_error(&Error {
                                range: TextRange::new(
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                ),
                                message: "invalid character in string".into(),
                            });
                        }
                    }
                };

                match check_escape(self.lexer.slice()) {
                    Ok(_) => self.token(),
                    Err(err_indices) => {
                        for e in err_indices {
                            self.add_error(&Error {
                                range: TextRange::new(
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                ),
                                message: "invalid escape sequence".into(),
                            });
                        }

                        // The token is consumed despite invalid escapes.
                        self.token()
                    }
                }
            }
            MULTI_LINE_STRING => {
                match allowed_chars::multi_line_string(self.lexer.slice()) {
                    Ok(_) => {}
                    Err(err_indices) => {
                        for e in err_indices {
                            self.add_error(&Error {
                                range: TextRange::new(
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                ),
                                message: "invalid character in string".into(),
                            });
                        }
                    }
                };

                match check_escape(self.lexer.slice()) {
                    Ok(_) => self.token(),
                    Err(err_indices) => {
                        for e in err_indices {
                            self.add_error(&Error {
                                range: TextRange::new(
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                    (self.lexer.span().start + e).try_into().unwrap(),
                                ),
                                message: "invalid escape sequence".into(),
                            });
                        }

                        self.token()
                    }
                }
            }
            BRACKET_START => {
                with_node!(self.builder, ARRAY, self.parse_array())
            }
            BRACE_START => {
                with_node!(self.builder, INLINE_TABLE, self.parse_inline_table())
            }
            IDENT | BRACE_END => {
                // Report but recover, so parsing continues past the
                // missing value.
                self.report_error("expected value").ok();
                Ok(())
            }
            _ => self.error("expected value"),
        }
    }
787
    /// Parses an inline table `{ k = v, ... }`.
    ///
    /// Newlines and trailing commas are reported as errors, with recovery
    /// where possible; two consecutive newlines abandon the table.
    fn parse_inline_table(&mut self) -> ParserResult<()> {
        self.must_token_or(BRACE_START, r#"expected "{""#)?;

        // No entry parsed yet (a leading comma is invalid).
        let mut first = true;
        // The previous significant token was a comma.
        let mut comma_last = false;
        // The previous token was an (invalid) newline.
        let mut was_newline = false;

        loop {
            let t = match self.get_token() {
                Ok(t) => t,
                Err(_) => return self.report_error(r#"expected "}""#),
            };

            match t {
                BRACE_END => {
                    if comma_last {
                        let _ = self.report_error("expected value, trailing comma is not allowed");
                    }
                    break self.add_token()?;
                }
                NEWLINE => {
                    // A second newline in a row gives up on this table.
                    if was_newline {
                        break;
                    }

                    let _ = self.error("newline is not allowed in an inline table");
                    was_newline = true;
                }
                COMMA => {
                    if comma_last {
                        let _ = self.report_error(r#"unexpected ",""#);
                    }

                    if first {
                        let _ = self.error(r#"unexpected ",""#);
                    } else {
                        self.token()?;
                    }
                    comma_last = true;
                    was_newline = false;
                }
                _ => {
                    was_newline = false;
                    if !comma_last && !first {
                        let _ = self.error(r#"expected ",""#);
                    }
                    // COMMA is whitelisted so a broken entry still ends at
                    // the next separator and parsing resumes there.
                    let _ = whitelisted!(
                        self,
                        COMMA,
                        with_node!(self.builder, ENTRY, self.parse_entry())
                    );
                    comma_last = false;
                }
            }

            first = false;
        }
        Ok(())
    }
852
    /// Parses an array value `[ v1, v2, ... ]`.
    ///
    /// Unlike inline tables, newlines are allowed anywhere inside.
    fn parse_array(&mut self) -> ParserResult<()> {
        self.must_token_or(BRACKET_START, r#"expected "[""#)?;

        // No element parsed yet (a leading comma is invalid).
        let mut first = true;
        // The previous significant token was a comma.
        let mut comma_last = false;
        loop {
            let t = match self.get_token() {
                Ok(t) => t,
                Err(_) => {
                    let _ = self.report_error("unexpected EOF");
                    return Err(());
                }
            };

            match t {
                BRACKET_END => break self.add_token()?,
                NEWLINE => {
                    // Newlines are insignificant in arrays; skip without
                    // touching `first`/`comma_last`.
                    self.token()?;
                    continue; }
                COMMA => {
                    if first || comma_last {
                        let _ = self.error(r#"unexpected ",""#);
                    }
                    self.token()?;
                    comma_last = true;
                }
                _ => {
                    if !comma_last && !first {
                        let _ = self.error(r#"expected ",""#);
                    }
                    // COMMA is whitelisted so a broken value still ends at
                    // the next separator and parsing resumes there.
                    let _ = whitelisted!(
                        self,
                        COMMA,
                        with_node!(self.builder, VALUE, self.parse_value())
                    );
                    comma_last = false;
                }
            }

            first = false;
        }
        Ok(())
    }
897}
898
/// Validates underscore placement in a numeric literal for the given radix:
/// every `_` must be surrounded by digits on both sides.
///
/// Returns `false` for a leading/trailing underscore, consecutive
/// underscores, or an underscore adjacent to a non-digit.
fn check_underscores(s: &str, radix: u32) -> bool {
    if s.starts_with('_') || s.ends_with('_') {
        return false;
    }

    // Sentinel NUL: it is not a digit, but it never sits next to a `_`
    // because a leading underscore was already rejected above.
    let mut prev = '\0';

    for ch in s.chars() {
        // `_` must follow a digit...
        if ch == '_' && !prev.is_digit(radix) {
            return false;
        }
        // ...and must be followed by a digit.
        if prev == '_' && !ch.is_digit(radix) {
            return false;
        }
        prev = ch;
    }

    true
}
918
/// The result of parsing: the untyped (green) tree plus every error
/// collected along the way. A tree is produced even for invalid input.
#[derive(Debug, Clone)]
pub struct Parse {
    /// The lossless green tree covering the entire source.
    pub green_node: GreenNode,
    /// All errors recorded during parsing, in order of occurrence.
    pub errors: Vec<Error>,
}
927
impl Parse {
    /// Converts the green tree into a red (`SyntaxNode`) tree root.
    pub fn into_syntax(self) -> SyntaxNode {
        SyntaxNode::new_root(self.green_node)
    }

    /// Converts the parse result into a DOM node.
    ///
    /// NOTE(review): `self.errors` is not consulted here, so the DOM is
    /// built even from an erroneous tree — callers presumably check the
    /// errors separately; confirm before relying on this.
    pub fn into_dom(self) -> dom::node::Node {
        dom::Node::from_syntax(self.into_syntax().into())
    }
}