1use crate::dom;
4use crate::syntax::{SyntaxKind, SyntaxKind::*, SyntaxNode};
5use crate::util::validate_quote;
6use logos::{Lexer, Logos};
7use rowan::{GreenNode, GreenNodeBuilder, TextRange, TextSize};
8use std::collections::HashSet;
9
/// Wraps the evaluation of an expression in a syntax-tree node.
///
/// Calls `start_node` on `$builder` with `$kind` converted through `.into()`,
/// evaluates the remaining tokens as one expression, calls `finish_node`,
/// and yields the value of that expression.
macro_rules! with_node {
    ($builder:expr, $kind:ident, $($content:tt)*) => {{
        $builder.start_node($kind.into());
        let outcome = $($content)*;
        $builder.finish_node();
        outcome
    }};
}
20
/// A diagnostic recorded while parsing.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Error {
    /// Text range in the source that the diagnostic refers to.
    pub range: TextRange,

    /// Human-readable description of the problem.
    pub message: String,
}
30
31impl core::fmt::Display for Error {
32 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33 write!(f, "{} ({:?})", &self.message, &self.range)
34 }
35}
// Marker impl: lets `Error` be used wherever a `std::error::Error` is expected.
impl std::error::Error for Error {}
37
38pub fn parse(source: &str) -> Parse {
50 Parser::new(source).parse()
51}
52
/// Parser state: a lexer over the source plus the tree builder and diagnostics.
pub(crate) struct Parser<'p> {
    /// Lookahead token. `None` means no token is currently buffered: either
    /// the next one has not been fetched yet or the lexer is exhausted.
    current_token: Option<SyntaxKind>,
    /// Token source over the input text.
    lexer: Lexer<'p, SyntaxKind>,
    /// Builder that accumulates the green tree.
    builder: GreenNodeBuilder<'p>,
    /// Diagnostics collected so far.
    errors: Vec<Error>,
    /// `true` while an annotation value is being parsed; used to report
    /// nested annotations.
    annotation_scope: bool,
    /// Controls which tokens are accepted as keys.
    parse_keys_mode: ParseKeysMode,
}
64
/// Selects how key tokens are interpreted by the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum ParseKeysMode {
    /// Regular document parsing; this is the default mode.
    None,
    /// Keys-only parsing without glob support.
    Keys,
    /// Keys-only parsing in which glob identifiers are accepted as keys.
    QueryKeys,
}

impl Default for ParseKeysMode {
    /// The default mode is [`ParseKeysMode::None`].
    fn default() -> Self {
        Self::None
    }
}
77
/// Result of an internal parse step. The error side carries no payload;
/// diagnostics are collected separately in the parser's `errors` list.
type ParserResult<T> = Result<T, ()>;
81
82impl<'p> Parser<'p> {
83 pub(crate) fn new(source: &'p str) -> Self {
84 Parser {
85 current_token: None,
86 lexer: SyntaxKind::lexer(source),
87 builder: Default::default(),
88 errors: Default::default(),
89 annotation_scope: false,
90 parse_keys_mode: Default::default(),
91 }
92 }
93
94 pub(crate) fn parse_keys_only(mut self, glob: bool) -> Parse {
95 if glob {
96 self.parse_keys_mode = ParseKeysMode::QueryKeys
97 } else {
98 self.parse_keys_mode = ParseKeysMode::Keys
99 }
100 let _ = with_node!(self.builder, KEYS, self.parse_keys());
101
102 Parse {
103 green_node: self.builder.finish(),
104 errors: self.errors,
105 }
106 }
107
108 fn parse(mut self) -> Parse {
109 let _ = with_node!(self.builder, VALUE, self.parse_root());
110
111 Parse {
112 green_node: self.builder.finish(),
113 errors: self.errors,
114 }
115 }
116
117 fn parse_root(&mut self) -> ParserResult<()> {
118 self.parse_value()?;
119 self.parse_annotations()?;
120 self.must_peek_eof()
121 }
122
123 fn parse_annotations(&mut self) -> ParserResult<()> {
124 if let Ok(ANNOTATION_KEY) = self.peek_token() {
125 self.builder.start_node(ANNOTATIONS.into());
126 while let Ok(ANNOTATION_KEY) = self.peek_token() {
127 if self.lexer.slice().len() == 1 {
128 self.report_error("invalid annotation key");
129 }
130 let _ = with_node!(self.builder, ANNOTATION_PROPERTY, self.parse_anno_entry());
131 }
132 self.builder.finish_node();
133 }
134 Ok(())
135 }
136
137 fn parse_anno_entry(&mut self) -> ParserResult<()> {
138 self.must_token_or(ANNOTATION_KEY, r#"expected annotation key"#)?;
139 if self.annotation_scope {
140 self.report_error("nested annotation");
141 }
142 if let Ok(PARENTHESES_START) = self.peek_token() {
143 self.annotation_scope = true;
144 let ret = with_node!(self.builder, ANNOTATION_VALUE, self.parse_anno_value());
145 self.annotation_scope = false;
146 ret?;
147 }
148 Ok(())
149 }
150
151 fn parse_anno_value(&mut self) -> ParserResult<()> {
152 self.must_token_or(PARENTHESES_START, r#"expected "(""#)?;
153 if PARENTHESES_END == self.peek_token()? {
154 self.must_token_or(PARENTHESES_END, r#"expected ")""#)?;
155 return Ok(());
156 }
157 let ret = with_node!(self.builder, VALUE, self.parse_value());
158 self.must_token_or(PARENTHESES_END, r#"expected ")""#)?;
159 ret
160 }
161
162 fn parse_property(&mut self) -> ParserResult<bool> {
163 with_node!(self.builder, KEY, self.parse_key())?;
164 let _ = self.must_token_or(COLON, r#"expected ":""#);
165 if let Ok(t) = self.peek_token() {
166 match t {
167 COMMA => {
168 self.report_error("expected value");
169 self.consume_current_token()?;
170 return Ok(true);
171 }
172 BRACE_END => {
173 self.report_error("expected value");
174 return Ok(false);
175 }
176 _ => {}
177 }
178 }
179 let ret = with_node!(self.builder, VALUE, self.parse_value_with_annotations());
180 Ok(ret.ok().unwrap_or_default())
181 }
182
183 fn parse_value(&mut self) -> ParserResult<()> {
184 let t = match self.peek_token() {
185 Ok(t) => t,
186 Err(_) => return Ok(()),
187 };
188 match t {
189 BRACE_START => {
190 with_node!(self.builder, OBJECT, self.parse_object())
191 }
192 BRACKET_START => {
193 with_node!(self.builder, ARRAY, self.parse_array())
194 }
195 NULL | BOOL => with_node!(self.builder, SCALAR, self.consume_current_token()),
196 INTEGER => {
197 if (self.lexer.slice().starts_with('0') && self.lexer.slice() != "0")
199 || (self.lexer.slice().starts_with("+0") && self.lexer.slice() != "+0")
200 || (self.lexer.slice().starts_with("-0") && self.lexer.slice() != "-0")
201 {
202 self.consume_error_token("zero-padded integers are not allowed")
203 } else if !validate_underscore_integer(self.lexer.slice(), 10) {
204 self.consume_error_token("invalid underscores")
205 } else {
206 with_node!(self.builder, SCALAR, self.consume_current_token())
207 }
208 }
209 INTEGER_BIN => {
210 if !validate_underscore_integer(self.lexer.slice(), 2) {
211 self.consume_error_token("invalid underscores")
212 } else {
213 with_node!(self.builder, SCALAR, self.consume_current_token())
214 }
215 }
216 INTEGER_HEX => {
217 if !validate_underscore_integer(self.lexer.slice(), 16) {
218 self.consume_error_token("invalid underscores")
219 } else {
220 with_node!(self.builder, SCALAR, self.consume_current_token())
221 }
222 }
223 INTEGER_OCT => {
224 if !validate_underscore_integer(self.lexer.slice(), 8) {
225 self.consume_error_token("invalid underscores")
226 } else {
227 with_node!(self.builder, SCALAR, self.consume_current_token())
228 }
229 }
230 FLOAT => {
231 let int_slice = if self.lexer.slice().contains('.') {
232 self.lexer.slice().split('.').next().unwrap()
233 } else {
234 self.lexer.slice().split('e').next().unwrap()
235 };
236
237 if (int_slice.starts_with('0') && int_slice != "0")
238 || (int_slice.starts_with("+0") && int_slice != "+0")
239 || (int_slice.starts_with("-0") && int_slice != "-0")
240 {
241 self.consume_error_token("zero-padded numbers are not allowed")
242 } else if !validate_underscore_integer(self.lexer.slice(), 10) {
243 self.consume_error_token("invalid underscores")
244 } else {
245 with_node!(self.builder, SCALAR, self.consume_current_token())
246 }
247 }
248 DOUBLE_QUOTE | SINGLE_QUOTE => {
249 self.validate_string();
250 with_node!(self.builder, SCALAR, self.consume_current_token())
251 }
252 BACKTICK_QUOTE => {
253 self.validate_backtick();
254 with_node!(self.builder, SCALAR, self.consume_current_token())
255 }
256 COMMA => {
257 self.report_error("expected value");
258 Err(())
259 }
260 _ => self.consume_error_token("expected value"),
261 }
262 }
263
264 fn parse_value_with_annotations(&mut self) -> ParserResult<bool> {
265 self.parse_value()?;
266 let mut has_comma = false;
267 if let Ok(COMMA) = self.peek_token() {
268 has_comma = true;
269 self.consume_current_token()?;
270 }
271 self.parse_annotations()?;
272 Ok(has_comma)
273 }
274
275 fn parse_object(&mut self) -> ParserResult<()> {
276 self.must_token_or(BRACE_START, r#"expected "{""#)?;
277 self.parse_annotations()?;
278 let mut needs_comma = false;
279
280 while let Ok(t) = self.must_peek_token() {
281 match t {
282 BRACE_END => {
283 return self.consume_current_token();
284 }
285 COMMA => {
286 if needs_comma {
287 needs_comma = false;
288 self.consume_current_token()?;
289 } else {
290 let _ = self.consume_error_token(r#"unexpected ",""#);
291 }
292 }
293 _ => {
294 if needs_comma {
295 self.point_error(r#"expected ",""#);
296 }
297 let ret = with_node!(self.builder, PROPERTY, self.parse_property());
298 if let Ok(has_comma) = ret {
299 needs_comma = !has_comma;
300 }
301 }
302 }
303 }
304 Ok(())
305 }
306
307 fn parse_array(&mut self) -> ParserResult<()> {
308 self.must_token_or(BRACKET_START, r#"expected "[""#)?;
309 let _ = self.parse_annotations();
310 let mut needs_comma = false;
311
312 while let Ok(t) = self.must_peek_token() {
313 match t {
314 BRACKET_END => {
315 return self.consume_current_token();
316 }
317 COMMA => {
318 if needs_comma {
319 needs_comma = false;
320 self.consume_current_token()?;
321 } else {
322 let _ = self.consume_error_token(r#"unexpected ",""#);
323 }
324 }
325 _ => {
326 if needs_comma {
327 self.point_error(r#"expected ",""#);
328 }
329 let ret = with_node!(self.builder, VALUE, self.parse_value_with_annotations());
330 needs_comma = !ret.ok().unwrap_or_default();
331 }
332 }
333 }
334
335 Ok(())
336 }
337
338 fn parse_keys(&mut self) -> ParserResult<()> {
339 let mut first = true;
340 let mut after_dot = false;
341 let mut exist_annotation_key = false;
342 loop {
343 let t = match self.peek_token() {
344 Ok(token) => token,
345 Err(_) => {
346 if !after_dot {
347 return Ok(());
348 }
349 return self.consume_error_token("unexpected EOF");
350 }
351 };
352
353 match t {
354 ANNOTATION_KEY => {
355 if after_dot || exist_annotation_key {
356 return self.consume_error_token("unexpected annotation key");
357 } else {
358 self.consume_current_token()?;
359 exist_annotation_key = true;
360 after_dot = false;
361 first = false;
362 }
363 }
364 PERIOD => {
365 if after_dot {
366 return self.consume_error_token(r#"unexpected ".""#);
367 } else {
368 self.consume_current_token()?;
369 after_dot = true;
370 }
371 }
372 FLOAT => {
373 let value = self.lexer.slice();
374 if value.starts_with(['+', '-']) {
375 return self.consume_error_token("unexpected identifier");
376 } else {
377 let mut dot = false;
378 for (i, s) in value.split('.').enumerate() {
379 if s.is_empty() {
380 if i == 0 && after_dot {
381 return self.consume_error_token(r#"unexpected ".""#);
382 }
383 self.consume_token(PERIOD, ".");
384 dot = true;
385 } else {
386 self.consume_token(IDENT, s);
387 dot = false;
388 }
389 }
390 if dot {
391 after_dot = true;
392 }
393 self.next_token();
394 }
395 }
396 BRACKET_START => {
397 self.consume_current_token()?;
398
399 self.parse_key()?;
400
401 let token = self.peek_token()?;
402
403 if !matches!(token, BRACKET_END) {
404 self.consume_error_token(r#"expected "]""#)?;
405 }
406 self.consume_current_token()?;
407
408 after_dot = false;
409 }
410 _ => {
411 if after_dot || first {
412 match self.parse_key() {
413 Ok(_) => {}
414 Err(_) => {
415 self.report_error("expected identifier");
416 return Err(());
417 }
418 }
419 after_dot = false;
420 first = false;
421 } else {
422 return self.consume_error_token(r#"expect ".""#);
423 }
424 }
425 };
426 }
427 }
428
429 fn parse_key(&mut self) -> ParserResult<()> {
430 let t = self.must_peek_token()?;
431
432 match t {
433 IDENT => self.consume_current_token(),
434 IDENT_WITH_GLOB if self.parse_keys_mode == ParseKeysMode::QueryKeys => {
435 if let Err(err_indices) = validates::glob(self.lexer.slice()) {
436 for e in err_indices {
437 let span = self.lexer.span();
438 self.add_error(&Error {
439 range: TextRange::new(
440 TextSize::from((span.start + e) as u32),
441 TextSize::from((span.start + e) as u32),
442 ),
443 message: "invalid glob".into(),
444 });
445 }
446 };
447 self.consume_current_token()
448 }
449 NULL | BOOL => self.consume_current_token(),
450 INTEGER_HEX | INTEGER_BIN | INTEGER_OCT => self.consume_current_token(),
451 INTEGER => {
452 if self.lexer.slice().starts_with('+') {
453 Err(())
454 } else {
455 self.consume_current_token()
456 }
457 }
458 SINGLE_QUOTE | DOUBLE_QUOTE => {
459 self.validate_string();
460 self.consume_current_token()
461 }
462 BACKTICK_QUOTE => {
463 self.validate_backtick();
464 self.consume_current_token()
465 }
466 FLOAT if self.parse_keys_mode == ParseKeysMode::None => {
467 if self.lexer.slice().starts_with('0') {
468 self.consume_error_token("zero-padded numbers are not allowed")
469 } else if self.lexer.slice().starts_with('+') {
470 Err(())
471 } else {
472 self.consume_current_token()
473 }
474 }
475 _ => self.consume_error_token("expect identifier"),
476 }
477 }
478
479 fn must_peek_token(&mut self) -> ParserResult<SyntaxKind> {
480 match self.peek_token() {
481 Ok(t) => Ok(t),
482 Err(_) => {
483 self.report_error("unexpected EOF");
484 Err(())
485 }
486 }
487 }
488
489 fn must_peek_eof(&mut self) -> ParserResult<()> {
490 match self.peek_token() {
491 Ok(_) => {
492 self.report_error("expect EOF");
493 Err(())
494 }
495 Err(_) => Ok(()),
496 }
497 }
498
499 fn must_token_or(&mut self, kind: SyntaxKind, message: &str) -> ParserResult<()> {
500 let t = self.must_peek_token()?;
501 if kind == t {
502 self.consume_current_token()
503 } else {
504 self.report_error(message);
505 Err(())
506 }
507 }
508
509 fn consume_current_token(&mut self) -> ParserResult<()> {
510 match self.peek_token() {
511 Err(_) => Err(()),
512 Ok(token) => {
513 self.consume_token(token, self.lexer.slice());
514 Ok(())
515 }
516 }
517 }
518
519 fn consume_error_token(&mut self, message: &str) -> ParserResult<()> {
520 self.report_error(message);
521
522 self.consume_token(ERROR, self.lexer.slice());
523
524 Err(())
525 }
526
527 fn peek_token(&mut self) -> ParserResult<SyntaxKind> {
528 if self.current_token.is_none() {
529 self.next_token();
530 }
531
532 self.current_token.ok_or(())
533 }
534
535 fn next_token(&mut self) {
536 self.current_token = None;
537 while let Some(token) = self.lexer.next() {
538 match token {
539 LINE_COMMENT | BLOCK_COMMENT => {
540 let multiline = token == BLOCK_COMMENT;
541 if let Err(err_indices) = validates::comment(self.lexer.slice(), multiline) {
542 for e in err_indices {
543 let span = self.lexer.span();
544 self.add_error(&Error {
545 range: TextRange::new(
546 TextSize::from((span.start + e) as u32),
547 TextSize::from((span.start + e) as u32),
548 ),
549 message: "invalid character in comment".into(),
550 });
551 }
552 };
553
554 self.consume_token(token, self.lexer.slice());
555 }
556 WHITESPACE | NEWLINE => {
557 self.consume_token(token, self.lexer.slice());
558 }
559 ERROR => {
560 let _ = self.consume_error_token("unexpected token");
561 }
562 _ => {
563 self.current_token = Some(token);
564 break;
565 }
566 }
567 }
568 }
569
570 fn consume_token(&mut self, kind: SyntaxKind, text: &str) {
571 self.builder.token(kind.into(), text);
572 self.current_token = None;
573 }
574
575 fn report_error(&mut self, message: &str) {
576 let span = self.lexer.span();
577
578 let err = Error {
579 range: TextRange::new(
580 TextSize::from(span.start as u32),
581 TextSize::from(span.end as u32),
582 ),
583 message: message.into(),
584 };
585 self.add_error(&err);
586 }
587
588 fn point_error(&mut self, message: &str) {
589 let span = self.lexer.span();
590 let point = TextSize::from(span.start.saturating_sub(1) as u32);
591 let err = Error {
592 range: TextRange::new(point, point),
593 message: message.into(),
594 };
595 self.add_error(&err);
596 }
597
598 fn add_error(&mut self, e: &Error) {
599 if let Some(last_err) = self.errors.last_mut() {
600 if last_err.range == e.range {
601 return;
602 }
603 }
604 self.errors.push(e.clone());
605 }
606
607 fn validate_string(&mut self) {
608 let mut indexes: HashSet<usize> = HashSet::default();
609
610 if let Err(err_indices) = validates::string(self.lexer.slice()) {
611 indexes.extend(err_indices);
612 };
613 if let Err(err_indices) = validate_quote(self.lexer.slice()) {
614 indexes.extend(err_indices);
615 };
616 let span = self.lexer.span();
617 for e in indexes {
618 self.add_error(&Error {
619 range: TextRange::new(
620 TextSize::from((span.start + e) as u32),
621 TextSize::from((span.start + e + 1) as u32),
622 ),
623 message: "invalid character in string".into(),
624 });
625 }
626 }
627 fn validate_backtick(&mut self) {
628 if let Err(err_indices) = validates::backtick_string(self.lexer.slice()) {
629 for e in err_indices {
630 let span = self.lexer.span();
631 self.add_error(&Error {
632 range: TextRange::new(
633 TextSize::from((span.start + e) as u32),
634 TextSize::from((span.start + e + 1) as u32),
635 ),
636 message: "invalid character in string".into(),
637 });
638 }
639 };
640 }
641}
642
/// Checks the placement of `_` separators in a numeric literal.
///
/// Returns `false` when the text starts or ends with `_`, when a `_` is not
/// directly preceded by a digit of the given `radix`, or when a `_` is not
/// directly followed by such a digit. Returns `true` otherwise, including
/// for the empty string.
fn validate_underscore_integer(s: &str, radix: u32) -> bool {
    if s.starts_with('_') || s.ends_with('_') {
        return false;
    }

    let mut previous = '\0';
    s.chars().all(|current| {
        let well_placed = if current == '_' {
            // A separator needs a digit right before it.
            previous.is_digit(radix)
        } else {
            // Whatever follows a separator must be a digit.
            current.is_digit(radix) || previous != '_'
        };
        previous = current;
        well_placed
    })
}
662
/// Outcome of a parse: the green tree plus every error recorded on the way.
#[derive(Debug, Clone)]
pub struct Parse {
    /// Root green node of the parsed tree.
    pub green_node: GreenNode,
    /// Errors collected while parsing.
    pub errors: Vec<Error>,
}
671
672impl Parse {
673 pub fn into_syntax(self) -> SyntaxNode {
675 SyntaxNode::new_root(self.green_node)
676 }
677 pub fn into_dom(self) -> dom::Node {
682 dom::from_syntax(self.into_syntax().into())
683 }
684}
685
/// Character-level validators for comments, strings, and glob keys.
///
/// Each validator returns `Ok(())` when the text is acceptable, or
/// `Err(offsets)` with the byte offsets (relative to the start of `s`) of
/// the offending positions.
pub(crate) mod validates {
    /// Turns a list of offending offsets into the validators' result type.
    fn into_result(err_indices: Vec<usize>) -> Result<(), Vec<usize>> {
        if err_indices.is_empty() {
            Ok(())
        } else {
            Err(err_indices)
        }
    }

    /// Flags control characters in a comment.
    ///
    /// Tabs are always allowed; `\n` and `\r` are allowed only when
    /// `multiline` is set. Offsets are byte offsets, so they can be added to
    /// a byte span even when the comment contains multi-byte characters.
    pub(crate) fn comment(s: &str, multiline: bool) -> Result<(), Vec<usize>> {
        let err_indices = s
            .char_indices()
            .filter(|&(_, c)| {
                let allowed = c == '\t' || (multiline && (c == '\n' || c == '\r'));
                c.is_control() && !allowed
            })
            .map(|(offset, _)| offset)
            .collect();

        into_result(err_indices)
    }

    /// Flags ASCII control characters other than tab in a quoted string.
    pub(crate) fn string(s: &str) -> Result<(), Vec<usize>> {
        let err_indices = s
            .char_indices()
            .filter(|&(_, c)| c != '\t' && c.is_ascii_control())
            .map(|(offset, _)| offset)
            .collect();

        into_result(err_indices)
    }

    /// Flags ASCII control characters other than tab, `\n`, and `\r` in a
    /// backtick string.
    pub(crate) fn backtick_string(s: &str) -> Result<(), Vec<usize>> {
        let err_indices = s
            .char_indices()
            .filter(|&(_, c)| c != '\t' && c != '\n' && c != '\r' && c.is_ascii_control())
            .map(|(offset, _)| offset)
            .collect();

        into_result(err_indices)
    }

    /// Validates a glob key: `*` and `**` on their own are accepted; in any
    /// other text the first occurrence of `**` is flagged.
    pub(crate) fn glob(s: &str) -> Result<(), Vec<usize>> {
        if s == "*" || s == "**" {
            return Ok(());
        }

        match s.find("**") {
            Some(offset) => Err(vec![offset]),
            None => Ok(()),
        }
    }
}