1use std::rc::Rc;
2
3use crate::{
4 analyzer::{
5 ast::{
6 BaseTypeNode, ConstNode, ConstValueNode, CppIncludeNode, DefinitionNode, DocumentNode,
7 EnumNode, EnumValueNode, ExceptionNode, ExtNode, FieldIdNode, FieldNode, FieldTypeNode,
8 FunctionNode, HeaderNode, IdentifierNode, IncludeNode, ListTypeNode, MapTypeNode,
9 NamespaceNode, ServiceNode, SetTypeNode, StructNode, TypedefNode, UnionNode,
10 },
11 base::{Error, Range},
12 scanner::Scanner,
13 token::{Token, TokenKind},
14 },
15 break_opt_token_or_eof, expect, expect_token, extract_token_value, opt_list_separator,
16 parse_definition, parse_header,
17};
18
/// Parser that consumes tokens from a [`Scanner`] and builds a [`DocumentNode`],
/// accumulating errors in a list rather than stopping at the first one.
pub struct Parser<'a> {
    /// Token source for the input being parsed.
    scanner: Scanner<'a>,
    /// Errors recorded so far (scan errors forwarded by `next_token` plus parse errors).
    errors: Vec<Error>,
    /// The token most recently returned by `next_token`; `None` until a token is consumed.
    prev_token: Option<Token>,
}
25
26impl<'a> Parser<'a> {
27 pub fn new(input: &'a [char]) -> Parser<'a> {
29 Parser {
30 scanner: Scanner::new(input),
31 errors: Vec::new(),
32 prev_token: None,
33 }
34 }
35
36 pub fn parse(mut self) -> (DocumentNode, Vec<Error>) {
38 let start = self.peek_next_token().range().start;
39 let headers = self.parse_headers();
40 let definitions = self.parse_definitions();
41 let end = self.prev_token().unwrap_or_default().range().end;
42
43 let range = Range { start, end };
44 let node = DocumentNode {
45 headers,
46 definitions,
47 range,
48 };
49
50 (node, self.errors)
51 }
52}
53
54impl<'a> Parser<'a> {
55 fn next_token(&mut self) -> Token {
56 self.skip_comment_tokens();
57 let (next_token, err) = self.scanner.scan();
58 if let Some(err) = err {
59 self.add_error(err.message, err.range);
60 }
61
62 self.prev_token = Some(next_token.clone());
63 next_token
64 }
65
    /// Returns a clone of the token most recently returned by `next_token`,
    /// or `None` if no token has been consumed yet.
    fn prev_token(&self) -> Option<Token> {
        self.prev_token.clone()
    }
69
70 fn peek_next_token(&mut self) -> Token {
71 self.skip_comment_tokens();
72
73 let state = self.scanner.save_state();
74 let (next_token, _) = self.scanner.scan();
75 self.scanner.restore_state(state);
76
77 next_token
78 }
79
    /// Consumes the next token via `next_token` and discards it.
    fn eat_next_token(&mut self) {
        self.next_token();
    }
83
84 fn skip_comment_tokens(&mut self) {
85 loop {
86 let state = self.scanner.save_state();
87 let (next_token, _) = self.scanner.scan();
88 if !next_token.is_comment() {
89 self.scanner.restore_state(state);
90 break;
91 }
92 }
93 }
94}
95
96impl<'a> Parser<'a> {
    /// Parses the leading header section (`include`, `cpp_include`, `namespace`)
    /// and returns the collected header nodes.
    fn parse_headers(&mut self) -> Vec<Rc<HeaderNode>> {
        let mut headers: Vec<Rc<HeaderNode>> = Vec::new();

        // `parse_header!` is defined elsewhere. It is handed the parser, the
        // output vector and a token-kind => parser-method table; presumably it
        // dispatches on the upcoming token and breaks out of this loop when the
        // token is not a header keyword — confirm against the macro definition.
        loop {
            parse_header!(
                self,
                headers,
                Include => parse_include,
                CppInclude => parse_cpp_include,
                Namespace => parse_namespace,
            );
        }

        headers
    }
114
115 fn parse_include(&mut self) -> Option<IncludeNode> {
116 let start = self.peek_next_token().range().start;
119 expect_token!(self, Include, "'include'");
120 let token = self.next_token();
121 let literal = extract_token_value!(self, token, Literal, "literal");
122 let end = self.prev_token().unwrap_or_default().range().end;
123
124 let range = Range { start, end };
125 Some(IncludeNode { range, literal })
126 }
127
128 fn parse_cpp_include(&mut self) -> Option<CppIncludeNode> {
129 let start = self.peek_next_token().range().start;
132 expect_token!(self, CppInclude, "'cpp_include'");
133 let token = self.next_token();
134 let literal = extract_token_value!(self, token, Literal, "literal");
135 let end = self.prev_token().unwrap_or_default().range().end;
136
137 let range = Range { start, end };
138 Some(CppIncludeNode { range, literal })
139 }
140
141 fn parse_namespace(&mut self) -> Option<NamespaceNode> {
142 let start = self.peek_next_token().range().start;
145 expect_token!(self, Namespace, "'namespace'");
146 let token = self.next_token();
147 let scope = extract_token_value!(self, token, NamespaceScope, "namespace scope");
148 let identifier = self.parse_identifier()?;
149 let ext = self.opt_parse_ext();
150 let end = self.prev_token().unwrap_or_default().range().end;
151
152 let range = Range { start, end };
153 Some(NamespaceNode {
154 range,
155 scope,
156 identifier,
157 ext,
158 })
159 }
160
161 fn parse_identifier(&mut self) -> Option<IdentifierNode> {
162 let token = self.next_token();
165 let range = token.range();
166 if let TokenKind::Identifier(name) = token.kind {
167 return Some(IdentifierNode { range, name });
168 }
169
170 let name = token.kind.to_string();
171 if !TokenKind::from_string(&name).is_some() {
172 self.add_error(format!("Invalid identifier: {}", name), token.range());
173 return None;
174 }
175
176 Some(IdentifierNode { range, name })
177 }
178}
179
180impl<'a> Parser<'a> {
    /// Parses the definition section (`const`, `typedef`, `enum`, `struct`,
    /// `union`, `exception`, `service`) and returns the collected definition nodes.
    fn parse_definitions(&mut self) -> Vec<Rc<DefinitionNode>> {
        let mut definitions: Vec<Rc<DefinitionNode>> = Vec::new();

        // `parse_definition!` is defined elsewhere. It is handed the parser, the
        // output vector and a token-kind => parser-method table; presumably it
        // dispatches on the upcoming token and is responsible for breaking out
        // of this loop (e.g. at end of input) — confirm against the macro definition.
        loop {
            parse_definition!(
                self,
                definitions,
                Const => parse_const,
                Typedef => parse_typedef,
                Enum => parse_enum,
                Struct => parse_struct,
                Union => parse_union,
                Exception => parse_exception,
                Service => parse_service,
            );
        }

        definitions
    }
203
204 fn parse_const(&mut self) -> Option<ConstNode> {
205 let start = self.peek_next_token().range().start;
208 expect_token!(self, Const, "'const'");
209 let field_type = self.parse_field_type()?;
210 let identifier = self.parse_identifier()?;
211 expect_token!(self, Assign, "'='");
212 let value = self.parse_const_value()?;
213 opt_list_separator!(self);
214 let end = self.prev_token().unwrap_or_default().range().end;
215
216 let range = Range { start, end };
217 Some(ConstNode {
218 range,
219 field_type,
220 identifier,
221 value,
222 })
223 }
224
225 fn parse_field_type(&mut self) -> Option<FieldTypeNode> {
226 let next_token = self.peek_next_token();
229 match next_token.kind {
230 TokenKind::Identifier(ref identifier) => {
231 self.eat_next_token();
232 return Some(FieldTypeNode::Identifier(IdentifierNode {
233 range: next_token.range(),
234 name: identifier.clone(),
235 }));
236 }
237 _ => {
238 return self.parse_definition_type();
239 }
240 }
241 }
242
243 fn parse_definition_type(&mut self) -> Option<FieldTypeNode> {
244 let next_token = self.peek_next_token();
247 match next_token.kind {
248 TokenKind::BaseType(ref base_type) => {
249 self.eat_next_token();
250 return Some(FieldTypeNode::BaseType(BaseTypeNode {
251 range: next_token.range(),
252 name: base_type.clone(),
253 }));
254 }
255 _ => {
256 return self.parse_container_type();
257 }
258 }
259 }
260
261 fn parse_container_type(&mut self) -> Option<FieldTypeNode> {
262 let next_token = self.peek_next_token();
265 match next_token.kind {
266 TokenKind::Map => self.parse_map_type().map(|x| FieldTypeNode::MapType(x)),
267 TokenKind::Set => self.parse_set_type().map(|x| FieldTypeNode::SetType(x)),
268 TokenKind::List => self.parse_list_type().map(|x| FieldTypeNode::ListType(x)),
269 _ => {
270 self.add_error(
271 format!("Expected map, set, or list, but got {}", next_token.kind),
272 next_token.range(),
273 );
274 None
275 }
276 }
277 }
278
279 fn opt_parse_cpp_type(&mut self) -> Option<String> {
280 if self.peek_next_token().kind != TokenKind::CppType {
283 return None;
284 }
285 expect_token!(self, CppType, "'cpp_type'");
286
287 let token = self.next_token();
288 Some(extract_token_value!(self, token, Identifier, "identifier"))
289 }
290
291 fn parse_map_type(&mut self) -> Option<MapTypeNode> {
292 let start = self.peek_next_token().range().start;
295 expect_token!(self, Map, "'map'");
296 let cpp_type = self.opt_parse_cpp_type();
297
298 expect_token!(self, Less, "'<'");
299 let key_type = Box::new(self.parse_field_type()?);
300 expect!(self, TokenKind::ListSeparator(','), "','");
301 let value_type = Box::new(self.parse_field_type()?);
302 expect_token!(self, Greater, "'>'");
303 let end = self.prev_token().unwrap_or_default().range().end;
304
305 let range = Range { start, end };
306 Some(MapTypeNode {
307 range,
308 cpp_type,
309 key_type,
310 value_type,
311 })
312 }
313
314 fn parse_set_type(&mut self) -> Option<SetTypeNode> {
315 let start = self.peek_next_token().range().start;
318 expect_token!(self, Set, "'set'");
319 let cpp_type = self.opt_parse_cpp_type();
320
321 expect!(self, TokenKind::Less, "'<'");
322 let type_node = Box::new(self.parse_field_type()?);
323 expect_token!(self, Greater, "'>'");
324 let end = self.prev_token().unwrap_or_default().range().end;
325
326 let range = Range { start, end };
327 Some(SetTypeNode {
328 range,
329 cpp_type,
330 type_node,
331 })
332 }
333
334 fn parse_list_type(&mut self) -> Option<ListTypeNode> {
335 let start = self.peek_next_token().range().start;
338 expect_token!(self, List, "'list'");
339 let cpp_type = self.opt_parse_cpp_type();
340
341 expect!(self, TokenKind::Less, "'<'");
342 let type_node = Box::new(self.parse_field_type()?);
343 expect_token!(self, Greater, "'>'");
344 let end = self.prev_token().unwrap_or_default().range().end;
345
346 let range = Range { start, end };
347 Some(ListTypeNode {
348 range,
349 cpp_type,
350 type_node,
351 })
352 }
353
354 fn parse_const_value(&mut self) -> Option<ConstValueNode> {
355 let next_token = self.peek_next_token();
358 match &next_token.kind {
359 TokenKind::IntConstant(value)
360 | TokenKind::DoubleConstant(value)
361 | TokenKind::Literal(value)
362 | TokenKind::Identifier(value) => {
363 self.eat_next_token();
364 Some(ConstValueNode {
365 range: next_token.range(),
366 value: value.clone(),
367 })
368 }
369 TokenKind::Lbrack => self.parse_const_list(),
370 TokenKind::Lbrace => self.parse_const_map(),
371 _ => {
372 self.eat_next_token();
373 self.add_error(
374 format!("Expected constant value, but got {}", next_token.kind),
375 next_token.range(),
376 );
377 None
378 }
379 }
380 }
381
382 fn parse_const_list(&mut self) -> Option<ConstValueNode> {
383 let start = self.peek_next_token().range().start;
386 expect_token!(self, Lbrack, "'['");
387 let mut values = Vec::new();
388 loop {
389 break_opt_token_or_eof!(self, Rbrack);
390 values.push(self.parse_const_value()?.value);
391 opt_list_separator!(self);
392 }
393 let end = self.prev_token().unwrap_or_default().range().end;
394
395 let range = Range { start, end };
396 Some(ConstValueNode {
397 range,
398 value: format!("[{}]", values.join(", ")),
399 })
400 }
401
402 fn parse_const_map(&mut self) -> Option<ConstValueNode> {
403 let start = self.peek_next_token().range().start;
406 expect_token!(self, Lbrace, "'{'");
407 let mut pairs = Vec::new();
408 loop {
409 break_opt_token_or_eof!(self, Rbrace);
410 pairs.push(self.parse_const_map_value()?);
411 }
412 let end = self.prev_token().unwrap_or_default().range().end;
413
414 let range = Range { start, end };
415 Some(ConstValueNode {
416 range,
417 value: format!(
418 "{{{}}}",
419 pairs
420 .iter()
421 .map(|(k, v)| format!("{}: {}", k, v))
422 .collect::<Vec<_>>()
423 .join(", ")
424 ),
425 })
426 }
427
428 fn parse_const_map_value(&mut self) -> Option<(String, String)> {
429 let key = self.parse_const_value()?;
432 expect_token!(self, Colon, "':'");
433 let value = self.parse_const_value()?;
434 opt_list_separator!(self);
435
436 Some((key.value, value.value))
437 }
438
439 fn parse_typedef(&mut self) -> Option<TypedefNode> {
440 let start = self.peek_next_token().range().start;
443 expect_token!(self, Typedef, "'typedef'");
444 let definition_type = self.parse_definition_type()?;
445 let identifier = self.parse_identifier()?;
446 let end = self.prev_token().unwrap_or_default().range().end;
447
448 let range = Range { start, end };
449 Some(TypedefNode {
450 range,
451 definition_type,
452 identifier,
453 })
454 }
455
456 fn parse_enum(&mut self) -> Option<EnumNode> {
457 let start = self.peek_next_token().range().start;
460 expect_token!(self, Enum, "'enum'");
461 let identifier = self.parse_identifier()?;
462 expect_token!(self, Lbrace, "'{'");
463
464 let mut values = Vec::new();
465 loop {
466 break_opt_token_or_eof!(self, Rbrace);
467 if let Some(value) = self.parse_enum_value() {
468 values.push(value);
469 } else {
470 self.recover_to_next_line();
471 }
472 }
473 let end = self.prev_token().unwrap_or_default().range().end;
474
475 let range = Range { start, end };
476 Some(EnumNode {
477 range,
478 identifier,
479 values,
480 })
481 }
482
483 fn parse_enum_value(&mut self) -> Option<EnumValueNode> {
484 let start = self.peek_next_token().range().start;
487 let token = self.next_token();
488 let name = extract_token_value!(self, token, Identifier, "identifier");
489
490 let mut value = None;
491 let next_token = self.peek_next_token();
492 if next_token.kind == TokenKind::Assign {
493 self.eat_next_token();
494 let token = self.next_token();
495 value = Some(
496 extract_token_value!(self, token, IntConstant, "integer constant")
497 .parse::<i32>()
498 .unwrap_or_default(),
499 );
500 }
501
502 let ext = self.opt_parse_ext();
503 opt_list_separator!(self);
504 let end = self.prev_token().unwrap_or_default().range().end;
505
506 let range = Range { start, end };
507 Some(EnumValueNode {
508 range,
509 name,
510 value,
511 ext,
512 })
513 }
514
515 fn parse_struct(&mut self) -> Option<StructNode> {
516 let start = self.peek_next_token().range().start;
519 expect_token!(self, Struct, "'struct'");
520 let identifier = self.parse_identifier()?;
521 expect_token!(self, Lbrace, "'{'");
522
523 let mut fields = Vec::new();
524 loop {
525 break_opt_token_or_eof!(self, Rbrace);
526 if let Some(field) = self.parse_field() {
527 fields.push(field);
528 } else {
529 self.recover_to_next_line();
530 }
531 }
532 let ext = self.opt_parse_ext();
533 let end = self.prev_token().unwrap_or_default().range().end;
534
535 let range = Range { start, end };
536 Some(StructNode {
537 range,
538 identifier,
539 fields,
540 ext,
541 })
542 }
543
544 fn parse_field(&mut self) -> Option<FieldNode> {
545 let start = self.peek_next_token().range().start;
548 let mut field_id = None;
549 let mut field_req = None;
550
551 let next_token = self.peek_next_token();
552 match next_token.kind {
553 TokenKind::IntConstant(ref id) => {
554 field_id = Some(FieldIdNode {
555 range: next_token.range(),
556 id: id.parse().unwrap_or_default(),
557 });
558 self.eat_next_token();
559 expect_token!(self, Colon, "':'");
560 }
561 TokenKind::Required | TokenKind::Optional => {
562 field_req = Some(match next_token.kind {
563 TokenKind::Required => "required".to_string(),
564 TokenKind::Optional => "optional".to_string(),
565 _ => unreachable!(),
566 });
567 self.eat_next_token();
568 }
569 _ => {}
570 }
571
572 let next_token = self.peek_next_token();
573 if let TokenKind::Required | TokenKind::Optional = next_token.kind {
574 if !field_req.is_none() {
575 self.add_error(
576 format!("Expected field type, but got {}", next_token.kind),
577 next_token.range(),
578 );
579 return None;
580 }
581 field_req = Some(match next_token.kind {
582 TokenKind::Required => "required".to_string(),
583 TokenKind::Optional => "optional".to_string(),
584 _ => unreachable!(),
585 });
586 self.eat_next_token();
587 }
588
589 let field_type = self.parse_field_type()?;
590 let identifier = self.parse_identifier()?;
591
592 let mut default_value = None;
593 let next_token = self.peek_next_token();
594 if next_token.kind == TokenKind::Assign {
595 self.eat_next_token();
596 default_value = Some(self.parse_const_value()?);
597 }
598
599 let ext = self.opt_parse_ext();
600 opt_list_separator!(self);
601 let end = self.prev_token().unwrap_or_default().range().end;
602
603 let range = Range { start, end };
604 Some(FieldNode {
605 range,
606 field_id,
607 field_req,
608 field_type,
609 identifier,
610 default_value,
611 ext,
612 })
613 }
614
615 fn parse_union(&mut self) -> Option<UnionNode> {
616 let start = self.peek_next_token().range().start;
619 expect_token!(self, Union, "'union'");
620 let identifier = self.parse_identifier()?;
621 expect_token!(self, Lbrace, "'{'");
622
623 let mut fields = Vec::new();
624 loop {
625 break_opt_token_or_eof!(self, Rbrace);
626 if let Some(field) = self.parse_field() {
627 fields.push(field);
628 } else {
629 self.recover_to_next_line();
630 }
631 }
632 let end = self.prev_token().unwrap_or_default().range().end;
633
634 let range = Range { start, end };
635 Some(UnionNode {
636 range,
637 identifier,
638 fields,
639 })
640 }
641
642 fn parse_exception(&mut self) -> Option<ExceptionNode> {
643 let start = self.peek_next_token().range().start;
646 expect_token!(self, Exception, "'exception'");
647 let identifier = self.parse_identifier()?;
648 expect_token!(self, Lbrace, "'{'");
649
650 let mut fields = Vec::new();
651 loop {
652 break_opt_token_or_eof!(self, Rbrace);
653 if let Some(field) = self.parse_field() {
654 fields.push(field);
655 } else {
656 self.recover_to_next_line();
657 }
658 }
659 let end = self.prev_token().unwrap_or_default().range().end;
660
661 let range = Range { start, end };
662 Some(ExceptionNode {
663 range,
664 identifier,
665 fields,
666 })
667 }
668
669 fn parse_service(&mut self) -> Option<ServiceNode> {
670 let start = self.peek_next_token().range().start;
673 expect_token!(self, Service, "'service'");
674 let identifier = self.parse_identifier()?;
675
676 let mut extends = None;
677 let next_token = self.peek_next_token();
678 if next_token.kind == TokenKind::Extends {
679 self.eat_next_token();
680 extends = self.parse_identifier();
681 }
682
683 expect_token!(self, Lbrace, "'{'");
684 let mut functions = Vec::new();
685 loop {
686 break_opt_token_or_eof!(self, Rbrace);
687 if let Some(function) = self.parse_function() {
688 functions.push(function);
689 } else {
690 self.recover_to_next_line();
691 }
692 }
693 let end = self.prev_token().unwrap_or_default().range().end;
694
695 let range = Range { start, end };
696 Some(ServiceNode {
697 range,
698 identifier,
699 extends,
700 functions,
701 })
702 }
703
704 fn parse_function(&mut self) -> Option<FunctionNode> {
705 let start = self.peek_next_token().range().start;
708 let mut is_oneway = false;
709 let next_token = self.peek_next_token();
710 if next_token.kind == TokenKind::Oneway {
711 is_oneway = true;
712 self.eat_next_token();
713 }
714
715 let function_type = self.parse_function_type();
716 let identifier = self.parse_identifier()?;
717 expect_token!(self, Lparen, "'('");
718
719 let mut fields = Vec::new();
720 loop {
721 break_opt_token_or_eof!(self, Rparen);
722 fields.push(self.parse_field()?);
723 }
724
725 let mut throws = None;
726 let next_token = self.peek_next_token();
727 if next_token.kind == TokenKind::Throws {
728 throws = Some(self.parse_throws()?);
729 }
730 let ext = self.opt_parse_ext();
731 opt_list_separator!(self);
732 let end = self.prev_token().unwrap_or_default().range().end;
733
734 let range = Range { start, end };
735 Some(FunctionNode {
736 range,
737 is_oneway,
738 function_type,
739 identifier,
740 fields,
741 throws,
742 ext,
743 })
744 }
745
746 fn parse_function_type(&mut self) -> Option<FieldTypeNode> {
747 let next_token = self.peek_next_token();
750 if next_token.kind == TokenKind::Void {
751 self.eat_next_token();
752 return None;
753 }
754 self.parse_field_type()
755 }
756
757 fn parse_throws(&mut self) -> Option<Vec<FieldNode>> {
758 expect_token!(self, Throws, "'throws'");
761 expect_token!(self, Lparen, "'('");
762
763 let mut fields = Vec::new();
764 loop {
765 break_opt_token_or_eof!(self, Rparen);
766 fields.push(self.parse_field()?);
767 }
768
769 Some(fields)
770 }
771
772 fn opt_parse_ext(&mut self) -> Option<ExtNode> {
773 let start = self.peek_next_token().range().start;
776 if self.peek_next_token().kind != TokenKind::Lparen {
777 return None;
778 }
779 expect_token!(self, Lparen, "'('");
780
781 let mut kv_pairs = Vec::new();
782 loop {
783 break_opt_token_or_eof!(self, Rparen);
784 kv_pairs.push(self.parse_key_value_pair()?);
785 }
786
787 let end = self.prev_token().unwrap_or_default().range().end;
788 let range = Range { start, end };
789 Some(ExtNode { kv_pairs, range })
790 }
791
792 fn parse_key_value_pair(&mut self) -> Option<(String, String)> {
793 let token = self.next_token();
796 let key = extract_token_value!(self, token, Identifier, "identifier");
797 expect_token!(self, Assign, "'='");
798 let token = self.next_token();
799 let value = extract_token_value!(self, token, Literal, "literal");
800 opt_list_separator!(self);
801
802 Some((key, value))
803 }
804}
805
806impl<'a> Parser<'a> {
    /// Appends an error with the given message and source range to the
    /// parser's error list.
    fn add_error(&mut self, message: String, range: Range) {
        self.errors.push(Error { range, message });
    }
811
812 fn recover_to_next_definition(&mut self) {
813 loop {
814 let next_token = self.peek_next_token();
815 if next_token.is_eof() {
816 return;
817 }
818
819 match next_token.kind {
820 TokenKind::Const
821 | TokenKind::Typedef
822 | TokenKind::Enum
823 | TokenKind::Struct
824 | TokenKind::Union
825 | TokenKind::Exception
826 | TokenKind::Service => {
827 break;
828 }
829 _ => self.eat_next_token(),
830 }
831 }
832 }
833
    /// Error recovery: delegates to the scanner's `skip_to_next_line`, so the
    /// remainder of the current line is abandoned.
    fn recover_to_next_line(&mut self) {
        self.scanner.skip_to_next_line();
    }
837}
838
#[cfg(test)]
mod tests {
    use std::{fs, path::Path};

    use super::*;

    /// Reads a fixture file, given relative to the current working directory,
    /// into the `Vec<char>` form the parser takes as input.
    fn read_fixture(relative_path: &str) -> Vec<char> {
        let work_path = std::env::current_dir().unwrap();
        let file_path = work_path.join(Path::new(relative_path));
        fs::read_to_string(&file_path).unwrap().chars().collect()
    }

    /// A valid Thrift file must parse without recording any error.
    #[test]
    fn parse_success() {
        let content = read_fixture("./lib/analyzer/test_file/ThriftTest.thrift");

        let (document, errors) = Parser::new(&content).parse();
        println!("Document: {:#?}", document);
        println!("\nErrors:");
        for error in &errors {
            println!(" {:?}: {}", error.range, error.message);
        }
        assert!(errors.is_empty());
    }

    /// An invalid Thrift file must record at least one error.
    #[test]
    fn parse_failed() {
        let content = read_fixture("./lib/analyzer/test_file/InvalidThriftTest.thrift");

        let (document, errors) = Parser::new(&content).parse();
        println!("Document: {:#?}", document);
        println!("\nErrors:");
        for error in &errors {
            println!(" {:?}: {}", error.range, error.message);
        }
        assert!(!errors.is_empty());
    }
}