Skip to main content

syster/parser/
parser.rs

//! Recursive descent parser for SysML v2
//!
//! Builds a rowan GreenNode tree from tokens.
//! Supports error recovery and produces a lossless CST.
5
6use super::grammar::BaseParser;
7use super::grammar::kerml::KerMLParser;
8use super::grammar::kerml_expressions::{self, ExpressionParser};
9use super::grammar::sysml::SysMLParser;
10use super::lexer::{Lexer, Token};
11use super::syntax_kind::SyntaxKind;
12use rowan::{GreenNode, GreenNodeBuilder, TextRange, TextSize};
13
14/// Parse result containing the green tree and any errors
15#[derive(Debug, Clone)]
16pub struct Parse {
17    pub green: GreenNode,
18    pub errors: Vec<SyntaxError>,
19}
20
21impl Parse {
22    /// Get the root syntax node
23    pub fn syntax(&self) -> super::SyntaxNode {
24        super::SyntaxNode::new_root(self.green.clone())
25    }
26
27    /// Check if parsing succeeded without errors
28    pub fn ok(&self) -> bool {
29        self.errors.is_empty()
30    }
31}
32
33/// A syntax error with location and message
34#[derive(Debug, Clone, PartialEq, Eq)]
35pub struct SyntaxError {
36    pub message: String,
37    pub range: TextRange,
38}
39
40impl SyntaxError {
41    pub fn new(message: impl Into<String>, range: TextRange) -> Self {
42        Self {
43            message: message.into(),
44            range,
45        }
46    }
47}
48
49/// Parse SysML source code into a CST
50pub fn parse_sysml(input: &str) -> Parse {
51    let tokens: Vec<_> = Lexer::new(input).collect();
52    let mut parser = Parser::new(&tokens, input);
53    super::grammar::sysml::parse_sysml_file(&mut parser);
54    parser.finish()
55}
56
57/// Parse KerML source code into a CST
58pub fn parse_kerml(input: &str) -> Parse {
59    let tokens: Vec<_> = Lexer::new(input).collect();
60    let mut parser = Parser::new(&tokens, input);
61    super::grammar::kerml::parse_kerml_file(&mut parser);
62    parser.finish()
63}
64
65/// Convert a SyntaxKind to a human-readable name for error messages.
66///
67/// **Deprecated**: Use `kind.display_name()` instead.
68#[inline]
69pub fn kind_to_name(kind: SyntaxKind) -> &'static str {
70    kind.display_name()
71}
72
/// Returns `true` when parser debug logging is enabled, i.e. when the
/// `SYSTER_PARSER_DEBUG` environment variable is set.
///
/// Only the *presence* of the variable matters, so the lookup uses
/// `std::env::var_os`: unlike `std::env::var`, it does not report a variable
/// whose value is not valid Unicode as missing, and it avoids the UTF-8
/// validation of the value altogether.
// The only in-file caller (`Parser::log`) is itself allowed to be unused.
#[allow(dead_code)]
fn debug_enabled() -> bool {
    std::env::var_os("SYSTER_PARSER_DEBUG").is_some()
}
78
79/// The parser state
80#[allow(dead_code)]
81struct Parser<'a> {
82    tokens: &'a [Token<'a>],
83    pos: usize,
84    builder: GreenNodeBuilder<'static>,
85    errors: Vec<SyntaxError>,
86    source: &'a str,
87    depth: usize,
88}
89
90impl<'a> Parser<'a> {
91    fn new(tokens: &'a [Token<'a>], source: &'a str) -> Self {
92        Self {
93            tokens,
94            pos: 0,
95            builder: GreenNodeBuilder::new(),
96            errors: Vec::new(),
97            source,
98            depth: 0,
99        }
100    }
101
102    /// Log a debug message with indentation based on parse depth
103    #[allow(dead_code)]
104    fn log(&self, msg: &str) {
105        if debug_enabled() {
106            let indent = "  ".repeat(self.depth);
107            let token_info = if let Some(t) = self.current() {
108                format!(
109                    "{:?} '{}'",
110                    t.kind,
111                    t.text.chars().take(20).collect::<String>()
112                )
113            } else {
114                "EOF".to_string()
115            };
116            eprintln!("{}[PARSER] {} | token: {}", indent, msg, token_info);
117        }
118    }
119
120    fn finish(self) -> Parse {
121        Parse {
122            green: self.builder.finish(),
123            errors: self.errors,
124        }
125    }
126
127    // =========================================================================
128    // Token inspection
129    // =========================================================================
130
131    fn current(&self) -> Option<&Token<'a>> {
132        self.tokens.get(self.pos)
133    }
134
135    fn current_kind(&self) -> SyntaxKind {
136        self.current().map(|t| t.kind).unwrap_or(SyntaxKind::ERROR)
137    }
138
139    #[allow(dead_code)]
140    fn current_text(&self) -> &str {
141        self.current().map(|t| t.text).unwrap_or("")
142    }
143
144    fn at(&self, kind: SyntaxKind) -> bool {
145        self.current_kind() == kind
146    }
147
148    fn at_any(&self, kinds: &[SyntaxKind]) -> bool {
149        kinds.contains(&self.current_kind())
150    }
151
152    fn at_eof(&self) -> bool {
153        self.pos >= self.tokens.len()
154    }
155
156    fn nth(&self, n: usize) -> SyntaxKind {
157        // Look ahead, skipping trivia
158        let mut idx = self.pos;
159        let mut count = 0;
160        while idx < self.tokens.len() {
161            if !self.tokens[idx].kind.is_trivia() {
162                if count == n {
163                    return self.tokens[idx].kind;
164                }
165                count += 1;
166            }
167            idx += 1;
168        }
169        SyntaxKind::ERROR
170    }
171
172    // =========================================================================
173    // Token consumption
174    // =========================================================================
175
176    fn bump(&mut self) {
177        if let Some(token) = self.current() {
178            self.builder.token(token.kind.into(), token.text);
179            self.pos += 1;
180        }
181    }
182
183    fn bump_any(&mut self) {
184        self.bump();
185    }
186
187    fn eat(&mut self, kind: SyntaxKind) -> bool {
188        if self.at(kind) {
189            self.bump();
190            true
191        } else {
192            false
193        }
194    }
195
196    fn expect(&mut self, kind: SyntaxKind) -> bool {
197        if self.eat(kind) {
198            true
199        } else {
200            let expected = kind.display_name();
201            let found = self
202                .current()
203                .map(|t| t.kind.display_name())
204                .unwrap_or("end of file");
205            self.error(format!("expected {}, found {}", expected, found));
206            false
207        }
208    }
209
210    fn skip_trivia(&mut self) {
211        while self.current().map(|t| t.kind.is_trivia()).unwrap_or(false) {
212            self.bump();
213        }
214    }
215
216    /// Skip only whitespace (preserves comments)
217    #[allow(dead_code)]
218    fn skip_whitespace_only(&mut self) {
219        while self.at(SyntaxKind::WHITESPACE) {
220            self.bump();
221        }
222    }
223
224    // =========================================================================
225    // Error handling
226    // =========================================================================
227
228    fn error(&mut self, message: impl Into<String>) {
229        let range = self
230            .current()
231            .map(|t| TextRange::at(t.offset, TextSize::of(t.text)))
232            .unwrap_or_else(|| TextRange::empty(TextSize::new(0)));
233        self.errors.push(SyntaxError::new(message, range));
234    }
235
236    fn error_recover(&mut self, message: impl Into<String>, recovery: &[SyntaxKind]) {
237        self.error(message);
238        self.builder.start_node(SyntaxKind::ERROR.into());
239        // Always consume at least one token to make progress
240        let mut consumed = false;
241        while !self.at_eof() && !self.at_any(recovery) {
242            self.bump_any();
243            consumed = true;
244        }
245        // If we didn't consume anything and we're not at EOF, consume one token
246        // to prevent infinite loops
247        if !consumed && !self.at_eof() {
248            self.bump_any();
249        }
250        self.builder.finish_node();
251    }
252
253    // =========================================================================
254    // Node building helpers
255    // =========================================================================
256
257    fn start_node(&mut self, kind: SyntaxKind) {
258        self.builder.start_node(kind.into());
259    }
260
261    fn finish_node(&mut self) {
262        self.builder.finish_node();
263    }
264}
265
// =============================================================================
// Trait Implementations for Grammar Modules
// =============================================================================
269
270/// Implement ExpressionParser trait to allow grammar modules to work with Parser
271impl<'a> ExpressionParser for Parser<'a> {
272    fn current_kind(&self) -> SyntaxKind {
273        Parser::current_kind(self)
274    }
275
276    fn at(&self, kind: SyntaxKind) -> bool {
277        Parser::at(self, kind)
278    }
279
280    fn at_any(&self, kinds: &[SyntaxKind]) -> bool {
281        Parser::at_any(self, kinds)
282    }
283
284    fn at_name_token(&self) -> bool {
285        // In SysML/KerML, certain keywords can be used as identifiers in context
286        // (contextual keywords). This includes names like "start", "end", "done", "this" etc.
287        // which are common member names in action definitions or self-references.
288        // Also includes "type" which is a very common feature/attribute name.
289        // And "entry", "exit", "accept", "frame", "do" which are used as step/parameter names.
290        // Also "step" and "feature" which are used as subset targets in metadata defs.
291        // And "behavior", "occurrence", "connection", "function" which appear as feature names
292        // being redefined/subsetted in the standard library (SysML.sysml).
293        // Also "predicate", "interaction", "metaclass", "member" which appear as feature names.
294        // Also "var" which is used as a feature name in Actions.sysml (assign var := ...)
295        matches!(
296            self.current_kind(),
297            SyntaxKind::IDENT
298                | SyntaxKind::START_KW
299                | SyntaxKind::END_KW
300                | SyntaxKind::DONE_KW
301                | SyntaxKind::THIS_KW
302                | SyntaxKind::TYPE_KW
303                | SyntaxKind::ENTRY_KW
304                | SyntaxKind::EXIT_KW
305                | SyntaxKind::ACCEPT_KW
306                | SyntaxKind::FRAME_KW
307                | SyntaxKind::DO_KW
308                | SyntaxKind::STEP_KW
309                | SyntaxKind::FEATURE_KW
310                | SyntaxKind::BEHAVIOR_KW
311                | SyntaxKind::OCCURRENCE_KW
312                | SyntaxKind::CONNECTION_KW
313                | SyntaxKind::FUNCTION_KW
314                | SyntaxKind::PREDICATE_KW
315                | SyntaxKind::INTERACTION_KW
316                | SyntaxKind::METACLASS_KW
317                | SyntaxKind::MEMBER_KW
318                | SyntaxKind::VAR_KW
319        )
320    }
321
322    fn get_pos(&self) -> usize {
323        self.pos
324    }
325
326    fn peek_kind(&self, n: usize) -> SyntaxKind {
327        self.nth(n)
328    }
329
330    fn bump(&mut self) {
331        Parser::bump(self)
332    }
333
334    fn bump_any(&mut self) {
335        Parser::bump_any(self)
336    }
337
338    fn expect(&mut self, kind: SyntaxKind) {
339        Parser::expect(self, kind);
340    }
341
342    fn skip_trivia(&mut self) {
343        Parser::skip_trivia(self)
344    }
345
346    fn start_node(&mut self, kind: SyntaxKind) {
347        Parser::start_node(self, kind)
348    }
349
350    fn finish_node(&mut self) {
351        Parser::finish_node(self)
352    }
353
354    fn parse_qualified_name(&mut self) {
355        super::grammar::kerml::parse_qualified_name(self, &[])
356    }
357
358    fn parse_argument(&mut self) {
359        kerml_expressions::parse_argument(self)
360    }
361}
362
363/// Implement BaseParser trait — shared methods used by both KerML and SysML grammars
364impl<'a> BaseParser for Parser<'a> {
365    fn current_token_text(&self) -> Option<&str> {
366        self.current().map(|t| t.text)
367    }
368
369    fn parse_identification(&mut self) {
370        super::grammar::kerml::parse_identification(self)
371    }
372
373    fn skip_trivia_except_block_comments(&mut self) {
374        while self
375            .current()
376            .map(|t| t.kind == SyntaxKind::WHITESPACE || t.kind == SyntaxKind::LINE_COMMENT)
377            .unwrap_or(false)
378        {
379            self.bump();
380        }
381    }
382
383    fn parse_qualified_name_list(&mut self) {
384        super::grammar::kerml::parse_qualified_name(self, &[]);
385        while self.at(SyntaxKind::COMMA) {
386            self.bump();
387            self.skip_trivia();
388            super::grammar::kerml::parse_qualified_name(self, &[]);
389        }
390    }
391
392    fn error(&mut self, message: impl Into<String>) {
393        Parser::error(self, message)
394    }
395
396    fn error_recover(&mut self, message: impl Into<String>, recovery: &[SyntaxKind]) {
397        Parser::error_recover(self, message, recovery)
398    }
399}
400
401/// Implement KerMLParser trait for kerml grammar module
402impl<'a> KerMLParser for Parser<'a> {
403    fn parse_body(&mut self) {
404        super::grammar::kerml::parse_body(self)
405    }
406
407    fn parse_package(&mut self) {
408        super::grammar::kerml::parse_package(self)
409    }
410
411    fn parse_library_package(&mut self) {
412        super::grammar::kerml::parse_library_package(self)
413    }
414
415    fn parse_import(&mut self) {
416        super::grammar::kerml::parse_import(self)
417    }
418
419    fn parse_alias(&mut self) {
420        super::grammar::kerml::parse_alias(self)
421    }
422
423    fn parse_definition(&mut self) {
424        super::grammar::kerml::parse_definition_impl(self)
425    }
426
427    fn parse_usage(&mut self) {
428        super::grammar::kerml::parse_usage_impl(self)
429    }
430
431    fn parse_invariant(&mut self) {
432        super::grammar::kerml::parse_invariant(self)
433    }
434
435    fn parse_parameter(&mut self) {
436        super::grammar::kerml::parse_parameter_impl(self)
437    }
438
439    fn parse_end_feature_or_parameter(&mut self) {
440        super::grammar::kerml::parse_end_feature_or_parameter(self)
441    }
442
443    fn parse_connector_usage(&mut self) {
444        super::grammar::kerml::parse_connector_usage(self)
445    }
446
447    fn parse_flow_usage(&mut self) {
448        super::grammar::kerml::parse_flow_usage(self)
449    }
450}
451
452/// Implement SysMLParser trait for sysml grammar module
453impl<'a> SysMLParser for Parser<'a> {
454    fn parse_body(&mut self) {
455        super::grammar::sysml::parse_body(self)
456    }
457
458    fn can_start_expression(&self) -> bool {
459        matches!(
460            self.current_kind(),
461            // Literals
462            SyntaxKind::INTEGER | SyntaxKind::DECIMAL | SyntaxKind::STRING |
463            SyntaxKind::TRUE_KW | SyntaxKind::FALSE_KW | SyntaxKind::NULL_KW |
464            // Expression starters
465            SyntaxKind::NEW_KW | SyntaxKind::L_BRACE | SyntaxKind::L_PAREN |
466            SyntaxKind::IF_KW | SyntaxKind::IDENT | SyntaxKind::THIS_KW |
467            // Unary prefix operators
468            SyntaxKind::NOT_KW | SyntaxKind::MINUS | SyntaxKind::PLUS |
469            SyntaxKind::TILDE | SyntaxKind::BANG |
470            // Type classification operators (prefix form)
471            SyntaxKind::HASTYPE_KW | SyntaxKind::ISTYPE_KW | SyntaxKind::ALL_KW |
472            // Metadata access
473            SyntaxKind::AT
474        )
475    }
476
477    fn parse_typing(&mut self) {
478        super::grammar::kerml::parse_typing(self)
479    }
480
481    fn parse_multiplicity(&mut self) {
482        super::grammar::kerml::parse_multiplicity(self)
483    }
484
485    fn parse_constraint_body(&mut self) {
486        super::grammar::sysml::parse_constraint_body(self)
487    }
488
489    fn parse_definition_or_usage(&mut self) {
490        super::grammar::sysml::parse_definition_or_usage(self)
491    }
492
493    fn parse_dependency(&mut self) {
494        super::grammar::sysml::parse_dependency(self)
495    }
496
497    fn parse_filter(&mut self) {
498        super::grammar::sysml::parse_filter(self)
499    }
500
501    fn parse_metadata_usage(&mut self) {
502        super::grammar::sysml::parse_metadata_usage(self)
503    }
504
505    fn parse_connect_usage(&mut self) {
506        super::grammar::sysml::parse_connect_usage(self)
507    }
508
509    fn parse_binding_or_succession(&mut self) {
510        super::grammar::sysml::parse_binding_or_succession(self)
511    }
512
513    fn parse_variant_usage(&mut self) {
514        super::grammar::sysml::parse_variant_usage(self)
515    }
516
517    fn parse_redefines_feature_member(&mut self) {
518        super::grammar::sysml::parse_redefines_feature_member(self)
519    }
520
521    fn parse_shorthand_feature_member(&mut self) {
522        super::grammar::sysml::parse_shorthand_feature_member(self)
523    }
524}
525
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `source` as SysML and fail the calling test if any syntax error
    /// was reported; the parse result is handed back for further checks.
    #[track_caller]
    fn parse_clean(source: &str) -> Parse {
        let parse = parse_sysml(source);
        assert!(parse.ok(), "errors: {:?}", parse.errors);
        parse
    }

    #[test]
    fn test_parse_empty() {
        parse_clean("");
    }

    #[test]
    fn test_parse_simple_package() {
        let root = parse_clean("package Test;").syntax();
        assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);
    }

    #[test]
    fn test_parse_package_with_body() {
        parse_clean("package Vehicle { part def Engine; }");
    }

    #[test]
    fn test_parse_import() {
        parse_clean("import ISQ::*;");
    }

    #[test]
    fn test_parse_import_with_filter() {
        parse_clean("import Library::*[@MyFilter];");
    }

    #[test]
    fn test_parse_part_definition() {
        parse_clean("part def Vehicle :> Base;");
    }

    #[test]
    fn test_parse_part_usage() {
        parse_clean("part engine : Engine;");
    }

    #[test]
    fn test_parse_nested() {
        parse_clean(
            r#"
            package Vehicle {
                part def Engine {
                    attribute power : Real;
                }
                part engine : Engine;
            }
        "#,
        );
    }

    #[test]
    fn test_parse_attribute_with_default() {
        parse_clean("attribute x : Integer = 42;");
    }

    #[test]
    fn test_parse_attribute_with_expression() {
        parse_clean("attribute y : Real = 3.14 + 2.0;");
    }

    #[test]
    fn test_parse_multiplicity() {
        parse_clean("part engines[2..*] : Engine;");
    }

    #[test]
    fn test_parse_function_invocation() {
        parse_clean("calc result = compute(x, y);");
    }

    #[test]
    fn test_parse_conditional_expression() {
        parse_clean("attribute flag : Boolean = x > 0 ? true : false;");
    }

    #[test]
    fn test_parse_comment_about() {
        parse_clean(
            r#"
            package Test {
                comment about Foo, Bar
                /*
                 * This is a comment about Foo and Bar
                 */
                part def Foo { }
            }
        "#,
        );
    }

    #[test]
    fn test_parse_if_expression() {
        parse_clean("attribute x = if a ? 1 else 0;");
    }

    #[test]
    fn test_parse_nested_if_expression() {
        parse_clean("attribute x = if a ? 1 else if b ? 2 else 0;");
    }
}