Skip to main content

arc_lang/
parser.rs

//! Arc parser — transforms a token stream into an AST.
//! Designed for maximum forgiveness: it recovers from errors, skips bad lines,
//! and always produces a (possibly partial) Document.
4
5use crate::ast::*;
6use crate::lexer::{Token, TokenKind};
7
8pub struct ParseResult {
9    pub document: Document,
10    pub diagnostics: Vec<Diagnostic>,
11}
12
13struct Parser {
14    tokens: Vec<Token>,
15    pos: usize,
16    diagnostics: Vec<Diagnostic>,
17}
18
19impl Parser {
20    fn new(tokens: Vec<Token>) -> Self {
21        Self { tokens, pos: 0, diagnostics: Vec::new() }
22    }
23
24    fn peek(&self) -> Option<&Token> {
25        self.tokens.get(self.pos)
26    }
27
28    fn advance(&mut self) -> Option<&Token> {
29        let tok = self.tokens.get(self.pos);
30        if tok.is_some() {
31            self.pos += 1;
32        }
33        tok
34    }
35
36    fn at_end(&self) -> bool {
37        self.pos >= self.tokens.len()
38    }
39
40    fn skip_newlines(&mut self) {
41        while let Some(tok) = self.peek() {
42            if tok.kind == TokenKind::Newline {
43                self.pos += 1;
44            } else if matches!(tok.kind, TokenKind::Comment(_)) {
45                self.pos += 1;
46            } else {
47                break;
48            }
49        }
50    }
51
52    fn skip_to_newline(&mut self) {
53        while let Some(tok) = self.peek() {
54            if tok.kind == TokenKind::Newline {
55                break;
56            }
57            self.pos += 1;
58        }
59    }
60
61    fn expect_newline_or_end(&mut self) {
62        if let Some(tok) = self.peek() {
63            if tok.kind == TokenKind::Newline {
64                self.pos += 1;
65            } else if !self.at_end() && tok.kind != TokenKind::RBrace {
66                // Not a newline and not end — warn and skip
67                self.diagnostics.push(Diagnostic {
68                    line: tok.line,
69                    col: tok.col,
70                    code: "W001".into(),
71                    message: "Expected newline after statement".into(),
72                    suggestion: None,
73                    severity: Severity::Warning,
74                });
75                self.skip_to_newline();
76            }
77        }
78    }
79
80    fn current_span(&self) -> Span {
81        if let Some(tok) = self.peek() {
82            Span { line: tok.line, col: tok.col, len: tok.len }
83        } else if let Some(last) = self.tokens.last() {
84            Span { line: last.line, col: last.col + last.len, len: 0 }
85        } else {
86            Span::default()
87        }
88    }
89
90    fn warn(&mut self, line: usize, col: usize, code: &str, msg: &str) {
91        self.diagnostics.push(Diagnostic {
92            line, col,
93            code: code.into(),
94            message: msg.into(),
95            suggestion: None,
96            severity: Severity::Warning,
97        });
98    }
99
100    fn error(&mut self, line: usize, col: usize, code: &str, msg: &str, suggestion: Option<String>) {
101        self.diagnostics.push(Diagnostic {
102            line, col,
103            code: code.into(),
104            message: msg.into(),
105            suggestion,
106            severity: Severity::Error,
107        });
108    }
109
110    // ── Parsing ───────────────────────────────────────────────
111
112    fn parse_document(&mut self) -> Document {
113        let mut doc = Document::default();
114        self.skip_newlines();
115
116        while !self.at_end() {
117            self.skip_newlines();
118            if self.at_end() { break; }
119
120            let tok = self.peek().unwrap();
121            match &tok.kind {
122                // Directive: @direction, @theme, @spacing
123                TokenKind::At => {
124                    if let Some(dir) = self.parse_directive() {
125                        doc.directives.push(dir);
126                    }
127                }
128                // Group
129                TokenKind::Group => {
130                    if let Some(group) = self.parse_group() {
131                        doc.groups.push(group);
132                    }
133                }
134                // Include
135                TokenKind::Include => {
136                    if let Some(inc) = self.parse_include() {
137                        doc.includes.push(inc);
138                    }
139                }
140                // Node type keyword → could be a node declaration OR start of a connection
141                k if k.is_node_type() => {
142                    self.parse_node_or_connection(&mut doc);
143                }
144                // Identifier → could be a connection (from an already-declared node)
145                TokenKind::Ident(_) => {
146                    self.parse_ident_line(&mut doc);
147                }
148                // Comment — already skipped in skip_newlines, but handle explicitly
149                TokenKind::Comment(_) => {
150                    self.pos += 1;
151                }
152                // Unknown or unexpected
153                _ => {
154                    let t = self.peek().unwrap();
155                    self.error(t.line, t.col, "E001", &format!("Unexpected token"), None);
156                    self.skip_to_newline();
157                }
158            }
159        }
160
161        doc
162    }
163
164    fn parse_directive(&mut self) -> Option<Directive> {
165        let at_tok = self.advance().unwrap(); // consume @
166        let span_line = at_tok.line;
167        let span_col = at_tok.col;
168
169        let name_tok = self.advance()?;
170        let name_line = name_tok.line;
171        let name_col = name_tok.col;
172        let name = match &name_tok.kind {
173            TokenKind::Ident(s) => s.to_lowercase(),
174            _ => {
175                self.error(name_line, name_col, "E002", "Expected directive name after @", None);
176                self.skip_to_newline();
177                return None;
178            }
179        };
180
181        let value_tok = self.advance();
182        let value = match value_tok.map(|t| &t.kind) {
183            Some(TokenKind::Ident(s)) => s.clone(),
184            Some(TokenKind::QuotedString(s)) => s.clone(),
185            _ => {
186                self.error(span_line, span_col, "E003", &format!("Expected value for @{}", name), None);
187                self.skip_to_newline();
188                return None;
189            }
190        };
191
192        self.expect_newline_or_end();
193
194        match name.as_str() {
195            "direction" | "dir" => {
196                match value.to_lowercase().as_str() {
197                    "down" | "vertical" | "tb" | "top-bottom" => Some(Directive::Direction(Direction::Down)),
198                    "right" | "horizontal" | "lr" | "left-right" => Some(Directive::Direction(Direction::Right)),
199                    _ => {
200                        self.warn(span_line, span_col, "W002", &format!("Unknown direction '{}', using 'down'", value));
201                        Some(Directive::Direction(Direction::Down))
202                    }
203                }
204            }
205            "theme" => Some(Directive::Theme(value.to_lowercase())),
206            "spacing" => {
207                match value.to_lowercase().as_str() {
208                    "compact" => Some(Directive::Spacing(Spacing::Compact)),
209                    "normal" => Some(Directive::Spacing(Spacing::Normal)),
210                    "wide" => Some(Directive::Spacing(Spacing::Wide)),
211                    _ => {
212                        self.warn(span_line, span_col, "W003", &format!("Unknown spacing '{}', using 'normal'", value));
213                        Some(Directive::Spacing(Spacing::Normal))
214                    }
215                }
216            }
217            _ => {
218                self.warn(span_line, span_col, "W004", &format!("Unknown directive '@{}'", name));
219                None
220            }
221        }
222    }
223
224    fn parse_group(&mut self) -> Option<Group> {
225        let group_tok = self.advance().unwrap(); // consume 'group'
226        let span = Span { line: group_tok.line, col: group_tok.col, len: group_tok.len };
227
228        // Label (required)
229        let label = match self.peek().map(|t| &t.kind) {
230            Some(TokenKind::QuotedString(s)) => {
231                let s = s.clone();
232                self.advance();
233                s
234            }
235            Some(TokenKind::Ident(s)) => {
236                let s = s.clone();
237                self.advance();
238                s
239            }
240            _ => {
241                self.error(span.line, span.col, "E004", "Expected group label", None);
242                self.skip_to_newline();
243                return None;
244            }
245        };
246
247        // Optional tags
248        let tags = self.try_parse_tags();
249
250        // Opening brace
251        self.skip_newlines();
252        match self.peek().map(|t| &t.kind) {
253            Some(TokenKind::LBrace) => { self.advance(); }
254            _ => {
255                self.error(span.line, span.col, "E005", "Expected '{' after group label", None);
256                self.skip_to_newline();
257                return None;
258            }
259        }
260
261        // Members
262        let mut members = Vec::new();
263        loop {
264            self.skip_newlines();
265            if self.at_end() { break; }
266
267            match self.peek().map(|t| &t.kind) {
268                Some(TokenKind::RBrace) => {
269                    self.advance();
270                    break;
271                }
272                Some(TokenKind::Group) => {
273                    if let Some(sub) = self.parse_group() {
274                        members.push(GroupMember::Group(sub));
275                    }
276                }
277                Some(k) if k.is_node_type() => {
278                    // Could be node declaration or a connection starting with type
279                    let saved = self.pos;
280                    if let Some(node) = self.try_parse_node_decl() {
281                        // Check if next non-ws token is an arrow → it's actually a connection
282                        if let Some(tok) = self.peek() {
283                            if tok.kind.is_arrow() {
284                                // Rewind and parse as connection
285                                self.pos = saved;
286                                let mut temp_doc = Document::default();
287                                self.parse_node_or_connection(&mut temp_doc);
288                                for n in temp_doc.nodes { members.push(GroupMember::Node(n)); }
289                                for c in temp_doc.connections { members.push(GroupMember::Connection(c)); }
290                                continue;
291                            }
292                        }
293                        members.push(GroupMember::Node(node));
294                    }
295                }
296                Some(TokenKind::Ident(_)) => {
297                    // Could be node ref, comma-separated list, or connection
298                    self.parse_group_ident_line(&mut members);
299                }
300                Some(TokenKind::Comment(_)) => {
301                    self.advance();
302                }
303                _ => {
304                    if let Some(t) = self.peek() {
305                        self.error(t.line, t.col, "E006", "Unexpected token in group", None);
306                    }
307                    self.skip_to_newline();
308                }
309            }
310        }
311
312        Some(Group { label, tags, members, span })
313    }
314
315    fn parse_group_ident_line(&mut self, members: &mut Vec<GroupMember>) {
316        // Peek ahead: is this `Ident -> ...` (connection) or `Ident, Ident, ...` (ref list)?
317        let first_ident = match &self.peek().unwrap().kind {
318            TokenKind::Ident(s) => s.clone(),
319            _ => return,
320        };
321        let _first_span = self.current_span();
322
323        // Look ahead past ident
324        let saved = self.pos;
325        self.advance(); // consume ident
326
327        match self.peek().map(|t| &t.kind) {
328            Some(k) if k.is_arrow() => {
329                // It's a connection
330                self.pos = saved;
331                let mut temp_doc = Document::default();
332                self.parse_ident_line(&mut temp_doc);
333                for c in temp_doc.connections { members.push(GroupMember::Connection(c)); }
334            }
335            Some(TokenKind::Comma) => {
336                // It's a comma-separated list of refs
337                let mut refs = vec![first_ident];
338                while let Some(tok) = self.peek() {
339                    if tok.kind == TokenKind::Comma {
340                        self.advance();
341                        if let Some(next) = self.peek() {
342                            match &next.kind {
343                                TokenKind::Ident(s) => {
344                                    refs.push(s.clone());
345                                    self.advance();
346                                }
347                                k if k.is_node_type() => {
348                                    // node type used as ident ref in group
349                                    refs.push(format!("{:?}", k));
350                                    self.advance();
351                                }
352                                _ => break,
353                            }
354                        }
355                    } else {
356                        break;
357                    }
358                }
359                if refs.len() == 1 {
360                    members.push(GroupMember::NodeRef(refs.into_iter().next().unwrap()));
361                } else {
362                    members.push(GroupMember::NodeRefList(refs));
363                }
364                self.expect_newline_or_end();
365            }
366            _ => {
367                // Single node ref
368                members.push(GroupMember::NodeRef(first_ident));
369                self.expect_newline_or_end();
370            }
371        }
372    }
373
374    fn parse_include(&mut self) -> Option<Include> {
375        let inc_tok = self.advance().unwrap(); // consume 'include'
376        let span = Span { line: inc_tok.line, col: inc_tok.col, len: inc_tok.len };
377
378        let path = match self.peek().map(|t| &t.kind) {
379            Some(TokenKind::QuotedString(s)) => {
380                let s = s.clone();
381                self.advance();
382                s
383            }
384            _ => {
385                self.error(span.line, span.col, "E007", "Expected quoted path after 'include'", None);
386                self.skip_to_newline();
387                return None;
388            }
389        };
390
391        self.expect_newline_or_end();
392        Some(Include { path, span })
393    }
394
395    /// Parse a line that starts with a node type keyword.
396    /// It could be:
397    ///   - A node declaration: `service Auth "Auth Service" [tags]`
398    ///   - A node declaration followed by arrow: `service Auth -> db Users`
399    fn parse_node_or_connection(&mut self, doc: &mut Document) {
400        // Parse the node first
401        let node = match self.try_parse_node_decl() {
402            Some(n) => n,
403            None => {
404                self.skip_to_newline();
405                return;
406            }
407        };
408
409        let node_id = node.id.clone();
410
411        // Check if followed by an arrow → connection
412        if let Some(tok) = self.peek() {
413            if tok.kind.is_arrow() {
414                // This node declaration is also the source of a connection
415                doc.nodes.push(node);
416                // Parse connection(s)
417                while let Some(tok) = self.peek() {
418                    if !tok.kind.is_arrow() { break; }
419                    if let Some(conn) = self.parse_connection_from(&node_id) {
420                        // If the target is a new node declaration, add it too
421                        doc.connections.push(conn);
422                    }
423                }
424                self.expect_newline_or_end();
425                return;
426            }
427        }
428
429        doc.nodes.push(node);
430        self.expect_newline_or_end();
431    }
432
433    /// Try to parse a node declaration: TYPE IDENT [LABEL] [TAGS]
434    fn try_parse_node_decl(&mut self) -> Option<Node> {
435        let type_tok = self.peek()?;
436        if !type_tok.kind.is_node_type() { return None; }
437
438        let span = Span { line: type_tok.line, col: type_tok.col, len: type_tok.len };
439        let node_type = type_tok.kind.to_node_type().unwrap();
440        self.advance(); // consume type
441
442        // ID (required)
443        let id = match self.peek().map(|t| &t.kind) {
444            Some(TokenKind::Ident(s)) => {
445                let s = s.clone();
446                self.advance();
447                s
448            }
449            Some(TokenKind::QuotedString(s)) => {
450                // Forgiving: if someone puts a quoted string as ID, use it
451                let s = s.clone();
452                self.advance();
453                // Generate a sanitized ID from the string
454                s.chars().filter(|c| c.is_alphanumeric() || *c == '_' || *c == '-').collect::<String>()
455            }
456            _ => {
457                self.error(span.line, span.col, "E008", &format!("Expected name after '{}'", node_type.as_str()), None);
458                return None;
459            }
460        };
461
462        // Optional label
463        let label = match self.peek().map(|t| &t.kind) {
464            Some(TokenKind::QuotedString(s)) => {
465                let s = s.clone();
466                self.advance();
467                Some(s)
468            }
469            _ => None,
470        };
471
472        // Optional tags
473        let tags = self.try_parse_tags();
474
475        Some(Node { node_type, id, label, tags, span })
476    }
477
478    /// Parse a connection starting from a known source ID.
479    fn parse_connection_from(&mut self, from_id: &str) -> Option<Connection> {
480        let arrow_tok = self.peek()?;
481        if !arrow_tok.kind.is_arrow() { return None; }
482
483        let span = Span { line: arrow_tok.line, col: arrow_tok.col, len: arrow_tok.len };
484        let arrow = arrow_tok.kind.to_arrow_kind().unwrap();
485        self.advance(); // consume arrow
486
487        // Target: could be `TYPE ID` or just `ID`
488        let (to_id, _target_node) = self.parse_connection_target()?;
489
490        // Optional label
491        let label = self.try_parse_label();
492
493        // Optional tags
494        let tags = self.try_parse_tags();
495
496        Some(Connection {
497            from: from_id.to_string(),
498            arrow,
499            to: to_id,
500            label,
501            tags,
502            span,
503        })
504    }
505
506    /// Parse connection target: `TYPE ID` or just `ID`
507    fn parse_connection_target(&mut self) -> Option<(String, Option<Node>)> {
508        let is_node_type = self.peek().map(|t| t.kind.is_node_type()).unwrap_or(false);
509
510        if is_node_type {
511            // `TYPE ID` — inline node declaration as target
512            if let Some(node) = self.try_parse_node_decl() {
513                let id = node.id.clone();
514                return Some((id, Some(node)));
515            }
516        }
517
518        // Just an identifier
519        let tok_line = self.peek().map(|t| t.line).unwrap_or(0);
520        let tok_col = self.peek().map(|t| t.col).unwrap_or(0);
521        let tok_kind = self.peek().map(|t| t.kind.clone());
522
523        match tok_kind {
524            Some(TokenKind::Ident(s)) => {
525                self.advance();
526                Some((s, None))
527            }
528            _ => {
529                self.error(tok_line, tok_col, "E009", "Expected target node in connection", None);
530                self.skip_to_newline();
531                None
532            }
533        }
534    }
535
536    /// Parse a line starting with an identifier (likely a connection).
537    fn parse_ident_line(&mut self, doc: &mut Document) {
538        let ident_line = self.peek().map(|t| t.line).unwrap_or(0);
539        let ident_col = self.peek().map(|t| t.col).unwrap_or(0);
540        let id = match self.peek().map(|t| t.kind.clone()) {
541            Some(TokenKind::Ident(s)) => s,
542            _ => return,
543        };
544        self.advance(); // consume ident
545
546        // Check for arrow
547        if let Some(tok) = self.peek() {
548            if tok.kind.is_arrow() {
549                if let Some(conn) = self.parse_connection_from(&id) {
550                    doc.connections.push(conn);
551                }
552                self.expect_newline_or_end();
553                return;
554            }
555        }
556
557        // Not a connection — it's an error (bare identifier on a line)
558        if let Some(tok) = self.peek() {
559            if tok.kind != TokenKind::Newline {
560                self.warn(ident_line, ident_col, "W005",
561                    &format!("'{}' appears as a bare identifier. Did you mean to declare a node? Use: service {}", id, id));
562            }
563        }
564        self.skip_to_newline();
565    }
566
567    // ── Helpers ───────────────────────────────────────────────
568
569    fn try_parse_tags(&mut self) -> Vec<String> {
570        let mut tags = Vec::new();
571        if let Some(tok) = self.peek() {
572            if tok.kind == TokenKind::LBracket {
573                self.advance(); // consume [
574                loop {
575                    match self.peek().map(|t| &t.kind) {
576                        Some(TokenKind::RBracket) => {
577                            self.advance();
578                            break;
579                        }
580                        Some(TokenKind::Comma) => {
581                            self.advance();
582                        }
583                        Some(TokenKind::Ident(s)) => {
584                            tags.push(s.clone());
585                            self.advance();
586                        }
587                        Some(TokenKind::QuotedString(s)) => {
588                            tags.push(s.clone());
589                            self.advance();
590                        }
591                        // Accept node type keywords as tag values
592                        Some(k) if k.is_node_type() => {
593                            let type_name = match k {
594                                TokenKind::Service => "service",
595                                TokenKind::Db => "db",
596                                TokenKind::Cache => "cache",
597                                TokenKind::Queue => "queue",
598                                TokenKind::Gateway => "gateway",
599                                TokenKind::User => "user",
600                                TokenKind::Store => "store",
601                                TokenKind::Fn => "fn",
602                                TokenKind::Worker => "worker",
603                                TokenKind::External => "external",
604                                _ => unreachable!(),
605                            };
606                            tags.push(type_name.to_string());
607                            self.advance();
608                        }
609                        None => break,
610                        _ => {
611                            // Skip unknown tokens in tags
612                            self.advance();
613                        }
614                    }
615                }
616            }
617        }
618        tags
619    }
620
621    fn try_parse_label(&mut self) -> Option<String> {
622        if let Some(tok) = self.peek() {
623            if tok.kind == TokenKind::Colon {
624                self.advance(); // consume :
625                match self.peek().map(|t| &t.kind) {
626                    Some(TokenKind::QuotedString(s)) => {
627                        let s = s.clone();
628                        self.advance();
629                        return Some(s);
630                    }
631                    // Forgiving: accept unquoted label (until next special token)
632                    Some(TokenKind::Ident(s)) => {
633                        let s = s.clone();
634                        self.advance();
635                        return Some(s);
636                    }
637                    _ => {}
638                }
639            }
640        }
641        None
642    }
643}
644
645// ── Public API ───────────────────────────────────────────────────
646
647pub fn parse(input: &str) -> ParseResult {
648    let tokens = crate::lexer::tokenize(input);
649    let mut parser = Parser::new(tokens);
650    let document = parser.parse_document();
651    ParseResult {
652        document,
653        diagnostics: parser.diagnostics,
654    }
655}
656
#[cfg(test)]
mod tests {
    use super::*;

    /// Two plain declarations give two nodes and no diagnostics.
    #[test]
    fn test_simple_nodes() {
        let ParseResult { document, diagnostics } = parse("service Auth\ndb Postgres\n");
        assert!(diagnostics.is_empty());
        let nodes = &document.nodes;
        assert_eq!(nodes.len(), 2);
        assert_eq!(nodes[0].id, "Auth");
        assert_eq!(nodes[1].id, "Postgres");
    }

    /// An identifier line with an arrow and a quoted label gives one connection.
    #[test]
    fn test_connection() {
        let document = parse("Auth -> API: \"validate\"\n").document;
        assert_eq!(document.connections.len(), 1);
        let conn = &document.connections[0];
        assert_eq!(conn.from, "Auth");
        assert_eq!(conn.to, "API");
        assert_eq!(conn.label.as_deref(), Some("validate"));
    }

    /// Label and tags on a declaration are both captured.
    #[test]
    fn test_node_with_label_and_tags() {
        let document = parse("service API \"API Gateway\" [Go, v3]\n").document;
        assert_eq!(document.nodes.len(), 1);
        let api = &document.nodes[0];
        assert_eq!(api.id, "API");
        assert_eq!(api.label.as_deref(), Some("API Gateway"));
        assert_eq!(api.tags, vec!["Go", "v3"]);
    }

    /// A group with two bare references has two members.
    #[test]
    fn test_group() {
        let document = parse("group \"AWS\" {\n  Auth\n  API\n}\n").document;
        assert_eq!(document.groups.len(), 1);
        let aws = &document.groups[0];
        assert_eq!(aws.label, "AWS");
        assert_eq!(aws.members.len(), 2);
    }

    /// Directives are reflected by the document's accessors.
    #[test]
    fn test_directive() {
        let document = parse("@direction right\n@theme dark\n").document;
        assert_eq!(document.direction(), Direction::Right);
        assert_eq!(document.theme_name(), "dark");
    }
}
703}