Skip to main content

cabalist_parser/
parse.rs

//! Hand-written recursive descent parser for `.cabal` files.
//!
//! Transforms the token stream from the lexer into a CST. The parser uses
//! indentation levels to determine nesting: sections contain fields that
//! are indented more than the section header, and fields contain
//! continuation lines that are indented more than the field name.
7
8use crate::cst::{CabalCst, CstNode, CstNodeKind};
9use crate::diagnostic::Diagnostic;
10use crate::lexer::{tokenize, Token, TokenKind, TriviaKind};
11use crate::span::{NodeId, Span};
12
13/// The result of parsing a `.cabal` file.
14#[derive(Debug, Clone)]
15pub struct ParseResult {
16    /// The concrete syntax tree.
17    pub cst: CabalCst,
18    /// Diagnostics (errors, warnings) encountered during parsing.
19    pub diagnostics: Vec<Diagnostic>,
20}
21
22/// Parser state.
23struct Parser {
24    /// The original source text.
25    source: String,
26    /// The token stream (from the lexer).
27    tokens: Vec<Token>,
28    /// Current position in the token stream.
29    pos: usize,
30    /// The CST node arena being built.
31    nodes: Vec<CstNode>,
32    /// Diagnostics collected during parsing.
33    diagnostics: Vec<Diagnostic>,
34}
35
36impl Parser {
37    fn new(source: String, tokens: Vec<Token>) -> Self {
38        Self {
39            source,
40            tokens,
41            pos: 0,
42            nodes: Vec::new(),
43            diagnostics: Vec::new(),
44        }
45    }
46
47    // -- Token access -------------------------------------------------------
48
49    /// Peek at the current token without consuming it.
50    fn peek(&self) -> &Token {
51        &self.tokens[self.pos.min(self.tokens.len() - 1)]
52    }
53
54    /// Check if we're at EOF.
55    fn at_eof(&self) -> bool {
56        self.peek().kind == TokenKind::Eof
57    }
58
59    // -- Node creation ------------------------------------------------------
60
61    /// Allocate a new node in the arena, returning its `NodeId`.
62    fn alloc_node(&mut self, node: CstNode) -> NodeId {
63        let id = NodeId(self.nodes.len());
64        self.nodes.push(node);
65        id
66    }
67
68    /// Set the parent of `child` to `parent` and add `child` to `parent`'s
69    /// children list.
70    fn add_child(&mut self, parent: NodeId, child: NodeId) {
71        self.nodes[child.0].parent = Some(parent);
72        self.nodes[parent.0].children.push(child);
73    }
74
75    // -- Diagnostic helpers -------------------------------------------------
76
77    fn emit_error(&mut self, span: Span, message: impl Into<String>) {
78        self.diagnostics.push(Diagnostic::error(span, message));
79    }
80
81    // -- Skip / advance helpers ---------------------------------------------
82
83    // -- Parsing entry point ------------------------------------------------
84
85    /// Parse the entire `.cabal` file, producing a `ParseResult`.
86    fn parse(mut self) -> ParseResult {
87        // Create the root node.
88        let root_node = CstNode::new(CstNodeKind::Root, Span::new(0, self.source.len()));
89        let root_id = self.alloc_node(root_node);
90
91        // Parse top-level items.
92        self.parse_body(root_id, 0, true);
93
94        // Absorb any trailing trivia from the Eof token into the root.
95        if !self.at_eof() {
96            // Shouldn't happen, but be defensive.
97        }
98
99        // Update the root span.
100        self.nodes[root_id.0].span = Span::new(0, self.source.len());
101        self.nodes[root_id.0].content_span = Span::new(0, self.source.len());
102
103        ParseResult {
104            cst: CabalCst {
105                source: self.source,
106                nodes: self.nodes,
107                root: root_id,
108            },
109            diagnostics: self.diagnostics,
110        }
111    }
112
113    /// Parse a body (sequence of fields, sections, conditionals, comments,
114    /// blank lines) where all items have indent > `min_indent`.
115    ///
116    /// For the top-level, `min_indent` is 0, `is_root` is true, and we
117    /// accept items at indent 0.
118    /// For section bodies, `min_indent` is the section header's indent,
119    /// `is_root` is false, and we accept only items with indent > `min_indent`.
120    fn parse_body(&mut self, parent_id: NodeId, min_indent: usize, is_root: bool) {
121        loop {
122            if self.at_eof() {
123                // Absorb EOF trivia into the parent: but only once (at
124                // the root level) to avoid duplicating trailing content.
125                if is_root {
126                    let eof_tok = self.peek();
127                    if !eof_tok.leading_trivia.is_empty() {
128                        self.consume_trailing_trivia(parent_id);
129                    }
130                }
131                break;
132            }
133
134            let tok = self.peek();
135            let indent = tok.indent;
136
137            // For section bodies (is_root == false): items must be indented
138            // more than the parent. For the top-level root: accept all.
139            if !is_root && indent <= min_indent {
140                break;
141            }
142
143            match tok.kind {
144                TokenKind::Comment => {
145                    let node_id = self.parse_comment();
146                    self.add_child(parent_id, node_id);
147                }
148                TokenKind::SectionHeader => {
149                    let node_id = self.parse_section();
150                    self.add_child(parent_id, node_id);
151                }
152                TokenKind::If | TokenKind::Elif => {
153                    let node_id = self.parse_conditional(indent);
154                    self.add_child(parent_id, node_id);
155                }
156                TokenKind::Else => {
157                    // `else` not expected here: it's handled inside
158                    // parse_conditional. If we see it standalone, that's
159                    // an error. Break so the caller can handle it.
160                    break;
161                }
162                TokenKind::FieldName => {
163                    // Check if this is an `import:` field.
164                    let is_import = {
165                        let name_text = tok.span.slice(&self.source);
166                        name_text.eq_ignore_ascii_case("import")
167                    };
168                    if is_import {
169                        let node_id = self.parse_import();
170                        self.add_child(parent_id, node_id);
171                    } else {
172                        let node_id = self.parse_field(indent);
173                        self.add_child(parent_id, node_id);
174                    }
175                }
176                TokenKind::Value => {
177                    // A value line at the body level: this could be a
178                    // continuation of the previous field, but since we handle
179                    // continuations inside parse_field, seeing one here means
180                    // it's either misindented or a free-standing value.
181                    if is_root && indent == 0 {
182                        // Top-level value line: unusual but we should handle
183                        // it. Emit as an error + skip.
184                        let span = tok.span;
185                        self.emit_error(span, "unexpected value at top level");
186                        self.pos += 1;
187                    } else if indent > min_indent {
188                        // Indented value line in a section body: could be
189                        // a continuation or orphan. Emit as a ValueLine child.
190                        let node_id = self.parse_value_line();
191                        self.add_child(parent_id, node_id);
192                    } else {
193                        break;
194                    }
195                }
196                TokenKind::Eof => break,
197                _ => {
198                    // Unexpected token: skip with diagnostic.
199                    let span = tok.span;
200                    let kind = tok.kind;
201                    self.emit_error(span, format!("unexpected token: {kind:?}"));
202                    self.pos += 1;
203                }
204            }
205        }
206    }
207
208    /// Consume remaining trivia from the EOF token and create blank/comment
209    /// nodes as needed so they appear in the rendered output.
210    fn consume_trailing_trivia(&mut self, parent_id: NodeId) {
211        let eof_idx = self.pos.min(self.tokens.len() - 1);
212        if self.tokens[eof_idx].kind != TokenKind::Eof {
213            return;
214        }
215
216        // Take (not clone) the trivia so it can't be consumed again.
217        let trivia: Vec<_> = std::mem::take(&mut self.tokens[eof_idx].leading_trivia);
218        if trivia.is_empty() {
219            return;
220        }
221
222        // Gather all the trivia into a single BlankLine node that covers
223        // the full range. This is a simplification: it means trailing
224        // blank lines at the end of the file get rendered correctly.
225        let Some(first) = trivia.first() else {
226            return;
227        };
228        let Some(last) = trivia.last() else {
229            return;
230        };
231        let start = first.span.start;
232        let end = last.span.end;
233        let span = Span::new(start, end);
234
235        let mut node = CstNode::new(CstNodeKind::BlankLine, span);
236        node.content_span = span;
237        // We store the trivia so render works: the BlankLine node's
238        // content_span covers the text directly.
239        let node_id = self.alloc_node(node);
240        self.add_child(parent_id, node_id);
241    }
242
243    // -- Individual node parsers --------------------------------------------
244
245    /// Parse a standalone comment line.
246    fn parse_comment(&mut self) -> NodeId {
247        let tok = self.peek().clone();
248        debug_assert_eq!(tok.kind, TokenKind::Comment);
249        self.pos += 1;
250
251        let mut node = CstNode::new(CstNodeKind::Comment, tok.span);
252        node.leading_trivia = tok.leading_trivia;
253        node.content_span = tok.span;
254        node.indent = tok.indent;
255
256        // Grab the newline trivia that follows.
257        self.collect_trailing_newline(&mut node);
258
259        // Update span to cover leading trivia + content + trailing trivia.
260        self.finalize_node_span(&mut node);
261
262        self.alloc_node(node)
263    }
264
265    /// Parse a field: `field-name: value` with possible continuation lines.
266    fn parse_field(&mut self, field_indent: usize) -> NodeId {
267        // We expect: FieldName, Colon, optional Value.
268        let name_tok = self.peek().clone();
269        debug_assert_eq!(name_tok.kind, TokenKind::FieldName);
270        self.pos += 1;
271
272        let mut node = CstNode::new(CstNodeKind::Field, name_tok.span);
273        node.leading_trivia = name_tok.leading_trivia;
274        node.field_name = Some(name_tok.span);
275        node.indent = name_tok.indent;
276
277        // Expect Colon.
278        let mut colon_end = name_tok.span.end;
279        if !self.at_eof() && self.peek().kind == TokenKind::Colon {
280            let colon_tok = self.peek().clone();
281            // Absorb colon trivia (spacing between name and colon).
282            colon_end = colon_tok.span.end;
283            self.pos += 1;
284
285            // Check for value on the same line.
286            if !self.at_eof() && self.peek().kind == TokenKind::Value {
287                let val_tok = self.peek();
288                // Only take this value if it's not on a new line.
289                // We detect "same line" by checking that the value token's
290                // leading trivia does NOT contain a Newline.
291                let has_newline = val_tok
292                    .leading_trivia
293                    .iter()
294                    .any(|t| t.kind == TriviaKind::Newline);
295                if !has_newline {
296                    let val_tok = self.peek().clone();
297                    node.field_value = Some(val_tok.span);
298                    colon_end = val_tok.span.end;
299                    self.pos += 1;
300                }
301            }
302        }
303
304        node.content_span = Span::new(name_tok.span.start, colon_end);
305
306        // Check if the field value opens a braced freeform text block.
307        // E.g. `Description: {`: the value text ends with `{`.
308        let is_braced_field = node.field_value.is_some_and(|val_span| {
309            let val_text = val_span.slice(&self.source);
310            val_text.trim_end().ends_with('{')
311        });
312
313        // Collect trailing newline.
314        self.collect_trailing_newline(&mut node);
315
316        if is_braced_field {
317            // Braced freeform text block: consume all lines until `}`
318            // regardless of indentation.
319            self.parse_braced_field_continuation(&mut node);
320        } else {
321            // Continuation lines: any following line with indent > field_indent.
322            self.parse_continuation_lines(&mut node, field_indent);
323        }
324
325        self.finalize_node_span(&mut node);
326        self.alloc_node(node)
327    }
328
329    /// Parse continuation lines for a braced freeform text field (`Description: { ... }`).
330    /// Consumes all lines until a line whose content is just `}`.
331    fn parse_braced_field_continuation(&mut self, field_node: &mut CstNode) {
332        let mut child_ids = Vec::new();
333
334        loop {
335            if self.at_eof() {
336                break;
337            }
338            let tok = self.peek();
339
340            // Check if this token's text is just `}` (the closing brace line).
341            let is_closing =
342                tok.kind == TokenKind::Value && tok.span.slice(&self.source).trim() == "}";
343
344            match tok.kind {
345                TokenKind::Value | TokenKind::Comment => {
346                    let node_id = if tok.kind == TokenKind::Comment {
347                        self.parse_comment()
348                    } else {
349                        self.parse_value_line()
350                    };
351                    child_ids.push(node_id);
352                    if is_closing {
353                        break;
354                    }
355                }
356                _ => {
357                    // In braced mode, other token types (blank lines represented
358                    // via trivia) are handled by the value/comment lines above.
359                    // If we hit something unexpected, just break.
360                    break;
361                }
362            }
363        }
364
365        field_node.children = child_ids;
366    }
367
368    /// Parse continuation lines for a field. These are lines indented more
369    /// than the field's indent level.
370    fn parse_continuation_lines(&mut self, field_node: &mut CstNode, field_indent: usize) {
371        // Collect continuation lines as standalone nodes: their IDs will
372        // be stored in the field node's children vec.
373        let mut child_ids = Vec::new();
374
375        loop {
376            if self.at_eof() {
377                break;
378            }
379            let tok = self.peek();
380            let indent = tok.indent;
381
382            // A continuation line must be indented more than the field.
383            if indent <= field_indent {
384                // But check if this is a blank line or comment that might be
385                // "between" continuation lines.
386                if tok.kind == TokenKind::Comment {
387                    // Check if the next non-comment/non-blank token is still
388                    // a continuation. For now, treat indented comments as
389                    // part of the field too.
390                    if indent > field_indent {
391                        let node_id = self.parse_comment();
392                        child_ids.push(node_id);
393                        continue;
394                    }
395                }
396                break;
397            }
398
399            match tok.kind {
400                TokenKind::Value => {
401                    let node_id = self.parse_value_line();
402                    child_ids.push(node_id);
403                }
404                TokenKind::FieldName => {
405                    // If a field name appears indented deeper, it might be a
406                    // nested field (unlikely in .cabal) or misformatted.
407                    // Treat the entire line as a value for now.
408                    let node_id = self.parse_value_line_from_field();
409                    child_ids.push(node_id);
410                }
411                TokenKind::Comment => {
412                    let node_id = self.parse_comment();
413                    child_ids.push(node_id);
414                }
415                _ => {
416                    break;
417                }
418            }
419        }
420
421        field_node.children = child_ids;
422    }
423
424    /// Parse a value line (continuation line for a field value).
425    fn parse_value_line(&mut self) -> NodeId {
426        let tok = self.peek().clone();
427        self.pos += 1;
428
429        let mut node = CstNode::new(CstNodeKind::ValueLine, tok.span);
430        node.leading_trivia = tok.leading_trivia;
431        node.content_span = tok.span;
432        node.indent = tok.indent;
433        self.collect_trailing_newline(&mut node);
434        self.finalize_node_span(&mut node);
435        self.alloc_node(node)
436    }
437
438    /// Parse a field name token as a value line (for cases where a field-like
439    /// token appears as a continuation).
440    fn parse_value_line_from_field(&mut self) -> NodeId {
441        // Consume FieldName, Colon, and optional Value as one ValueLine.
442        let name_tok = self.peek().clone();
443        self.pos += 1;
444        let mut end = name_tok.span.end;
445
446        // Consume colon if present.
447        if !self.at_eof() && self.peek().kind == TokenKind::Colon {
448            end = self.peek().span.end;
449            self.pos += 1;
450        }
451
452        // Consume value if present on same line.
453        if !self.at_eof() && self.peek().kind == TokenKind::Value {
454            let has_newline = self
455                .peek()
456                .leading_trivia
457                .iter()
458                .any(|t| t.kind == TriviaKind::Newline);
459            if !has_newline {
460                end = self.peek().span.end;
461                self.pos += 1;
462            }
463        }
464
465        let content_span = Span::new(name_tok.span.start, end);
466        let mut node = CstNode::new(CstNodeKind::ValueLine, content_span);
467        node.leading_trivia = name_tok.leading_trivia;
468        node.content_span = content_span;
469        node.indent = name_tok.indent;
470        self.collect_trailing_newline(&mut node);
471        self.finalize_node_span(&mut node);
472        self.alloc_node(node)
473    }
474
475    /// Parse an `import: stanza-name` directive.
476    fn parse_import(&mut self) -> NodeId {
477        // Same shape as a field, but with Import kind.
478        let name_tok = self.peek().clone();
479        debug_assert_eq!(name_tok.kind, TokenKind::FieldName);
480        self.pos += 1;
481
482        let mut node = CstNode::new(CstNodeKind::Import, name_tok.span);
483        node.leading_trivia = name_tok.leading_trivia;
484        node.field_name = Some(name_tok.span);
485        node.indent = name_tok.indent;
486
487        let mut content_end = name_tok.span.end;
488
489        // Colon.
490        if !self.at_eof() && self.peek().kind == TokenKind::Colon {
491            content_end = self.peek().span.end;
492            self.pos += 1;
493
494            // Value (stanza name).
495            if !self.at_eof() && self.peek().kind == TokenKind::Value {
496                let has_newline = self
497                    .peek()
498                    .leading_trivia
499                    .iter()
500                    .any(|t| t.kind == TriviaKind::Newline);
501                if !has_newline {
502                    let val_tok = self.peek().clone();
503                    node.field_value = Some(val_tok.span);
504                    content_end = val_tok.span.end;
505                    self.pos += 1;
506                }
507            }
508        }
509
510        node.content_span = Span::new(name_tok.span.start, content_end);
511        self.collect_trailing_newline(&mut node);
512        self.finalize_node_span(&mut node);
513        self.alloc_node(node)
514    }
515
516    /// Parse a section: `library`, `executable foo`, etc.
517    /// Also handles braced layout: `library { ... }`, `executable foo { ... }`.
518    fn parse_section(&mut self) -> NodeId {
519        let header_tok = self.peek().clone();
520        debug_assert_eq!(header_tok.kind, TokenKind::SectionHeader);
521        let section_indent = header_tok.indent;
522        self.pos += 1;
523
524        let mut node = CstNode::new(CstNodeKind::Section, header_tok.span);
525        node.leading_trivia = header_tok.leading_trivia;
526        node.section_keyword = Some(header_tok.span);
527        node.indent = section_indent;
528
529        let mut content_end = header_tok.span.end;
530
531        // Section argument (e.g. `my-exe` in `executable my-exe`).
532        // Note: for `library {`, the lexer emits SectionArg("{").
533        // For `executable foo {`, the lexer emits SectionArg("foo {").
534        if !self.at_eof() && self.peek().kind == TokenKind::SectionArg {
535            let arg_tok = self.peek().clone();
536            node.section_arg = Some(arg_tok.span);
537            content_end = arg_tok.span.end;
538            self.pos += 1;
539        }
540
541        // Check for braced layout on the same line: the SectionArg ends with `{`.
542        // E.g. `library {` → SectionArg is "{", or `executable foo {` → SectionArg is "foo {".
543        let is_braced_same_line = node.section_arg.is_some_and(|arg_span| {
544            let arg_text = arg_span.slice(&self.source);
545            arg_text.trim_end().ends_with('{')
546        });
547
548        // Adjust the section_arg span if it contains a trailing `{`.
549        if is_braced_same_line {
550            if let Some(arg_span) = node.section_arg {
551                let arg_text = arg_span.slice(&self.source);
552                let trimmed = arg_text.trim_end();
553                // Remove the trailing `{` and any whitespace before it.
554                let without_brace = trimmed.trim_end_matches('{').trim_end();
555                if without_brace.is_empty() {
556                    // The entire arg was just `{`: no real section name.
557                    node.section_arg = None;
558                } else {
559                    // Trim the arg span to exclude the `{` and preceding whitespace.
560                    let new_end = arg_span.start + without_brace.len();
561                    node.section_arg = Some(Span::new(arg_span.start, new_end));
562                }
563            }
564        }
565
566        // Check for braced layout on the next line: a Value token containing `{`.
567        let is_braced_next_line = !is_braced_same_line
568            && !self.at_eof()
569            && self.peek().kind == TokenKind::Value
570            && self.peek().span.slice(&self.source).trim() == "{";
571
572        if is_braced_next_line {
573            // Include the `{` token in the content span.
574            content_end = self.peek().span.end;
575            self.pos += 1;
576        }
577
578        let is_braced = is_braced_same_line || is_braced_next_line;
579
580        node.content_span = Span::new(header_tok.span.start, content_end);
581
582        // Collect trailing newline for the header line.
583        self.collect_trailing_newline(&mut node);
584
585        // Allocate the section node now so children can reference it.
586        let section_id = self.alloc_node(node);
587
588        if is_braced {
589            // Parse braced section body: consume children until `}`.
590            self.parse_braced_body(section_id);
591        } else {
592            // Parse section body: items indented more than the section header.
593            self.parse_body(section_id, section_indent, false);
594        }
595
596        // Update the section's span to cover its entire body.
597        let body_end = self.last_child_end(section_id);
598        self.nodes[section_id.0].span = Span::new(self.nodes[section_id.0].span.start, body_end);
599
600        section_id
601    }
602
603    /// Parse a conditional: `if condition` + body, optional `else` + body.
604    /// Also handles braced layout: `if flag(dev) { ... }`.
605    fn parse_conditional(&mut self, cond_indent: usize) -> NodeId {
606        let kw_tok = self.peek().clone();
607        debug_assert!(matches!(kw_tok.kind, TokenKind::If | TokenKind::Elif));
608        self.pos += 1;
609
610        let mut node = CstNode::new(CstNodeKind::Conditional, kw_tok.span);
611        node.leading_trivia = kw_tok.leading_trivia;
612        node.condition_keyword = Some(kw_tok.span);
613        node.indent = cond_indent;
614
615        // Consume the condition expression tokens until we hit a newline.
616        // The condition is everything on the same line after the keyword.
617        // Note: consume_condition_expr stops before a `{` Value token.
618        let expr_start = self.find_condition_expr_start();
619        let expr_end = self.consume_condition_expr();
620
621        if expr_start < expr_end {
622            node.condition_expr = Some(Span::new(expr_start, expr_end));
623        }
624
625        // Check for braced layout on the same line: `{` Value token remaining.
626        let is_braced_same_line = !self.at_eof()
627            && self.peek().kind == TokenKind::Value
628            && !self
629                .peek()
630                .leading_trivia
631                .iter()
632                .any(|t| t.kind == TriviaKind::Newline)
633            && self.peek().span.slice(&self.source).trim() == "{";
634
635        let mut content_end = if expr_end > kw_tok.span.end {
636            expr_end
637        } else {
638            kw_tok.span.end
639        };
640
641        if is_braced_same_line {
642            // Consume the `{` token: include it in the content span.
643            content_end = self.peek().span.end;
644            self.pos += 1;
645        }
646
647        // Check for braced layout on the next line: a Value token `{` on a new line.
648        let is_braced_next_line = !is_braced_same_line
649            && !self.at_eof()
650            && self.peek().kind == TokenKind::Value
651            && self.peek().span.slice(&self.source).trim() == "{";
652
653        if is_braced_next_line {
654            content_end = self.peek().span.end;
655            self.pos += 1;
656        }
657
658        let is_braced = is_braced_same_line || is_braced_next_line;
659
660        node.content_span = Span::new(kw_tok.span.start, content_end);
661
662        // Collect trailing newline.
663        self.collect_trailing_newline(&mut node);
664
665        // Allocate the conditional node.
666        let cond_id = self.alloc_node(node);
667
668        if is_braced {
669            // Parse braced then-block: consume children until `}`.
670            self.parse_braced_body(cond_id);
671        } else {
672            // Parse then-block: items indented more than the conditional.
673            self.parse_body(cond_id, cond_indent, false);
674        }
675
676        // Check for `else` at the same indent level.
677        if !self.at_eof()
678            && self.peek().kind == TokenKind::Else
679            && self.peek().indent == cond_indent
680        {
681            let else_id = self.parse_else_block(cond_indent);
682            self.add_child(cond_id, else_id);
683        }
684
685        // Update span.
686        let body_end = self.last_child_end(cond_id);
687        self.nodes[cond_id.0].span = Span::new(self.nodes[cond_id.0].span.start, body_end);
688
689        cond_id
690    }
691
692    /// Parse an `else` block.
693    fn parse_else_block(&mut self, cond_indent: usize) -> NodeId {
694        let else_tok = self.peek().clone();
695        debug_assert_eq!(else_tok.kind, TokenKind::Else);
696        self.pos += 1;
697
698        // Check for braced else: `else {`: a Value token with `{` on the
699        // same line as the `else` keyword.
700        let is_braced = !self.at_eof()
701            && self.peek().kind == TokenKind::Value
702            && !self
703                .peek()
704                .leading_trivia
705                .iter()
706                .any(|t| t.kind == TriviaKind::Newline)
707            && self.peek().span.slice(&self.source).trim() == "{";
708
709        let content_end = if is_braced {
710            // Consume the `{` token: include it in the else block's content span.
711            let brace_tok = self.peek().clone();
712            let _ = brace_tok; // consumed below
713            let end = self.peek().span.end;
714            self.pos += 1;
715            end
716        } else {
717            else_tok.span.end
718        };
719
720        let mut node = CstNode::new(CstNodeKind::ElseBlock, else_tok.span);
721        node.leading_trivia = else_tok.leading_trivia;
722        node.content_span = Span::new(else_tok.span.start, content_end);
723        node.indent = else_tok.indent;
724
725        // Trailing newline.
726        self.collect_trailing_newline(&mut node);
727
728        // Allocate.
729        let else_id = self.alloc_node(node);
730
731        if is_braced {
732            // Parse braced else body: consume children until we see `}`.
733            self.parse_braced_body(else_id);
734        } else {
735            // Parse else body using indentation.
736            self.parse_body(else_id, cond_indent, false);
737        }
738
739        // Update span.
740        let body_end = self.last_child_end(else_id);
741        self.nodes[else_id.0].span = Span::new(self.nodes[else_id.0].span.start, body_end);
742
743        else_id
744    }
745
746    /// Parse a braced block body (`{ ... }`). Consumes children until we see
747    /// a Value token that is just `}`. The closing `}` is consumed and added
748    /// as a ValueLine child so it appears in the rendered output.
749    fn parse_braced_body(&mut self, parent_id: NodeId) {
750        loop {
751            if self.at_eof() {
752                break;
753            }
754
755            let tok = self.peek();
756
757            // Check for the closing `}`.
758            if tok.kind == TokenKind::Value && tok.span.slice(&self.source).trim() == "}" {
759                let node_id = self.parse_value_line();
760                self.add_child(parent_id, node_id);
761                break;
762            }
763
764            match tok.kind {
765                TokenKind::Comment => {
766                    let node_id = self.parse_comment();
767                    self.add_child(parent_id, node_id);
768                }
769                TokenKind::SectionHeader => {
770                    let node_id = self.parse_section();
771                    self.add_child(parent_id, node_id);
772                }
773                TokenKind::If | TokenKind::Elif => {
774                    let indent = tok.indent;
775                    let node_id = self.parse_conditional(indent);
776                    self.add_child(parent_id, node_id);
777                }
778                TokenKind::Else => {
779                    // Standalone else inside braced block: treat as error but
780                    // keep going.
781                    break;
782                }
783                TokenKind::FieldName => {
784                    let is_import = {
785                        let name_text = tok.span.slice(&self.source);
786                        name_text.eq_ignore_ascii_case("import")
787                    };
788                    if is_import {
789                        let node_id = self.parse_import();
790                        self.add_child(parent_id, node_id);
791                    } else {
792                        let indent = tok.indent;
793                        let node_id = self.parse_field(indent);
794                        self.add_child(parent_id, node_id);
795                    }
796                }
797                TokenKind::Value => {
798                    let node_id = self.parse_value_line();
799                    self.add_child(parent_id, node_id);
800                }
801                TokenKind::Eof => break,
802                _ => {
803                    let span = tok.span;
804                    let kind = tok.kind;
805                    self.emit_error(span, format!("unexpected token in braced block: {kind:?}"));
806                    self.pos += 1;
807                }
808            }
809        }
810    }
811
812    // -- Condition expression helpers ----------------------------------------
813
814    /// Find the byte offset where the condition expression starts (skipping
815    /// whitespace trivia tokens after the keyword).
816    fn find_condition_expr_start(&self) -> usize {
817        if self.at_eof() {
818            return self.source.len();
819        }
820        let tok = self.peek();
821        // The expression starts at the first non-trivia content after the keyword.
822        // The token's leading trivia includes whitespace, so the expr starts at
823        // the token's span start.
824        tok.span.start
825    }
826
827    /// Consume condition expression tokens (everything on the same line after
828    /// the `if` / `elif` keyword). Returns the end byte offset.
829    ///
830    /// Stops before consuming a trailing `{` Value token so that the caller
831    /// can detect braced layout.
832    fn consume_condition_expr(&mut self) -> usize {
833        let mut end = 0;
834        // Consume tokens until we see one whose leading trivia contains a
835        // newline (meaning it's on the next line) or we hit EOF.
836        loop {
837            if self.at_eof() {
838                break;
839            }
840            let tok = self.peek();
841            // Check if this token is on a new line.
842            let has_newline = tok
843                .leading_trivia
844                .iter()
845                .any(|t| t.kind == TriviaKind::Newline);
846            if has_newline {
847                break;
848            }
849            // Stop before a `{` Value token: it signals braced layout.
850            if tok.kind == TokenKind::Value && tok.span.slice(&self.source).trim() == "{" {
851                break;
852            }
853            // Token is on the same line: it's part of the condition expr.
854            end = tok.span.end;
855            self.pos += 1;
856        }
857        end
858    }
859
860    // -- Trivia helpers -----------------------------------------------------
861
862    /// If the next pending trivia (on the next token) starts with a Newline,
863    /// steal it and add to this node's trailing trivia. This handles the
864    /// common case where a node's line ends with `\n` and that newline should
865    /// belong to this node, not the next one.
866    fn collect_trailing_newline(&mut self, node: &mut CstNode) {
867        if self.pos >= self.tokens.len() {
868            return;
869        }
870        let next_tok = &mut self.tokens[self.pos];
871        // Collect leading newlines (and any whitespace before them that's
872        // actually the remainder of the current line: but our lexer puts
873        // newlines at the start of the next token's trivia).
874        let mut stolen = Vec::new();
875        let mut remaining = Vec::new();
876        let mut found_newline = false;
877
878        for tp in next_tok.leading_trivia.drain(..) {
879            if !found_newline && tp.kind == TriviaKind::Newline {
880                stolen.push(tp);
881                found_newline = true;
882            } else if found_newline {
883                remaining.push(tp);
884            } else {
885                // Trivia before the newline (shouldn't normally happen since
886                // newlines are first in the trivia of the next token, but
887                // just in case).
888                stolen.push(tp);
889            }
890        }
891
892        next_tok.leading_trivia = remaining;
893        node.trailing_trivia.extend(stolen);
894    }
895
896    /// Update a node's `span` to cover its leading trivia, content, and
897    /// trailing trivia.
898    fn finalize_node_span(&self, node: &mut CstNode) {
899        let start = node
900            .leading_trivia
901            .first()
902            .map(|t| t.span.start)
903            .unwrap_or(node.content_span.start);
904        let end = node
905            .trailing_trivia
906            .last()
907            .map(|t| t.span.end)
908            .unwrap_or(node.content_span.end);
909        node.span = Span::new(start, end);
910    }
911
912    /// Get the end byte offset of the last child of a node (or the node's
913    /// own span end if it has no children).
914    fn last_child_end(&self, node_id: NodeId) -> usize {
915        let node = &self.nodes[node_id.0];
916        if let Some(&last_child) = node.children.last() {
917            self.nodes[last_child.0].span.end
918        } else {
919            node.span.end
920        }
921    }
922}
923
924/// Parse a `.cabal` source string into a CST with diagnostics.
925pub fn parse(source: &str) -> ParseResult {
926    let tokens = tokenize(source);
927    let parser = Parser::new(source.to_owned(), tokens);
928    parser.parse()
929}
930
931// ---------------------------------------------------------------------------
932// Tests
933// ---------------------------------------------------------------------------
934
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `source`, render the resulting CST, and require the rendered
    /// text to equal the input exactly.
    fn assert_round_trip(source: &str) {
        let parsed = parse(source);
        let rendered = parsed.cst.render();
        assert_eq!(
            rendered, source,
            "\n--- EXPECTED ---\n{source}\n--- GOT ---\n{rendered}\n"
        );
    }

    // -- Round-trip tests ---------------------------------------------------

    #[test]
    fn round_trip_minimal() {
        assert_round_trip("cabal-version: 3.0\nname: foo\nversion: 0.1.0.0\n");
    }

    #[test]
    fn round_trip_with_comments() {
        assert_round_trip(
            "-- Top comment\ncabal-version: 3.0\nname: foo\n-- A comment\nversion: 0.1.0.0\n",
        );
    }

    #[test]
    fn round_trip_with_blank_lines() {
        assert_round_trip("cabal-version: 3.0\nname: foo\n\nversion: 0.1.0.0\n");
    }

    #[test]
    fn round_trip_section() {
        assert_round_trip(
            "\
cabal-version: 3.0
name: foo
version: 0.1.0.0

library
  exposed-modules: Foo
  build-depends: base >=4.14
",
        );
    }

    #[test]
    fn round_trip_section_with_arg() {
        assert_round_trip(
            "\
cabal-version: 3.0
name: foo
version: 0.1.0.0

executable my-exe
  main-is: Main.hs
  build-depends: base
",
        );
    }

    #[test]
    fn round_trip_conditional() {
        assert_round_trip(
            "\
cabal-version: 3.0
name: foo
version: 0.1.0.0

library
  build-depends: base
  if flag(dev)
    ghc-options: -O0
  else
    ghc-options: -O2
",
        );
    }

    #[test]
    fn round_trip_common_stanza() {
        assert_round_trip(
            "\
cabal-version: 3.0
name: foo
version: 0.1.0.0

common warnings
  ghc-options: -Wall

library
  import: warnings
  exposed-modules: Foo
",
        );
    }

    #[test]
    fn round_trip_multiline_field() {
        assert_round_trip(
            "\
cabal-version: 3.0
name: foo
version: 0.1.0.0

library
  exposed-modules:
    Foo
    Bar
    Baz
",
        );
    }

    #[test]
    fn round_trip_leading_comma_deps() {
        assert_round_trip(
            "\
cabal-version: 3.0
name: foo
version: 0.1.0.0

library
  build-depends:
      base >=4.14
    , text >=2.0
    , aeson ^>=2.2
",
        );
    }

    #[test]
    fn round_trip_trailing_comma_deps() {
        assert_round_trip(
            "\
cabal-version: 3.0
name: foo
version: 0.1.0.0

library
  build-depends:
    base >=4.14,
    text >=2.0,
    aeson ^>=2.2
",
        );
    }

    #[test]
    fn round_trip_single_line_deps() {
        assert_round_trip(
            "\
cabal-version: 3.0
name: foo
version: 0.1.0.0

library
  build-depends: base >=4.14, text >=2.0, aeson ^>=2.2
",
        );
    }

    #[test]
    fn round_trip_multiple_sections() {
        assert_round_trip(
            "\
cabal-version: 3.0
name: foo
version: 0.1.0.0

library
  exposed-modules: Foo
  build-depends: base

executable my-exe
  main-is: Main.hs
  build-depends: base, foo

test-suite tests
  type: exitcode-stdio-1.0
  main-is: Main.hs
  build-depends: base, foo, tasty
",
        );
    }

    #[test]
    fn round_trip_complex_conditional() {
        assert_round_trip(
            "\
cabal-version: 3.0
name: foo
version: 0.1.0.0

library
  build-depends: base
  if flag(dev) && !os(windows)
    ghc-options: -O0
",
        );
    }

    #[test]
    fn round_trip_no_trailing_newline() {
        assert_round_trip("cabal-version: 3.0\nname: foo\nversion: 0.1.0.0");
    }

    #[test]
    fn round_trip_field_extra_spaces() {
        assert_round_trip("name:    foo\nversion:    0.1.0.0\n");
    }

    #[test]
    fn round_trip_comment_in_section() {
        assert_round_trip(
            "\
library
  -- A comment in the library
  exposed-modules: Foo
",
        );
    }

    #[test]
    fn round_trip_blank_line_between_sections() {
        assert_round_trip(
            "\
library
  exposed-modules: Foo

executable bar
  main-is: Main.hs
",
        );
    }

    #[test]
    fn round_trip_flag_section() {
        assert_round_trip(
            "\
cabal-version: 3.0
name: foo
version: 0.1.0.0

flag dev
  description: Development mode
  default: False
  manual: True
",
        );
    }

    #[test]
    fn round_trip_source_repository() {
        assert_round_trip(
            "\
cabal-version: 3.0
name: foo
version: 0.1.0.0

source-repository head
  type: git
  location: https://github.com/example/foo
",
        );
    }

    // -- Structure tests ----------------------------------------------------

    #[test]
    fn parse_structure_simple() {
        let input = "cabal-version: 3.0\nname: foo\n";
        let parsed = parse(input);
        let root = parsed.cst.node(parsed.cst.root);
        // Two top-level fields, nothing else.
        assert_eq!(root.children.len(), 2);
        for child_id in root.children.iter().copied() {
            assert_eq!(parsed.cst.node(child_id).kind, CstNodeKind::Field);
        }
    }

    #[test]
    fn parse_structure_section_with_children() {
        let input = "\
library
  exposed-modules: Foo
  build-depends: base
";
        let parsed = parse(input);
        let root = parsed.cst.node(parsed.cst.root);
        assert_eq!(root.children.len(), 1);
        let library = parsed.cst.node(root.children[0]);
        assert_eq!(library.kind, CstNodeKind::Section);
        assert_eq!(library.section_keyword.unwrap().slice(input), "library");
        assert!(library.section_arg.is_none());
        assert_eq!(library.children.len(), 2);
    }

    #[test]
    fn parse_structure_section_with_arg() {
        let input = "executable my-exe\n  main-is: Main.hs\n";
        let parsed = parse(input);
        let root = parsed.cst.node(parsed.cst.root);
        let exe = parsed.cst.node(root.children[0]);
        assert_eq!(exe.kind, CstNodeKind::Section);
        assert_eq!(exe.section_keyword.unwrap().slice(input), "executable");
        assert_eq!(exe.section_arg.unwrap().slice(input), "my-exe");
    }

    #[test]
    fn parse_structure_conditional() {
        let input = "\
library
  build-depends: base
  if flag(dev)
    ghc-options: -O0
  else
    ghc-options: -O2
";
        let parsed = parse(input);
        let root = parsed.cst.node(parsed.cst.root);
        let library = parsed.cst.node(root.children[0]);
        // The section holds the build-depends field and the conditional.
        assert_eq!(library.children.len(), 2);
        let conditional = parsed.cst.node(library.children[1]);
        assert_eq!(conditional.kind, CstNodeKind::Conditional);
        // The conditional holds at least the then-branch field and the else block.
        assert!(conditional.children.len() >= 2);
        // The else block must come last.
        let tail = parsed.cst.node(*conditional.children.last().unwrap());
        assert_eq!(tail.kind, CstNodeKind::ElseBlock);
    }

    #[test]
    fn parse_structure_import() {
        let input = "\
library
  import: warnings
  exposed-modules: Foo
";
        let parsed = parse(input);
        let root = parsed.cst.node(parsed.cst.root);
        let library = parsed.cst.node(root.children[0]);
        let import = parsed.cst.node(library.children[0]);
        assert_eq!(import.kind, CstNodeKind::Import);
        assert_eq!(import.field_name.unwrap().slice(input), "import");
        assert_eq!(import.field_value.unwrap().slice(input), "warnings");
    }

    #[test]
    fn parse_structure_multiline_field() {
        let input = "\
library
  exposed-modules:
    Foo
    Bar
";
        let parsed = parse(input);
        let root = parsed.cst.node(parsed.cst.root);
        let library = parsed.cst.node(root.children[0]);
        let modules = parsed.cst.node(library.children[0]);
        assert_eq!(modules.kind, CstNodeKind::Field);
        assert_eq!(modules.field_name.unwrap().slice(input), "exposed-modules");
        // One ValueLine child per continuation line.
        assert_eq!(modules.children.len(), 2);
        for child_id in modules.children.iter().copied() {
            assert_eq!(parsed.cst.node(child_id).kind, CstNodeKind::ValueLine);
        }
    }

    #[test]
    fn parse_no_diagnostics_for_valid_file() {
        let input = "\
cabal-version: 3.0
name: foo
version: 0.1.0.0

common warnings
  ghc-options: -Wall

library
  import: warnings
  exposed-modules:
    Foo
    Bar
  build-depends:
    base >=4.14
  if flag(dev)
    ghc-options: -O0
  else
    ghc-options: -O2

executable my-exe
  import: warnings
  main-is: Main.hs
  build-depends: base, foo
";
        let parsed = parse(input);
        assert!(
            parsed.diagnostics.is_empty(),
            "expected no diagnostics, got: {:?}",
            parsed.diagnostics
        );
    }

    // -- Error recovery tests -----------------------------------------------

    #[test]
    fn parse_does_not_panic_on_empty_input() {
        let parsed = parse("");
        assert!(parsed.cst.render().is_empty());
    }

    #[test]
    fn parse_does_not_panic_on_blank_lines_only() {
        let input = "\n\n\n";
        let parsed = parse(input);
        assert_eq!(parsed.cst.render(), input);
    }

    #[test]
    fn parse_does_not_panic_on_comments_only() {
        let input = "-- just a comment\n-- another one\n";
        let parsed = parse(input);
        assert_eq!(parsed.cst.render(), input);
    }

    // -- Field name/value span tests ----------------------------------------

    #[test]
    fn field_name_and_value_spans() {
        let input = "name: foo\n";
        let parsed = parse(input);
        let root = parsed.cst.node(parsed.cst.root);
        let name_field = parsed.cst.node(root.children[0]);
        assert_eq!(name_field.field_name.unwrap().slice(input), "name");
        assert_eq!(name_field.field_value.unwrap().slice(input), "foo");
    }

    #[test]
    fn field_no_value() {
        let input = "build-depends:\n";
        let parsed = parse(input);
        let root = parsed.cst.node(parsed.cst.root);
        let deps_field = parsed.cst.node(root.children[0]);
        assert_eq!(deps_field.field_name.unwrap().slice(input), "build-depends");
        assert!(deps_field.field_value.is_none());
    }

    // -- Large round-trip tests ---------------------------------------------

    #[test]
    fn round_trip_realistic_file() {
        assert_round_trip(
            "\
cabal-version:   3.0
name:            my-project
version:         0.1.0.0
synopsis:        A sample project
description:
  This is a longer description
  that spans multiple lines.
license:         MIT
license-file:    LICENSE
author:          Test Author
maintainer:      test@example.com
category:        Development
build-type:      Simple

common warnings
  ghc-options: -Wall -Wcompat -Widentities
               -Wincomplete-record-updates
               -Wincomplete-uni-patterns
               -Wmissing-deriving-strategies
               -Wredundant-constraints

flag dev
  description: Enable development mode
  default:     False
  manual:      True

library
  import:           warnings
  exposed-modules:
    MyProject
    MyProject.Internal
    MyProject.Types
  other-modules:
    MyProject.Utils
  build-depends:
      base >=4.14 && <5
    , aeson ^>=2.2
    , text >=2.0 && <2.2
    , containers ^>=0.6
  hs-source-dirs:   src
  default-language: GHC2021
  default-extensions:
    OverloadedStrings
    DerivingStrategies

  if flag(dev)
    ghc-options: -O0
  else
    ghc-options: -O2

executable my-project
  import:           warnings
  main-is:          Main.hs
  other-modules:    Paths_my_project
  build-depends:
      base
    , my-project
    , optparse-applicative ^>=0.18
  hs-source-dirs:   app
  default-language: GHC2021

test-suite my-project-test
  import:           warnings
  type:             exitcode-stdio-1.0
  main-is:          Main.hs
  other-modules:
    Test.MyProject
    Test.MyProject.Types
  build-depends:
      base
    , my-project
    , tasty ^>=1.5
    , tasty-hunit ^>=0.10
  hs-source-dirs:   test
  default-language: GHC2021

source-repository head
  type:     git
  location: https://github.com/example/my-project
",
        );
    }

    #[test]
    fn round_trip_nested_conditionals() {
        assert_round_trip(
            "\
library
  build-depends: base
  if os(linux)
    if flag(dbus)
      build-depends: dbus
      cpp-options: -DDBUS
  if os(windows)
    build-depends: Win32
",
        );
    }

    #[test]
    fn round_trip_benchmark_section() {
        assert_round_trip(
            "\
benchmark my-bench
  type: exitcode-stdio-1.0
  main-is: Main.hs
  build-depends: base, criterion
  hs-source-dirs: bench
",
        );
    }

    // -- Braced layout tests --------------------------------------------------

    #[test]
    fn round_trip_braced_section() {
        assert_round_trip("library {\n  exposed-modules: Foo\n  build-depends: base\n}\n");
    }

    #[test]
    fn round_trip_braced_executable() {
        assert_round_trip("executable foo {\n  main-is: Main.hs\n}\n");
    }

    #[test]
    fn round_trip_braced_if() {
        assert_round_trip(
            "library\n  build-depends: base\n  if flag(dev) {\n    ghc-options: -O0\n  }\n",
        );
    }

    #[test]
    fn round_trip_braced_if_else() {
        assert_round_trip(
            "library\n  if flag(dev) {\n    ghc-options: -O0\n  } else {\n    ghc-options: -O2\n  }\n",
        );
    }
}