Skip to main content

cabalist_parser/
parse.rs

1//! Hand-written recursive descent parser for `.cabal` files.
2//!
3//! Transforms the token stream from the lexer into a CST. The parser uses
4//! indentation levels to determine nesting — sections contain fields that
5//! are indented more than the section header, fields contain continuation
6//! lines indented more than the field name, etc.
7
8use crate::cst::{CabalCst, CstNode, CstNodeKind};
9use crate::diagnostic::Diagnostic;
10use crate::lexer::{tokenize, Token, TokenKind, TriviaKind};
11use crate::span::{NodeId, Span};
12
13/// The result of parsing a `.cabal` file.
14#[derive(Debug, Clone)]
15pub struct ParseResult {
16    /// The concrete syntax tree.
17    pub cst: CabalCst,
18    /// Diagnostics (errors, warnings) encountered during parsing.
19    pub diagnostics: Vec<Diagnostic>,
20}
21
22/// Parser state.
23struct Parser {
24    /// The original source text.
25    source: String,
26    /// The token stream (from the lexer).
27    tokens: Vec<Token>,
28    /// Current position in the token stream.
29    pos: usize,
30    /// The CST node arena being built.
31    nodes: Vec<CstNode>,
32    /// Diagnostics collected during parsing.
33    diagnostics: Vec<Diagnostic>,
34}
35
36impl Parser {
37    fn new(source: String, tokens: Vec<Token>) -> Self {
38        Self {
39            source,
40            tokens,
41            pos: 0,
42            nodes: Vec::new(),
43            diagnostics: Vec::new(),
44        }
45    }
46
47    // -- Token access -------------------------------------------------------
48
49    /// Peek at the current token without consuming it.
50    fn peek(&self) -> &Token {
51        &self.tokens[self.pos.min(self.tokens.len() - 1)]
52    }
53
54    /// Check if we're at EOF.
55    fn at_eof(&self) -> bool {
56        self.peek().kind == TokenKind::Eof
57    }
58
59    // -- Node creation ------------------------------------------------------
60
61    /// Allocate a new node in the arena, returning its `NodeId`.
62    fn alloc_node(&mut self, node: CstNode) -> NodeId {
63        let id = NodeId(self.nodes.len());
64        self.nodes.push(node);
65        id
66    }
67
68    /// Set the parent of `child` to `parent` and add `child` to `parent`'s
69    /// children list.
70    fn add_child(&mut self, parent: NodeId, child: NodeId) {
71        self.nodes[child.0].parent = Some(parent);
72        self.nodes[parent.0].children.push(child);
73    }
74
75    // -- Diagnostic helpers -------------------------------------------------
76
77    fn emit_error(&mut self, span: Span, message: impl Into<String>) {
78        self.diagnostics.push(Diagnostic::error(span, message));
79    }
80
81    // -- Skip / advance helpers ---------------------------------------------
82
83    // -- Parsing entry point ------------------------------------------------
84
85    /// Parse the entire `.cabal` file, producing a `ParseResult`.
86    fn parse(mut self) -> ParseResult {
87        // Create the root node.
88        let root_node = CstNode::new(CstNodeKind::Root, Span::new(0, self.source.len()));
89        let root_id = self.alloc_node(root_node);
90
91        // Parse top-level items.
92        self.parse_body(root_id, 0, true);
93
94        // Absorb any trailing trivia from the Eof token into the root.
95        if !self.at_eof() {
96            // Shouldn't happen, but be defensive.
97        }
98
99        // Update the root span.
100        self.nodes[root_id.0].span = Span::new(0, self.source.len());
101        self.nodes[root_id.0].content_span = Span::new(0, self.source.len());
102
103        ParseResult {
104            cst: CabalCst {
105                source: self.source,
106                nodes: self.nodes,
107                root: root_id,
108            },
109            diagnostics: self.diagnostics,
110        }
111    }
112
113    /// Parse a body (sequence of fields, sections, conditionals, comments,
114    /// blank lines) where all items have indent > `min_indent`.
115    ///
116    /// For the top-level, `min_indent` is 0, `is_root` is true, and we
117    /// accept items at indent 0.
118    /// For section bodies, `min_indent` is the section header's indent,
119    /// `is_root` is false, and we accept only items with indent > `min_indent`.
120    fn parse_body(&mut self, parent_id: NodeId, min_indent: usize, is_root: bool) {
121        loop {
122            if self.at_eof() {
123                // Absorb EOF trivia into the parent — but only once (at
124                // the root level) to avoid duplicating trailing content.
125                if is_root {
126                    let eof_tok = self.peek();
127                    if !eof_tok.leading_trivia.is_empty() {
128                        self.consume_trailing_trivia(parent_id);
129                    }
130                }
131                break;
132            }
133
134            let tok = self.peek();
135            let indent = tok.indent;
136
137            // For section bodies (is_root == false): items must be indented
138            // more than the parent. For the top-level root: accept all.
139            if !is_root && indent <= min_indent {
140                break;
141            }
142
143            match tok.kind {
144                TokenKind::Comment => {
145                    let node_id = self.parse_comment();
146                    self.add_child(parent_id, node_id);
147                }
148                TokenKind::SectionHeader => {
149                    let node_id = self.parse_section();
150                    self.add_child(parent_id, node_id);
151                }
152                TokenKind::If | TokenKind::Elif => {
153                    let node_id = self.parse_conditional(indent);
154                    self.add_child(parent_id, node_id);
155                }
156                TokenKind::Else => {
157                    // `else` not expected here — it's handled inside
158                    // parse_conditional. If we see it standalone, that's
159                    // an error. Break so the caller can handle it.
160                    break;
161                }
162                TokenKind::FieldName => {
163                    // Check if this is an `import:` field.
164                    let is_import = {
165                        let name_text = tok.span.slice(&self.source);
166                        name_text.eq_ignore_ascii_case("import")
167                    };
168                    if is_import {
169                        let node_id = self.parse_import();
170                        self.add_child(parent_id, node_id);
171                    } else {
172                        let node_id = self.parse_field(indent);
173                        self.add_child(parent_id, node_id);
174                    }
175                }
176                TokenKind::Value => {
177                    // A value line at the body level — this could be a
178                    // continuation of the previous field, but since we handle
179                    // continuations inside parse_field, seeing one here means
180                    // it's either misindented or a free-standing value.
181                    if is_root && indent == 0 {
182                        // Top-level value line — unusual but we should handle
183                        // it. Emit as an error + skip.
184                        let span = tok.span;
185                        self.emit_error(span, "unexpected value at top level");
186                        self.pos += 1;
187                    } else if indent > min_indent {
188                        // Indented value line in a section body — could be
189                        // a continuation or orphan. Emit as a ValueLine child.
190                        let node_id = self.parse_value_line();
191                        self.add_child(parent_id, node_id);
192                    } else {
193                        break;
194                    }
195                }
196                TokenKind::Eof => break,
197                _ => {
198                    // Unexpected token — skip with diagnostic.
199                    let span = tok.span;
200                    let kind = tok.kind;
201                    self.emit_error(span, format!("unexpected token: {kind:?}"));
202                    self.pos += 1;
203                }
204            }
205        }
206    }
207
208    /// Consume remaining trivia from the EOF token and create blank/comment
209    /// nodes as needed so they appear in the rendered output.
210    fn consume_trailing_trivia(&mut self, parent_id: NodeId) {
211        let eof_idx = self.pos.min(self.tokens.len() - 1);
212        if self.tokens[eof_idx].kind != TokenKind::Eof {
213            return;
214        }
215
216        // Take (not clone) the trivia so it can't be consumed again.
217        let trivia: Vec<_> = std::mem::take(&mut self.tokens[eof_idx].leading_trivia);
218        if trivia.is_empty() {
219            return;
220        }
221
222        // Gather all the trivia into a single BlankLine node that covers
223        // the full range. This is a simplification — it means trailing
224        // blank lines at the end of the file get rendered correctly.
225        let start = trivia.first().unwrap().span.start;
226        let end = trivia.last().unwrap().span.end;
227        let span = Span::new(start, end);
228
229        let mut node = CstNode::new(CstNodeKind::BlankLine, span);
230        node.content_span = span;
231        // We store the trivia so render works: the BlankLine node's
232        // content_span covers the text directly.
233        let node_id = self.alloc_node(node);
234        self.add_child(parent_id, node_id);
235    }
236
237    // -- Individual node parsers --------------------------------------------
238
239    /// Parse a standalone comment line.
240    fn parse_comment(&mut self) -> NodeId {
241        let tok = self.peek().clone();
242        debug_assert_eq!(tok.kind, TokenKind::Comment);
243        self.pos += 1;
244
245        let mut node = CstNode::new(CstNodeKind::Comment, tok.span);
246        node.leading_trivia = tok.leading_trivia;
247        node.content_span = tok.span;
248        node.indent = tok.indent;
249
250        // Grab the newline trivia that follows.
251        self.collect_trailing_newline(&mut node);
252
253        // Update span to cover leading trivia + content + trailing trivia.
254        self.finalize_node_span(&mut node);
255
256        self.alloc_node(node)
257    }
258
259    /// Parse a field: `field-name: value` with possible continuation lines.
260    fn parse_field(&mut self, field_indent: usize) -> NodeId {
261        // We expect: FieldName, Colon, optional Value.
262        let name_tok = self.peek().clone();
263        debug_assert_eq!(name_tok.kind, TokenKind::FieldName);
264        self.pos += 1;
265
266        let mut node = CstNode::new(CstNodeKind::Field, name_tok.span);
267        node.leading_trivia = name_tok.leading_trivia;
268        node.field_name = Some(name_tok.span);
269        node.indent = name_tok.indent;
270
271        // Expect Colon.
272        let mut colon_end = name_tok.span.end;
273        if !self.at_eof() && self.peek().kind == TokenKind::Colon {
274            let colon_tok = self.peek().clone();
275            // Absorb colon trivia (spacing between name and colon).
276            colon_end = colon_tok.span.end;
277            self.pos += 1;
278
279            // Check for value on the same line.
280            if !self.at_eof() && self.peek().kind == TokenKind::Value {
281                let val_tok = self.peek();
282                // Only take this value if it's not on a new line.
283                // We detect "same line" by checking that the value token's
284                // leading trivia does NOT contain a Newline.
285                let has_newline = val_tok
286                    .leading_trivia
287                    .iter()
288                    .any(|t| t.kind == TriviaKind::Newline);
289                if !has_newline {
290                    let val_tok = self.peek().clone();
291                    node.field_value = Some(val_tok.span);
292                    colon_end = val_tok.span.end;
293                    self.pos += 1;
294                }
295            }
296        }
297
298        node.content_span = Span::new(name_tok.span.start, colon_end);
299
300        // Check if the field value opens a braced freeform text block.
301        // E.g. `Description: {` — the value text ends with `{`.
302        let is_braced_field = node.field_value.is_some_and(|val_span| {
303            let val_text = val_span.slice(&self.source);
304            val_text.trim_end().ends_with('{')
305        });
306
307        // Collect trailing newline.
308        self.collect_trailing_newline(&mut node);
309
310        if is_braced_field {
311            // Braced freeform text block: consume all lines until `}`
312            // regardless of indentation.
313            self.parse_braced_field_continuation(&mut node);
314        } else {
315            // Continuation lines: any following line with indent > field_indent.
316            self.parse_continuation_lines(&mut node, field_indent);
317        }
318
319        self.finalize_node_span(&mut node);
320        self.alloc_node(node)
321    }
322
323    /// Parse continuation lines for a braced freeform text field (`Description: { ... }`).
324    /// Consumes all lines until a line whose content is just `}`.
325    fn parse_braced_field_continuation(&mut self, field_node: &mut CstNode) {
326        let mut child_ids = Vec::new();
327
328        loop {
329            if self.at_eof() {
330                break;
331            }
332            let tok = self.peek();
333
334            // Check if this token's text is just `}` (the closing brace line).
335            let is_closing =
336                tok.kind == TokenKind::Value && tok.span.slice(&self.source).trim() == "}";
337
338            match tok.kind {
339                TokenKind::Value | TokenKind::Comment => {
340                    let node_id = if tok.kind == TokenKind::Comment {
341                        self.parse_comment()
342                    } else {
343                        self.parse_value_line()
344                    };
345                    child_ids.push(node_id);
346                    if is_closing {
347                        break;
348                    }
349                }
350                _ => {
351                    // In braced mode, other token types (blank lines represented
352                    // via trivia) are handled by the value/comment lines above.
353                    // If we hit something unexpected, just break.
354                    break;
355                }
356            }
357        }
358
359        field_node.children = child_ids;
360    }
361
362    /// Parse continuation lines for a field. These are lines indented more
363    /// than the field's indent level.
364    fn parse_continuation_lines(&mut self, field_node: &mut CstNode, field_indent: usize) {
365        // Collect continuation lines as standalone nodes — their IDs will
366        // be stored in the field node's children vec.
367        let mut child_ids = Vec::new();
368
369        loop {
370            if self.at_eof() {
371                break;
372            }
373            let tok = self.peek();
374            let indent = tok.indent;
375
376            // A continuation line must be indented more than the field.
377            if indent <= field_indent {
378                // But check if this is a blank line or comment that might be
379                // "between" continuation lines.
380                if tok.kind == TokenKind::Comment {
381                    // Check if the next non-comment/non-blank token is still
382                    // a continuation. For now, treat indented comments as
383                    // part of the field too.
384                    if indent > field_indent {
385                        let node_id = self.parse_comment();
386                        child_ids.push(node_id);
387                        continue;
388                    }
389                }
390                break;
391            }
392
393            match tok.kind {
394                TokenKind::Value => {
395                    let node_id = self.parse_value_line();
396                    child_ids.push(node_id);
397                }
398                TokenKind::FieldName => {
399                    // If a field name appears indented deeper, it might be a
400                    // nested field (unlikely in .cabal) or misformatted.
401                    // Treat the entire line as a value for now.
402                    let node_id = self.parse_value_line_from_field();
403                    child_ids.push(node_id);
404                }
405                TokenKind::Comment => {
406                    let node_id = self.parse_comment();
407                    child_ids.push(node_id);
408                }
409                _ => {
410                    break;
411                }
412            }
413        }
414
415        field_node.children = child_ids;
416    }
417
418    /// Parse a value line (continuation line for a field value).
419    fn parse_value_line(&mut self) -> NodeId {
420        let tok = self.peek().clone();
421        self.pos += 1;
422
423        let mut node = CstNode::new(CstNodeKind::ValueLine, tok.span);
424        node.leading_trivia = tok.leading_trivia;
425        node.content_span = tok.span;
426        node.indent = tok.indent;
427        self.collect_trailing_newline(&mut node);
428        self.finalize_node_span(&mut node);
429        self.alloc_node(node)
430    }
431
432    /// Parse a field name token as a value line (for cases where a field-like
433    /// token appears as a continuation).
434    fn parse_value_line_from_field(&mut self) -> NodeId {
435        // Consume FieldName, Colon, and optional Value as one ValueLine.
436        let name_tok = self.peek().clone();
437        self.pos += 1;
438        let mut end = name_tok.span.end;
439
440        // Consume colon if present.
441        if !self.at_eof() && self.peek().kind == TokenKind::Colon {
442            end = self.peek().span.end;
443            self.pos += 1;
444        }
445
446        // Consume value if present on same line.
447        if !self.at_eof() && self.peek().kind == TokenKind::Value {
448            let has_newline = self
449                .peek()
450                .leading_trivia
451                .iter()
452                .any(|t| t.kind == TriviaKind::Newline);
453            if !has_newline {
454                end = self.peek().span.end;
455                self.pos += 1;
456            }
457        }
458
459        let content_span = Span::new(name_tok.span.start, end);
460        let mut node = CstNode::new(CstNodeKind::ValueLine, content_span);
461        node.leading_trivia = name_tok.leading_trivia;
462        node.content_span = content_span;
463        node.indent = name_tok.indent;
464        self.collect_trailing_newline(&mut node);
465        self.finalize_node_span(&mut node);
466        self.alloc_node(node)
467    }
468
469    /// Parse an `import: stanza-name` directive.
470    fn parse_import(&mut self) -> NodeId {
471        // Same shape as a field, but with Import kind.
472        let name_tok = self.peek().clone();
473        debug_assert_eq!(name_tok.kind, TokenKind::FieldName);
474        self.pos += 1;
475
476        let mut node = CstNode::new(CstNodeKind::Import, name_tok.span);
477        node.leading_trivia = name_tok.leading_trivia;
478        node.field_name = Some(name_tok.span);
479        node.indent = name_tok.indent;
480
481        let mut content_end = name_tok.span.end;
482
483        // Colon.
484        if !self.at_eof() && self.peek().kind == TokenKind::Colon {
485            content_end = self.peek().span.end;
486            self.pos += 1;
487
488            // Value (stanza name).
489            if !self.at_eof() && self.peek().kind == TokenKind::Value {
490                let has_newline = self
491                    .peek()
492                    .leading_trivia
493                    .iter()
494                    .any(|t| t.kind == TriviaKind::Newline);
495                if !has_newline {
496                    let val_tok = self.peek().clone();
497                    node.field_value = Some(val_tok.span);
498                    content_end = val_tok.span.end;
499                    self.pos += 1;
500                }
501            }
502        }
503
504        node.content_span = Span::new(name_tok.span.start, content_end);
505        self.collect_trailing_newline(&mut node);
506        self.finalize_node_span(&mut node);
507        self.alloc_node(node)
508    }
509
510    /// Parse a section: `library`, `executable foo`, etc.
511    /// Also handles braced layout: `library { ... }`, `executable foo { ... }`.
512    fn parse_section(&mut self) -> NodeId {
513        let header_tok = self.peek().clone();
514        debug_assert_eq!(header_tok.kind, TokenKind::SectionHeader);
515        let section_indent = header_tok.indent;
516        self.pos += 1;
517
518        let mut node = CstNode::new(CstNodeKind::Section, header_tok.span);
519        node.leading_trivia = header_tok.leading_trivia;
520        node.section_keyword = Some(header_tok.span);
521        node.indent = section_indent;
522
523        let mut content_end = header_tok.span.end;
524
525        // Section argument (e.g. `my-exe` in `executable my-exe`).
526        // Note: for `library {`, the lexer emits SectionArg("{").
527        // For `executable foo {`, the lexer emits SectionArg("foo {").
528        if !self.at_eof() && self.peek().kind == TokenKind::SectionArg {
529            let arg_tok = self.peek().clone();
530            node.section_arg = Some(arg_tok.span);
531            content_end = arg_tok.span.end;
532            self.pos += 1;
533        }
534
535        // Check for braced layout on the same line: the SectionArg ends with `{`.
536        // E.g. `library {` → SectionArg is "{", or `executable foo {` → SectionArg is "foo {".
537        let is_braced_same_line = node.section_arg.is_some_and(|arg_span| {
538            let arg_text = arg_span.slice(&self.source);
539            arg_text.trim_end().ends_with('{')
540        });
541
542        // Adjust the section_arg span if it contains a trailing `{`.
543        if is_braced_same_line {
544            if let Some(arg_span) = node.section_arg {
545                let arg_text = arg_span.slice(&self.source);
546                let trimmed = arg_text.trim_end();
547                // Remove the trailing `{` and any whitespace before it.
548                let without_brace = trimmed.trim_end_matches('{').trim_end();
549                if without_brace.is_empty() {
550                    // The entire arg was just `{` — no real section name.
551                    node.section_arg = None;
552                } else {
553                    // Trim the arg span to exclude the `{` and preceding whitespace.
554                    let new_end = arg_span.start + without_brace.len();
555                    node.section_arg = Some(Span::new(arg_span.start, new_end));
556                }
557            }
558        }
559
560        // Check for braced layout on the next line: a Value token containing `{`.
561        let is_braced_next_line = !is_braced_same_line
562            && !self.at_eof()
563            && self.peek().kind == TokenKind::Value
564            && self.peek().span.slice(&self.source).trim() == "{";
565
566        if is_braced_next_line {
567            // Include the `{` token in the content span.
568            content_end = self.peek().span.end;
569            self.pos += 1;
570        }
571
572        let is_braced = is_braced_same_line || is_braced_next_line;
573
574        node.content_span = Span::new(header_tok.span.start, content_end);
575
576        // Collect trailing newline for the header line.
577        self.collect_trailing_newline(&mut node);
578
579        // Allocate the section node now so children can reference it.
580        let section_id = self.alloc_node(node);
581
582        if is_braced {
583            // Parse braced section body: consume children until `}`.
584            self.parse_braced_body(section_id);
585        } else {
586            // Parse section body: items indented more than the section header.
587            self.parse_body(section_id, section_indent, false);
588        }
589
590        // Update the section's span to cover its entire body.
591        let body_end = self.last_child_end(section_id);
592        self.nodes[section_id.0].span = Span::new(self.nodes[section_id.0].span.start, body_end);
593
594        section_id
595    }
596
597    /// Parse a conditional: `if condition` + body, optional `else` + body.
598    /// Also handles braced layout: `if flag(dev) { ... }`.
599    fn parse_conditional(&mut self, cond_indent: usize) -> NodeId {
600        let kw_tok = self.peek().clone();
601        debug_assert!(matches!(kw_tok.kind, TokenKind::If | TokenKind::Elif));
602        self.pos += 1;
603
604        let mut node = CstNode::new(CstNodeKind::Conditional, kw_tok.span);
605        node.leading_trivia = kw_tok.leading_trivia;
606        node.condition_keyword = Some(kw_tok.span);
607        node.indent = cond_indent;
608
609        // Consume the condition expression tokens until we hit a newline.
610        // The condition is everything on the same line after the keyword.
611        // Note: consume_condition_expr stops before a `{` Value token.
612        let expr_start = self.find_condition_expr_start();
613        let expr_end = self.consume_condition_expr();
614
615        if expr_start < expr_end {
616            node.condition_expr = Some(Span::new(expr_start, expr_end));
617        }
618
619        // Check for braced layout on the same line: `{` Value token remaining.
620        let is_braced_same_line = !self.at_eof()
621            && self.peek().kind == TokenKind::Value
622            && !self
623                .peek()
624                .leading_trivia
625                .iter()
626                .any(|t| t.kind == TriviaKind::Newline)
627            && self.peek().span.slice(&self.source).trim() == "{";
628
629        let mut content_end = if expr_end > kw_tok.span.end {
630            expr_end
631        } else {
632            kw_tok.span.end
633        };
634
635        if is_braced_same_line {
636            // Consume the `{` token — include it in the content span.
637            content_end = self.peek().span.end;
638            self.pos += 1;
639        }
640
641        // Check for braced layout on the next line: a Value token `{` on a new line.
642        let is_braced_next_line = !is_braced_same_line
643            && !self.at_eof()
644            && self.peek().kind == TokenKind::Value
645            && self.peek().span.slice(&self.source).trim() == "{";
646
647        if is_braced_next_line {
648            content_end = self.peek().span.end;
649            self.pos += 1;
650        }
651
652        let is_braced = is_braced_same_line || is_braced_next_line;
653
654        node.content_span = Span::new(kw_tok.span.start, content_end);
655
656        // Collect trailing newline.
657        self.collect_trailing_newline(&mut node);
658
659        // Allocate the conditional node.
660        let cond_id = self.alloc_node(node);
661
662        if is_braced {
663            // Parse braced then-block: consume children until `}`.
664            self.parse_braced_body(cond_id);
665        } else {
666            // Parse then-block: items indented more than the conditional.
667            self.parse_body(cond_id, cond_indent, false);
668        }
669
670        // Check for `else` at the same indent level.
671        if !self.at_eof()
672            && self.peek().kind == TokenKind::Else
673            && self.peek().indent == cond_indent
674        {
675            let else_id = self.parse_else_block(cond_indent);
676            self.add_child(cond_id, else_id);
677        }
678
679        // Update span.
680        let body_end = self.last_child_end(cond_id);
681        self.nodes[cond_id.0].span = Span::new(self.nodes[cond_id.0].span.start, body_end);
682
683        cond_id
684    }
685
686    /// Parse an `else` block.
687    fn parse_else_block(&mut self, cond_indent: usize) -> NodeId {
688        let else_tok = self.peek().clone();
689        debug_assert_eq!(else_tok.kind, TokenKind::Else);
690        self.pos += 1;
691
692        // Check for braced else: `else {` — a Value token with `{` on the
693        // same line as the `else` keyword.
694        let is_braced = !self.at_eof()
695            && self.peek().kind == TokenKind::Value
696            && !self
697                .peek()
698                .leading_trivia
699                .iter()
700                .any(|t| t.kind == TriviaKind::Newline)
701            && self.peek().span.slice(&self.source).trim() == "{";
702
703        let content_end = if is_braced {
704            // Consume the `{` token — include it in the else block's content span.
705            let brace_tok = self.peek().clone();
706            let _ = brace_tok; // consumed below
707            let end = self.peek().span.end;
708            self.pos += 1;
709            end
710        } else {
711            else_tok.span.end
712        };
713
714        let mut node = CstNode::new(CstNodeKind::ElseBlock, else_tok.span);
715        node.leading_trivia = else_tok.leading_trivia;
716        node.content_span = Span::new(else_tok.span.start, content_end);
717        node.indent = else_tok.indent;
718
719        // Trailing newline.
720        self.collect_trailing_newline(&mut node);
721
722        // Allocate.
723        let else_id = self.alloc_node(node);
724
725        if is_braced {
726            // Parse braced else body: consume children until we see `}`.
727            self.parse_braced_body(else_id);
728        } else {
729            // Parse else body using indentation.
730            self.parse_body(else_id, cond_indent, false);
731        }
732
733        // Update span.
734        let body_end = self.last_child_end(else_id);
735        self.nodes[else_id.0].span = Span::new(self.nodes[else_id.0].span.start, body_end);
736
737        else_id
738    }
739
740    /// Parse a braced block body (`{ ... }`). Consumes children until we see
741    /// a Value token that is just `}`. The closing `}` is consumed and added
742    /// as a ValueLine child so it appears in the rendered output.
743    fn parse_braced_body(&mut self, parent_id: NodeId) {
744        loop {
745            if self.at_eof() {
746                break;
747            }
748
749            let tok = self.peek();
750
751            // Check for the closing `}`.
752            if tok.kind == TokenKind::Value && tok.span.slice(&self.source).trim() == "}" {
753                let node_id = self.parse_value_line();
754                self.add_child(parent_id, node_id);
755                break;
756            }
757
758            match tok.kind {
759                TokenKind::Comment => {
760                    let node_id = self.parse_comment();
761                    self.add_child(parent_id, node_id);
762                }
763                TokenKind::SectionHeader => {
764                    let node_id = self.parse_section();
765                    self.add_child(parent_id, node_id);
766                }
767                TokenKind::If | TokenKind::Elif => {
768                    let indent = tok.indent;
769                    let node_id = self.parse_conditional(indent);
770                    self.add_child(parent_id, node_id);
771                }
772                TokenKind::Else => {
773                    // Standalone else inside braced block — treat as error but
774                    // keep going.
775                    break;
776                }
777                TokenKind::FieldName => {
778                    let is_import = {
779                        let name_text = tok.span.slice(&self.source);
780                        name_text.eq_ignore_ascii_case("import")
781                    };
782                    if is_import {
783                        let node_id = self.parse_import();
784                        self.add_child(parent_id, node_id);
785                    } else {
786                        let indent = tok.indent;
787                        let node_id = self.parse_field(indent);
788                        self.add_child(parent_id, node_id);
789                    }
790                }
791                TokenKind::Value => {
792                    let node_id = self.parse_value_line();
793                    self.add_child(parent_id, node_id);
794                }
795                TokenKind::Eof => break,
796                _ => {
797                    let span = tok.span;
798                    let kind = tok.kind;
799                    self.emit_error(span, format!("unexpected token in braced block: {kind:?}"));
800                    self.pos += 1;
801                }
802            }
803        }
804    }
805
806    // -- Condition expression helpers ----------------------------------------
807
808    /// Find the byte offset where the condition expression starts (skipping
809    /// whitespace trivia tokens after the keyword).
810    fn find_condition_expr_start(&self) -> usize {
811        if self.at_eof() {
812            return self.source.len();
813        }
814        let tok = self.peek();
815        // The expression starts at the first non-trivia content after the keyword.
816        // The token's leading trivia includes whitespace, so the expr starts at
817        // the token's span start.
818        tok.span.start
819    }
820
821    /// Consume condition expression tokens (everything on the same line after
822    /// the `if` / `elif` keyword). Returns the end byte offset.
823    ///
824    /// Stops before consuming a trailing `{` Value token so that the caller
825    /// can detect braced layout.
826    fn consume_condition_expr(&mut self) -> usize {
827        let mut end = 0;
828        // Consume tokens until we see one whose leading trivia contains a
829        // newline (meaning it's on the next line) or we hit EOF.
830        loop {
831            if self.at_eof() {
832                break;
833            }
834            let tok = self.peek();
835            // Check if this token is on a new line.
836            let has_newline = tok
837                .leading_trivia
838                .iter()
839                .any(|t| t.kind == TriviaKind::Newline);
840            if has_newline {
841                break;
842            }
843            // Stop before a `{` Value token — it signals braced layout.
844            if tok.kind == TokenKind::Value && tok.span.slice(&self.source).trim() == "{" {
845                break;
846            }
847            // Token is on the same line — it's part of the condition expr.
848            end = tok.span.end;
849            self.pos += 1;
850        }
851        end
852    }
853
854    // -- Trivia helpers -----------------------------------------------------
855
856    /// If the next pending trivia (on the next token) starts with a Newline,
857    /// steal it and add to this node's trailing trivia. This handles the
858    /// common case where a node's line ends with `\n` and that newline should
859    /// belong to this node, not the next one.
860    fn collect_trailing_newline(&mut self, node: &mut CstNode) {
861        if self.pos >= self.tokens.len() {
862            return;
863        }
864        let next_tok = &mut self.tokens[self.pos];
865        // Collect leading newlines (and any whitespace before them that's
866        // actually the remainder of the current line — but our lexer puts
867        // newlines at the start of the next token's trivia).
868        let mut stolen = Vec::new();
869        let mut remaining = Vec::new();
870        let mut found_newline = false;
871
872        for tp in next_tok.leading_trivia.drain(..) {
873            if !found_newline && tp.kind == TriviaKind::Newline {
874                stolen.push(tp);
875                found_newline = true;
876            } else if found_newline {
877                remaining.push(tp);
878            } else {
879                // Trivia before the newline (shouldn't normally happen since
880                // newlines are first in the trivia of the next token, but
881                // just in case).
882                stolen.push(tp);
883            }
884        }
885
886        next_tok.leading_trivia = remaining;
887        node.trailing_trivia.extend(stolen);
888    }
889
890    /// Update a node's `span` to cover its leading trivia, content, and
891    /// trailing trivia.
892    fn finalize_node_span(&self, node: &mut CstNode) {
893        let start = node
894            .leading_trivia
895            .first()
896            .map(|t| t.span.start)
897            .unwrap_or(node.content_span.start);
898        let end = node
899            .trailing_trivia
900            .last()
901            .map(|t| t.span.end)
902            .unwrap_or(node.content_span.end);
903        node.span = Span::new(start, end);
904    }
905
906    /// Get the end byte offset of the last child of a node (or the node's
907    /// own span end if it has no children).
908    fn last_child_end(&self, node_id: NodeId) -> usize {
909        let node = &self.nodes[node_id.0];
910        if let Some(&last_child) = node.children.last() {
911            self.nodes[last_child.0].span.end
912        } else {
913            node.span.end
914        }
915    }
916}
917
918/// Parse a `.cabal` source string into a CST with diagnostics.
919pub fn parse(source: &str) -> ParseResult {
920    let tokens = tokenize(source);
921    let parser = Parser::new(source.to_owned(), tokens);
922    parser.parse()
923}
924
925// ---------------------------------------------------------------------------
926// Tests
927// ---------------------------------------------------------------------------
928
929#[cfg(test)]
930mod tests {
931    use super::*;
932
933    /// Helper: parse and verify round-trip.
934    fn assert_round_trip(source: &str) {
935        let result = parse(source);
936        let rendered = result.cst.render();
937        assert_eq!(
938            rendered, source,
939            "\n--- EXPECTED ---\n{source}\n--- GOT ---\n{rendered}\n"
940        );
941    }
942
943    // -- Round-trip tests ---------------------------------------------------
944
945    #[test]
946    fn round_trip_minimal() {
947        assert_round_trip("cabal-version: 3.0\nname: foo\nversion: 0.1.0.0\n");
948    }
949
950    #[test]
951    fn round_trip_with_comments() {
952        assert_round_trip(
953            "-- Top comment\ncabal-version: 3.0\nname: foo\n-- A comment\nversion: 0.1.0.0\n",
954        );
955    }
956
957    #[test]
958    fn round_trip_with_blank_lines() {
959        assert_round_trip("cabal-version: 3.0\nname: foo\n\nversion: 0.1.0.0\n");
960    }
961
962    #[test]
963    fn round_trip_section() {
964        let src = "\
965cabal-version: 3.0
966name: foo
967version: 0.1.0.0
968
969library
970  exposed-modules: Foo
971  build-depends: base >=4.14
972";
973        assert_round_trip(src);
974    }
975
976    #[test]
977    fn round_trip_section_with_arg() {
978        let src = "\
979cabal-version: 3.0
980name: foo
981version: 0.1.0.0
982
983executable my-exe
984  main-is: Main.hs
985  build-depends: base
986";
987        assert_round_trip(src);
988    }
989
990    #[test]
991    fn round_trip_conditional() {
992        let src = "\
993cabal-version: 3.0
994name: foo
995version: 0.1.0.0
996
997library
998  build-depends: base
999  if flag(dev)
1000    ghc-options: -O0
1001  else
1002    ghc-options: -O2
1003";
1004        assert_round_trip(src);
1005    }
1006
1007    #[test]
1008    fn round_trip_common_stanza() {
1009        let src = "\
1010cabal-version: 3.0
1011name: foo
1012version: 0.1.0.0
1013
1014common warnings
1015  ghc-options: -Wall
1016
1017library
1018  import: warnings
1019  exposed-modules: Foo
1020";
1021        assert_round_trip(src);
1022    }
1023
1024    #[test]
1025    fn round_trip_multiline_field() {
1026        let src = "\
1027cabal-version: 3.0
1028name: foo
1029version: 0.1.0.0
1030
1031library
1032  exposed-modules:
1033    Foo
1034    Bar
1035    Baz
1036";
1037        assert_round_trip(src);
1038    }
1039
1040    #[test]
1041    fn round_trip_leading_comma_deps() {
1042        let src = "\
1043cabal-version: 3.0
1044name: foo
1045version: 0.1.0.0
1046
1047library
1048  build-depends:
1049      base >=4.14
1050    , text >=2.0
1051    , aeson ^>=2.2
1052";
1053        assert_round_trip(src);
1054    }
1055
1056    #[test]
1057    fn round_trip_trailing_comma_deps() {
1058        let src = "\
1059cabal-version: 3.0
1060name: foo
1061version: 0.1.0.0
1062
1063library
1064  build-depends:
1065    base >=4.14,
1066    text >=2.0,
1067    aeson ^>=2.2
1068";
1069        assert_round_trip(src);
1070    }
1071
1072    #[test]
1073    fn round_trip_single_line_deps() {
1074        let src = "\
1075cabal-version: 3.0
1076name: foo
1077version: 0.1.0.0
1078
1079library
1080  build-depends: base >=4.14, text >=2.0, aeson ^>=2.2
1081";
1082        assert_round_trip(src);
1083    }
1084
1085    #[test]
1086    fn round_trip_multiple_sections() {
1087        let src = "\
1088cabal-version: 3.0
1089name: foo
1090version: 0.1.0.0
1091
1092library
1093  exposed-modules: Foo
1094  build-depends: base
1095
1096executable my-exe
1097  main-is: Main.hs
1098  build-depends: base, foo
1099
1100test-suite tests
1101  type: exitcode-stdio-1.0
1102  main-is: Main.hs
1103  build-depends: base, foo, tasty
1104";
1105        assert_round_trip(src);
1106    }
1107
1108    #[test]
1109    fn round_trip_complex_conditional() {
1110        let src = "\
1111cabal-version: 3.0
1112name: foo
1113version: 0.1.0.0
1114
1115library
1116  build-depends: base
1117  if flag(dev) && !os(windows)
1118    ghc-options: -O0
1119";
1120        assert_round_trip(src);
1121    }
1122
1123    #[test]
1124    fn round_trip_no_trailing_newline() {
1125        assert_round_trip("cabal-version: 3.0\nname: foo\nversion: 0.1.0.0");
1126    }
1127
1128    #[test]
1129    fn round_trip_field_extra_spaces() {
1130        assert_round_trip("name:    foo\nversion:    0.1.0.0\n");
1131    }
1132
1133    #[test]
1134    fn round_trip_comment_in_section() {
1135        let src = "\
1136library
1137  -- A comment in the library
1138  exposed-modules: Foo
1139";
1140        assert_round_trip(src);
1141    }
1142
1143    #[test]
1144    fn round_trip_blank_line_between_sections() {
1145        let src = "\
1146library
1147  exposed-modules: Foo
1148
1149executable bar
1150  main-is: Main.hs
1151";
1152        assert_round_trip(src);
1153    }
1154
1155    #[test]
1156    fn round_trip_flag_section() {
1157        let src = "\
1158cabal-version: 3.0
1159name: foo
1160version: 0.1.0.0
1161
1162flag dev
1163  description: Development mode
1164  default: False
1165  manual: True
1166";
1167        assert_round_trip(src);
1168    }
1169
1170    #[test]
1171    fn round_trip_source_repository() {
1172        let src = "\
1173cabal-version: 3.0
1174name: foo
1175version: 0.1.0.0
1176
1177source-repository head
1178  type: git
1179  location: https://github.com/example/foo
1180";
1181        assert_round_trip(src);
1182    }
1183
1184    // -- Structure tests ----------------------------------------------------
1185
1186    #[test]
1187    fn parse_structure_simple() {
1188        let src = "cabal-version: 3.0\nname: foo\n";
1189        let result = parse(src);
1190        let root = result.cst.node(result.cst.root);
1191        // Root should have 2 field children.
1192        assert_eq!(root.children.len(), 2);
1193        for &child_id in &root.children {
1194            assert_eq!(result.cst.node(child_id).kind, CstNodeKind::Field);
1195        }
1196    }
1197
1198    #[test]
1199    fn parse_structure_section_with_children() {
1200        let src = "\
1201library
1202  exposed-modules: Foo
1203  build-depends: base
1204";
1205        let result = parse(src);
1206        let root = result.cst.node(result.cst.root);
1207        assert_eq!(root.children.len(), 1);
1208        let section = result.cst.node(root.children[0]);
1209        assert_eq!(section.kind, CstNodeKind::Section);
1210        assert_eq!(section.section_keyword.unwrap().slice(&src), "library");
1211        assert!(section.section_arg.is_none());
1212        assert_eq!(section.children.len(), 2);
1213    }
1214
1215    #[test]
1216    fn parse_structure_section_with_arg() {
1217        let src = "executable my-exe\n  main-is: Main.hs\n";
1218        let result = parse(src);
1219        let root = result.cst.node(result.cst.root);
1220        let section = result.cst.node(root.children[0]);
1221        assert_eq!(section.kind, CstNodeKind::Section);
1222        assert_eq!(section.section_keyword.unwrap().slice(&src), "executable");
1223        assert_eq!(section.section_arg.unwrap().slice(&src), "my-exe");
1224    }
1225
1226    #[test]
1227    fn parse_structure_conditional() {
1228        let src = "\
1229library
1230  build-depends: base
1231  if flag(dev)
1232    ghc-options: -O0
1233  else
1234    ghc-options: -O2
1235";
1236        let result = parse(src);
1237        let root = result.cst.node(result.cst.root);
1238        let section = result.cst.node(root.children[0]);
1239        // Section children: field (build-depends) + conditional.
1240        assert_eq!(section.children.len(), 2);
1241        let cond = result.cst.node(section.children[1]);
1242        assert_eq!(cond.kind, CstNodeKind::Conditional);
1243        // Conditional children: then-block field + else block.
1244        assert!(cond.children.len() >= 2);
1245        // Last child should be ElseBlock.
1246        let last = result.cst.node(*cond.children.last().unwrap());
1247        assert_eq!(last.kind, CstNodeKind::ElseBlock);
1248    }
1249
1250    #[test]
1251    fn parse_structure_import() {
1252        let src = "\
1253library
1254  import: warnings
1255  exposed-modules: Foo
1256";
1257        let result = parse(src);
1258        let root = result.cst.node(result.cst.root);
1259        let section = result.cst.node(root.children[0]);
1260        let import = result.cst.node(section.children[0]);
1261        assert_eq!(import.kind, CstNodeKind::Import);
1262        assert_eq!(import.field_name.unwrap().slice(&src), "import");
1263        assert_eq!(import.field_value.unwrap().slice(&src), "warnings");
1264    }
1265
1266    #[test]
1267    fn parse_structure_multiline_field() {
1268        let src = "\
1269library
1270  exposed-modules:
1271    Foo
1272    Bar
1273";
1274        let result = parse(src);
1275        let root = result.cst.node(result.cst.root);
1276        let section = result.cst.node(root.children[0]);
1277        let field = result.cst.node(section.children[0]);
1278        assert_eq!(field.kind, CstNodeKind::Field);
1279        assert_eq!(field.field_name.unwrap().slice(&src), "exposed-modules");
1280        // Should have 2 ValueLine children.
1281        assert_eq!(field.children.len(), 2);
1282        for &child_id in &field.children {
1283            assert_eq!(result.cst.node(child_id).kind, CstNodeKind::ValueLine);
1284        }
1285    }
1286
1287    #[test]
1288    fn parse_no_diagnostics_for_valid_file() {
1289        let src = "\
1290cabal-version: 3.0
1291name: foo
1292version: 0.1.0.0
1293
1294common warnings
1295  ghc-options: -Wall
1296
1297library
1298  import: warnings
1299  exposed-modules:
1300    Foo
1301    Bar
1302  build-depends:
1303    base >=4.14
1304  if flag(dev)
1305    ghc-options: -O0
1306  else
1307    ghc-options: -O2
1308
1309executable my-exe
1310  import: warnings
1311  main-is: Main.hs
1312  build-depends: base, foo
1313";
1314        let result = parse(src);
1315        assert!(
1316            result.diagnostics.is_empty(),
1317            "expected no diagnostics, got: {:?}",
1318            result.diagnostics
1319        );
1320    }
1321
1322    // -- Error recovery tests -----------------------------------------------
1323
1324    #[test]
1325    fn parse_does_not_panic_on_empty_input() {
1326        let result = parse("");
1327        assert!(result.cst.render().is_empty());
1328    }
1329
1330    #[test]
1331    fn parse_does_not_panic_on_blank_lines_only() {
1332        let src = "\n\n\n";
1333        let result = parse(src);
1334        assert_eq!(result.cst.render(), src);
1335    }
1336
1337    #[test]
1338    fn parse_does_not_panic_on_comments_only() {
1339        let src = "-- just a comment\n-- another one\n";
1340        let result = parse(src);
1341        assert_eq!(result.cst.render(), src);
1342    }
1343
1344    // -- Field name/value span tests ----------------------------------------
1345
1346    #[test]
1347    fn field_name_and_value_spans() {
1348        let src = "name: foo\n";
1349        let result = parse(src);
1350        let root = result.cst.node(result.cst.root);
1351        let field = result.cst.node(root.children[0]);
1352        assert_eq!(field.field_name.unwrap().slice(&src), "name");
1353        assert_eq!(field.field_value.unwrap().slice(&src), "foo");
1354    }
1355
1356    #[test]
1357    fn field_no_value() {
1358        let src = "build-depends:\n";
1359        let result = parse(src);
1360        let root = result.cst.node(result.cst.root);
1361        let field = result.cst.node(root.children[0]);
1362        assert_eq!(field.field_name.unwrap().slice(&src), "build-depends");
1363        assert!(field.field_value.is_none());
1364    }
1365
1366    // -- Large round-trip tests ---------------------------------------------
1367
1368    #[test]
1369    fn round_trip_realistic_file() {
1370        let src = "\
1371cabal-version:   3.0
1372name:            my-project
1373version:         0.1.0.0
1374synopsis:        A sample project
1375description:
1376  This is a longer description
1377  that spans multiple lines.
1378license:         MIT
1379license-file:    LICENSE
1380author:          Test Author
1381maintainer:      test@example.com
1382category:        Development
1383build-type:      Simple
1384
1385common warnings
1386  ghc-options: -Wall -Wcompat -Widentities
1387               -Wincomplete-record-updates
1388               -Wincomplete-uni-patterns
1389               -Wmissing-deriving-strategies
1390               -Wredundant-constraints
1391
1392flag dev
1393  description: Enable development mode
1394  default:     False
1395  manual:      True
1396
1397library
1398  import:           warnings
1399  exposed-modules:
1400    MyProject
1401    MyProject.Internal
1402    MyProject.Types
1403  other-modules:
1404    MyProject.Utils
1405  build-depends:
1406      base >=4.14 && <5
1407    , aeson ^>=2.2
1408    , text >=2.0 && <2.2
1409    , containers ^>=0.6
1410  hs-source-dirs:   src
1411  default-language: GHC2021
1412  default-extensions:
1413    OverloadedStrings
1414    DerivingStrategies
1415
1416  if flag(dev)
1417    ghc-options: -O0
1418  else
1419    ghc-options: -O2
1420
1421executable my-project
1422  import:           warnings
1423  main-is:          Main.hs
1424  other-modules:    Paths_my_project
1425  build-depends:
1426      base
1427    , my-project
1428    , optparse-applicative ^>=0.18
1429  hs-source-dirs:   app
1430  default-language: GHC2021
1431
1432test-suite my-project-test
1433  import:           warnings
1434  type:             exitcode-stdio-1.0
1435  main-is:          Main.hs
1436  other-modules:
1437    Test.MyProject
1438    Test.MyProject.Types
1439  build-depends:
1440      base
1441    , my-project
1442    , tasty ^>=1.5
1443    , tasty-hunit ^>=0.10
1444  hs-source-dirs:   test
1445  default-language: GHC2021
1446
1447source-repository head
1448  type:     git
1449  location: https://github.com/example/my-project
1450";
1451        assert_round_trip(src);
1452    }
1453
1454    #[test]
1455    fn round_trip_nested_conditionals() {
1456        let src = "\
1457library
1458  build-depends: base
1459  if os(linux)
1460    if flag(dbus)
1461      build-depends: dbus
1462      cpp-options: -DDBUS
1463  if os(windows)
1464    build-depends: Win32
1465";
1466        assert_round_trip(src);
1467    }
1468
1469    #[test]
1470    fn round_trip_benchmark_section() {
1471        let src = "\
1472benchmark my-bench
1473  type: exitcode-stdio-1.0
1474  main-is: Main.hs
1475  build-depends: base, criterion
1476  hs-source-dirs: bench
1477";
1478        assert_round_trip(src);
1479    }
1480
1481    // -- Braced layout tests --------------------------------------------------
1482
1483    #[test]
1484    fn round_trip_braced_section() {
1485        assert_round_trip("library {\n  exposed-modules: Foo\n  build-depends: base\n}\n");
1486    }
1487
1488    #[test]
1489    fn round_trip_braced_executable() {
1490        assert_round_trip("executable foo {\n  main-is: Main.hs\n}\n");
1491    }
1492
1493    #[test]
1494    fn round_trip_braced_if() {
1495        assert_round_trip(
1496            "library\n  build-depends: base\n  if flag(dev) {\n    ghc-options: -O0\n  }\n",
1497        );
1498    }
1499
1500    #[test]
1501    fn round_trip_braced_if_else() {
1502        assert_round_trip(
1503            "library\n  if flag(dev) {\n    ghc-options: -O0\n  } else {\n    ghc-options: -O2\n  }\n",
1504        );
1505    }
1506}