// cabalist_parser/cst.rs

1//! Concrete Syntax Tree (CST) for `.cabal` files.
2//!
3//! The CST is a flat arena of nodes that mirrors the exact structure of the
4//! `.cabal` file, preserving all formatting details: whitespace, comments,
5//! blank lines, indentation style. The `render()` method reproduces the
6//! original source byte-for-byte when no edits have been made.
7
8use crate::lexer::TriviaPiece;
9use crate::span::{NodeId, Span};
10
/// The kind of a CST node.
///
/// The kind determines which of the optional, kind-specific spans on
/// [`CstNode`] are relevant and how the node's text is written out by
/// [`CabalCst::render`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CstNodeKind {
    /// The root container. Its children are top-level fields, sections,
    /// comments, and blank lines. It has no text of its own.
    Root,
    /// A field: `field-name: value` (possibly with multi-line continuation).
    /// Continuation lines are stored as child `ValueLine` nodes.
    Field,
    /// A section: `library`, `executable foo`, `common warnings`, etc.
    /// Children are the section body (fields, conditionals, imports, comments).
    Section,
    /// A conditional block: `if condition` + body, with optional `else` block.
    /// Children are the then-block nodes plus the optional `ElseBlock`.
    Conditional,
    /// An `import: stanza-name` directive inside a section. It uses the same
    /// name/value spans as a `Field`, but rendering does not visit children.
    Import,
    /// A standalone comment line.
    Comment,
    /// A preserved blank line between stanzas or fields.
    BlankLine,
    /// A value continuation line that is a direct child of its parent field
    /// or section. Used to preserve multi-line field values.
    ValueLine,
    /// An `else` block attached to a `Conditional` node. Its content is the
    /// `else` keyword line and its children are the else body.
    ElseBlock,
}
36
/// A single node in the CST arena.
///
/// Nodes refer to one another by [`NodeId`] (an index into the arena) rather
/// than by reference. The kind-specific spans below are `Option`s; a freshly
/// created node (see [`CstNode::new`]) has all of them set to `None`.
#[derive(Debug, Clone)]
pub struct CstNode {
    /// What kind of syntax element this node represents.
    pub kind: CstNodeKind,

    /// Full byte span of this node in the source, including leading trivia.
    pub span: Span,

    /// Span of just the meaningful content (excluding leading/trailing trivia
    /// that belongs to this node).
    pub content_span: Span,

    /// Children of this node (indices into the arena).
    pub children: Vec<NodeId>,

    /// Parent node (None only for the Root).
    pub parent: Option<NodeId>,

    // -- Field-specific spans (`Field` and `Import` nodes) --
    /// For `Field` / `Import` nodes: span of the field name.
    pub field_name: Option<Span>,

    /// For `Field` / `Import` nodes: span of the field value (first line
    /// only; continuation lines are child `ValueLine` nodes).
    pub field_value: Option<Span>,

    // -- Section-specific spans (`Section` nodes) --
    /// For `Section` nodes: span of the section keyword.
    pub section_keyword: Option<Span>,

    /// For `Section` nodes: span of the section argument (e.g. `my-exe`).
    pub section_arg: Option<Span>,

    // -- Conditional-specific spans (`Conditional` nodes) --
    /// For `Conditional` nodes: span of the keyword (`if` / `elif`).
    pub condition_keyword: Option<Span>,

    /// For `Conditional` nodes: span of the condition expression text.
    pub condition_expr: Option<Span>,

    // -- Trivia and layout (all kinds) --
    /// Leading trivia pieces (whitespace, newlines, comments) that precede
    /// this node's content. Rendered before the content, in order.
    pub leading_trivia: Vec<TriviaPiece>,

    /// Trailing trivia pieces (typically a newline at the end of the line).
    /// Rendered after the content and before any children.
    pub trailing_trivia: Vec<TriviaPiece>,

    /// The indentation level (visual column) of this node.
    pub indent: usize,
}
88
89impl CstNode {
90    /// Create a new node with the given kind, defaulting all optional fields
91    /// to `None` / empty.
92    pub fn new(kind: CstNodeKind, span: Span) -> Self {
93        Self {
94            kind,
95            span,
96            content_span: span,
97            children: Vec::new(),
98            parent: None,
99            field_name: None,
100            field_value: None,
101            section_keyword: None,
102            section_arg: None,
103            condition_keyword: None,
104            condition_expr: None,
105            leading_trivia: Vec::new(),
106            trailing_trivia: Vec::new(),
107            indent: 0,
108        }
109    }
110}
111
/// The concrete syntax tree for a `.cabal` file.
///
/// All [`Span`]s reference byte offsets into [`source`](CabalCst::source).
/// Nodes live in the flat [`nodes`](CabalCst::nodes) arena and are addressed
/// by [`NodeId`], whose inner value is the index into that arena.
#[derive(Debug, Clone)]
pub struct CabalCst {
    /// The original source text (owned). Rendering slices this text using
    /// the spans stored on the nodes.
    pub source: String,

    /// Flat arena of all CST nodes, indexed by `NodeId`.
    pub nodes: Vec<CstNode>,

    /// Index of the root node (always `NodeId(0)`).
    pub root: NodeId,
}
126
127impl CabalCst {
128    /// Render the CST back to text. When no edits have been made, this must
129    /// produce byte-identical output to the original source.
130    pub fn render(&self) -> String {
131        // Strategy: walk every node in the tree in source order and emit
132        // their trivia + content spans. Since the arena stores nodes in
133        // source order (they are added during a left-to-right parse), we
134        // can do a depth-first traversal from the root and collect spans.
135        //
136        // However, the simplest correct approach for an un-edited CST is
137        // to just return the source. For an edited CST we need the full
138        // render. We implement the full render so it works in both cases.
139        let mut out = String::with_capacity(self.source.len());
140        self.render_node(self.root, &mut out);
141        out
142    }
143
144    /// Recursively render a single node and its descendants.
145    fn render_node(&self, node_id: NodeId, out: &mut String) {
146        let node = &self.nodes[node_id.0];
147
148        // Emit leading trivia.
149        for tp in &node.leading_trivia {
150            out.push_str(tp.span.slice(&self.source));
151        }
152
153        // Emit the node's own content based on kind.
154        match node.kind {
155            CstNodeKind::Root => {
156                // Root has no content of its own; just render children.
157                for &child_id in &node.children {
158                    self.render_node(child_id, out);
159                }
160            }
161
162            CstNodeKind::Field => {
163                // field name
164                if let Some(ref name_span) = node.field_name {
165                    out.push_str(name_span.slice(&self.source));
166                }
167                // The colon and spacing between name, colon, and value are
168                // captured in the content_span. We emit the content_span
169                // region that isn't the field_name or field_value.
170                //
171                // Actually, we store the full line content between
172                // field_name.end and field_value.start (colon + spacing) as
173                // part of content_span. Let's emit the "middle" region.
174                let name_end = node
175                    .field_name
176                    .map(|s| s.end)
177                    .unwrap_or(node.content_span.start);
178                let value_start = node
179                    .field_value
180                    .map(|s| s.start)
181                    .unwrap_or(node.content_span.end);
182                // Middle: everything between field name and value.
183                if name_end < value_start {
184                    out.push_str(&self.source[name_end..value_start]);
185                }
186                // field value (first line)
187                if let Some(ref val_span) = node.field_value {
188                    out.push_str(val_span.slice(&self.source));
189                }
190                // Trailing trivia (newline).
191                for tp in &node.trailing_trivia {
192                    out.push_str(tp.span.slice(&self.source));
193                }
194                // Children: continuation ValueLine nodes.
195                for &child_id in &node.children {
196                    self.render_node(child_id, out);
197                }
198            }
199
200            CstNodeKind::Import => {
201                // Same structure as Field.
202                if let Some(ref name_span) = node.field_name {
203                    out.push_str(name_span.slice(&self.source));
204                }
205                let name_end = node
206                    .field_name
207                    .map(|s| s.end)
208                    .unwrap_or(node.content_span.start);
209                let value_start = node
210                    .field_value
211                    .map(|s| s.start)
212                    .unwrap_or(node.content_span.end);
213                if name_end < value_start {
214                    out.push_str(&self.source[name_end..value_start]);
215                }
216                if let Some(ref val_span) = node.field_value {
217                    out.push_str(val_span.slice(&self.source));
218                }
219                for tp in &node.trailing_trivia {
220                    out.push_str(tp.span.slice(&self.source));
221                }
222            }
223
224            CstNodeKind::Section => {
225                // Section keyword.
226                if let Some(ref kw_span) = node.section_keyword {
227                    out.push_str(kw_span.slice(&self.source));
228                }
229                // Spacing + arg.
230                let kw_end = node
231                    .section_keyword
232                    .map(|s| s.end)
233                    .unwrap_or(node.content_span.start);
234                let arg_start = node.section_arg.map(|s| s.start);
235                let arg_end = node.section_arg.map(|s| s.end);
236                match (arg_start, arg_end) {
237                    (Some(astart), Some(aend)) => {
238                        // Spacing between keyword and arg.
239                        if kw_end < astart {
240                            out.push_str(&self.source[kw_end..astart]);
241                        }
242                        out.push_str(&self.source[astart..aend]);
243                        // Anything between arg end and content_span end
244                        // (trailing whitespace on the header line).
245                        if aend < node.content_span.end {
246                            out.push_str(&self.source[aend..node.content_span.end]);
247                        }
248                    }
249                    _ => {
250                        // No arg: emit any trailing content.
251                        if kw_end < node.content_span.end {
252                            out.push_str(&self.source[kw_end..node.content_span.end]);
253                        }
254                    }
255                }
256                // Trailing trivia (newline after header).
257                for tp in &node.trailing_trivia {
258                    out.push_str(tp.span.slice(&self.source));
259                }
260                // Section body children.
261                for &child_id in &node.children {
262                    self.render_node(child_id, out);
263                }
264            }
265
266            CstNodeKind::Conditional => {
267                // Keyword (if / elif).
268                if let Some(ref kw_span) = node.condition_keyword {
269                    out.push_str(kw_span.slice(&self.source));
270                }
271                let kw_end = node
272                    .condition_keyword
273                    .map(|s| s.end)
274                    .unwrap_or(node.content_span.start);
275                // Condition expression.
276                if let Some(ref expr_span) = node.condition_expr {
277                    if kw_end < expr_span.start {
278                        out.push_str(&self.source[kw_end..expr_span.start]);
279                    }
280                    out.push_str(expr_span.slice(&self.source));
281                    if expr_span.end < node.content_span.end {
282                        out.push_str(&self.source[expr_span.end..node.content_span.end]);
283                    }
284                } else if kw_end < node.content_span.end {
285                    out.push_str(&self.source[kw_end..node.content_span.end]);
286                }
287                // Trailing trivia.
288                for tp in &node.trailing_trivia {
289                    out.push_str(tp.span.slice(&self.source));
290                }
291                // Children (then-block fields + optional ElseBlock).
292                for &child_id in &node.children {
293                    self.render_node(child_id, out);
294                }
295            }
296
297            CstNodeKind::ElseBlock => {
298                // The `else` keyword line.
299                out.push_str(node.content_span.slice(&self.source));
300                for tp in &node.trailing_trivia {
301                    out.push_str(tp.span.slice(&self.source));
302                }
303                // Else body children.
304                for &child_id in &node.children {
305                    self.render_node(child_id, out);
306                }
307            }
308
309            CstNodeKind::Comment => {
310                out.push_str(node.content_span.slice(&self.source));
311                for tp in &node.trailing_trivia {
312                    out.push_str(tp.span.slice(&self.source));
313                }
314            }
315
316            CstNodeKind::BlankLine => {
317                out.push_str(node.content_span.slice(&self.source));
318                for tp in &node.trailing_trivia {
319                    out.push_str(tp.span.slice(&self.source));
320                }
321            }
322
323            CstNodeKind::ValueLine => {
324                out.push_str(node.content_span.slice(&self.source));
325                for tp in &node.trailing_trivia {
326                    out.push_str(tp.span.slice(&self.source));
327                }
328            }
329        }
330    }
331
332    /// Number of nodes in the arena.
333    pub fn node_count(&self) -> usize {
334        self.nodes.len()
335    }
336
337    /// Get a reference to a node by its id.
338    pub fn node(&self, id: NodeId) -> &CstNode {
339        &self.nodes[id.0]
340    }
341
342    /// Get a mutable reference to a node by its id.
343    pub fn node_mut(&mut self, id: NodeId) -> &mut CstNode {
344        &mut self.nodes[id.0]
345    }
346
347    /// Iterate over the direct children of a node.
348    pub fn children(&self, id: NodeId) -> &[NodeId] {
349        &self.nodes[id.0].children
350    }
351}