cabalist_parser/cst.rs
1//! Concrete Syntax Tree (CST) for `.cabal` files.
2//!
3//! The CST is a flat arena of nodes that mirrors the exact structure of the
4//! `.cabal` file, preserving all formatting details: whitespace, comments,
5//! blank lines, indentation style. The `render()` method reproduces the
6//! original source byte-for-byte when no edits have been made.
7
8use crate::lexer::TriviaPiece;
9use crate::span::{NodeId, Span};
10
/// The kind of a CST node.
///
/// The kind decides which of the optional spans on a [`CstNode`] are
/// meaningful and how the node is written back out when rendering.
///
/// The type is a plain field-less enum, so it is `Copy`, comparable, and
/// hashable (usable as a `HashMap` / `HashSet` key).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CstNodeKind {
    /// The root container. Its children are top-level fields, sections,
    /// comments, and blank lines.
    Root,
    /// A field: `field-name: value` (possibly with multi-line continuation).
    Field,
    /// A section: `library`, `executable foo`, `common warnings`, etc.
    /// Children are the section body (fields, conditionals, imports, comments).
    Section,
    /// A conditional block: `if condition` + body, with optional `else` block.
    Conditional,
    /// An `import: stanza-name` directive inside a section.
    Import,
    /// A standalone comment line.
    Comment,
    /// A preserved blank line between stanzas or fields.
    BlankLine,
    /// A value continuation line that is a direct child of its parent field
    /// or section. Used to preserve multi-line field values.
    ValueLine,
    /// An `else` block attached to a `Conditional` node.
    ElseBlock,
}
36
37/// A single node in the CST arena.
38#[derive(Debug, Clone)]
39pub struct CstNode {
40 /// What kind of syntax element this node represents.
41 pub kind: CstNodeKind,
42
43 /// Full byte span of this node in the source, including leading trivia.
44 pub span: Span,
45
46 /// Span of just the meaningful content (excluding leading/trailing trivia
47 /// that belongs to this node).
48 pub content_span: Span,
49
50 /// Children of this node (indices into the arena).
51 pub children: Vec<NodeId>,
52
53 /// Parent node (None only for the Root).
54 pub parent: Option<NodeId>,
55
56 // -- Field-specific spans --
57 /// For `Field` / `Import` nodes: span of the field name.
58 pub field_name: Option<Span>,
59
60 /// For `Field` / `Import` nodes: span of the field value (first line
61 /// only; continuation lines are child `ValueLine` nodes).
62 pub field_value: Option<Span>,
63
64 // -- Section-specific spans --
65 /// For `Section` nodes: span of the section keyword.
66 pub section_keyword: Option<Span>,
67
68 /// For `Section` nodes: span of the section argument (e.g. `my-exe`).
69 pub section_arg: Option<Span>,
70
71 // -- Conditional-specific spans --
72 /// For `Conditional` nodes: span of the keyword (`if` / `elif`).
73 pub condition_keyword: Option<Span>,
74
75 /// For `Conditional` nodes: span of the condition expression text.
76 pub condition_expr: Option<Span>,
77
78 /// Leading trivia pieces (whitespace, newlines, comments) that precede
79 /// this node's content.
80 pub leading_trivia: Vec<TriviaPiece>,
81
82 /// Trailing trivia pieces (typically a newline at the end of the line).
83 pub trailing_trivia: Vec<TriviaPiece>,
84
85 /// The indentation level (visual column) of this node.
86 pub indent: usize,
87}
88
89impl CstNode {
90 /// Create a new node with the given kind, defaulting all optional fields
91 /// to `None` / empty.
92 pub fn new(kind: CstNodeKind, span: Span) -> Self {
93 Self {
94 kind,
95 span,
96 content_span: span,
97 children: Vec::new(),
98 parent: None,
99 field_name: None,
100 field_value: None,
101 section_keyword: None,
102 section_arg: None,
103 condition_keyword: None,
104 condition_expr: None,
105 leading_trivia: Vec::new(),
106 trailing_trivia: Vec::new(),
107 indent: 0,
108 }
109 }
110}
111
112/// The concrete syntax tree for a `.cabal` file.
113///
114/// All [`Span`]s reference byte offsets into [`source`](CabalCst::source).
115#[derive(Debug, Clone)]
116pub struct CabalCst {
117 /// The original source text (owned).
118 pub source: String,
119
120 /// Flat arena of all CST nodes.
121 pub nodes: Vec<CstNode>,
122
123 /// Index of the root node (always `NodeId(0)`).
124 pub root: NodeId,
125}
126
127impl CabalCst {
128 /// Render the CST back to text. When no edits have been made, this must
129 /// produce byte-identical output to the original source.
130 pub fn render(&self) -> String {
131 // Strategy: walk every node in the tree in source order and emit
132 // their trivia + content spans. Since the arena stores nodes in
133 // source order (they are added during a left-to-right parse), we
134 // can do a depth-first traversal from the root and collect spans.
135 //
136 // However, the simplest correct approach for an un-edited CST is
137 // to just return the source. For an edited CST we need the full
138 // render. We implement the full render so it works in both cases.
139 let mut out = String::with_capacity(self.source.len());
140 self.render_node(self.root, &mut out);
141 out
142 }
143
144 /// Recursively render a single node and its descendants.
145 fn render_node(&self, node_id: NodeId, out: &mut String) {
146 let node = &self.nodes[node_id.0];
147
148 // Emit leading trivia.
149 for tp in &node.leading_trivia {
150 out.push_str(tp.span.slice(&self.source));
151 }
152
153 // Emit the node's own content based on kind.
154 match node.kind {
155 CstNodeKind::Root => {
156 // Root has no content of its own; just render children.
157 for &child_id in &node.children {
158 self.render_node(child_id, out);
159 }
160 }
161
162 CstNodeKind::Field => {
163 // field name
164 if let Some(ref name_span) = node.field_name {
165 out.push_str(name_span.slice(&self.source));
166 }
167 // The colon and spacing between name, colon, and value are
168 // captured in the content_span. We emit the content_span
169 // region that isn't the field_name or field_value.
170 //
171 // Actually, we store the full line content between
172 // field_name.end and field_value.start (colon + spacing) as
173 // part of content_span. Let's emit the "middle" region.
174 let name_end = node
175 .field_name
176 .map(|s| s.end)
177 .unwrap_or(node.content_span.start);
178 let value_start = node
179 .field_value
180 .map(|s| s.start)
181 .unwrap_or(node.content_span.end);
182 // Middle: everything between field name and value.
183 if name_end < value_start {
184 out.push_str(&self.source[name_end..value_start]);
185 }
186 // field value (first line)
187 if let Some(ref val_span) = node.field_value {
188 out.push_str(val_span.slice(&self.source));
189 }
190 // Trailing trivia (newline).
191 for tp in &node.trailing_trivia {
192 out.push_str(tp.span.slice(&self.source));
193 }
194 // Children: continuation ValueLine nodes.
195 for &child_id in &node.children {
196 self.render_node(child_id, out);
197 }
198 }
199
200 CstNodeKind::Import => {
201 // Same structure as Field.
202 if let Some(ref name_span) = node.field_name {
203 out.push_str(name_span.slice(&self.source));
204 }
205 let name_end = node
206 .field_name
207 .map(|s| s.end)
208 .unwrap_or(node.content_span.start);
209 let value_start = node
210 .field_value
211 .map(|s| s.start)
212 .unwrap_or(node.content_span.end);
213 if name_end < value_start {
214 out.push_str(&self.source[name_end..value_start]);
215 }
216 if let Some(ref val_span) = node.field_value {
217 out.push_str(val_span.slice(&self.source));
218 }
219 for tp in &node.trailing_trivia {
220 out.push_str(tp.span.slice(&self.source));
221 }
222 }
223
224 CstNodeKind::Section => {
225 // Section keyword.
226 if let Some(ref kw_span) = node.section_keyword {
227 out.push_str(kw_span.slice(&self.source));
228 }
229 // Spacing + arg.
230 let kw_end = node
231 .section_keyword
232 .map(|s| s.end)
233 .unwrap_or(node.content_span.start);
234 let arg_start = node.section_arg.map(|s| s.start);
235 let arg_end = node.section_arg.map(|s| s.end);
236 match (arg_start, arg_end) {
237 (Some(astart), Some(aend)) => {
238 // Spacing between keyword and arg.
239 if kw_end < astart {
240 out.push_str(&self.source[kw_end..astart]);
241 }
242 out.push_str(&self.source[astart..aend]);
243 // Anything between arg end and content_span end
244 // (trailing whitespace on the header line).
245 if aend < node.content_span.end {
246 out.push_str(&self.source[aend..node.content_span.end]);
247 }
248 }
249 _ => {
250 // No arg: emit any trailing content.
251 if kw_end < node.content_span.end {
252 out.push_str(&self.source[kw_end..node.content_span.end]);
253 }
254 }
255 }
256 // Trailing trivia (newline after header).
257 for tp in &node.trailing_trivia {
258 out.push_str(tp.span.slice(&self.source));
259 }
260 // Section body children.
261 for &child_id in &node.children {
262 self.render_node(child_id, out);
263 }
264 }
265
266 CstNodeKind::Conditional => {
267 // Keyword (if / elif).
268 if let Some(ref kw_span) = node.condition_keyword {
269 out.push_str(kw_span.slice(&self.source));
270 }
271 let kw_end = node
272 .condition_keyword
273 .map(|s| s.end)
274 .unwrap_or(node.content_span.start);
275 // Condition expression.
276 if let Some(ref expr_span) = node.condition_expr {
277 if kw_end < expr_span.start {
278 out.push_str(&self.source[kw_end..expr_span.start]);
279 }
280 out.push_str(expr_span.slice(&self.source));
281 if expr_span.end < node.content_span.end {
282 out.push_str(&self.source[expr_span.end..node.content_span.end]);
283 }
284 } else if kw_end < node.content_span.end {
285 out.push_str(&self.source[kw_end..node.content_span.end]);
286 }
287 // Trailing trivia.
288 for tp in &node.trailing_trivia {
289 out.push_str(tp.span.slice(&self.source));
290 }
291 // Children (then-block fields + optional ElseBlock).
292 for &child_id in &node.children {
293 self.render_node(child_id, out);
294 }
295 }
296
297 CstNodeKind::ElseBlock => {
298 // The `else` keyword line.
299 out.push_str(node.content_span.slice(&self.source));
300 for tp in &node.trailing_trivia {
301 out.push_str(tp.span.slice(&self.source));
302 }
303 // Else body children.
304 for &child_id in &node.children {
305 self.render_node(child_id, out);
306 }
307 }
308
309 CstNodeKind::Comment => {
310 out.push_str(node.content_span.slice(&self.source));
311 for tp in &node.trailing_trivia {
312 out.push_str(tp.span.slice(&self.source));
313 }
314 }
315
316 CstNodeKind::BlankLine => {
317 out.push_str(node.content_span.slice(&self.source));
318 for tp in &node.trailing_trivia {
319 out.push_str(tp.span.slice(&self.source));
320 }
321 }
322
323 CstNodeKind::ValueLine => {
324 out.push_str(node.content_span.slice(&self.source));
325 for tp in &node.trailing_trivia {
326 out.push_str(tp.span.slice(&self.source));
327 }
328 }
329 }
330 }
331
332 /// Number of nodes in the arena.
333 pub fn node_count(&self) -> usize {
334 self.nodes.len()
335 }
336
337 /// Get a reference to a node by its id.
338 pub fn node(&self, id: NodeId) -> &CstNode {
339 &self.nodes[id.0]
340 }
341
342 /// Get a mutable reference to a node by its id.
343 pub fn node_mut(&mut self, id: NodeId) -> &mut CstNode {
344 &mut self.nodes[id.0]
345 }
346
347 /// Iterate over the direct children of a node.
348 pub fn children(&self, id: NodeId) -> &[NodeId] {
349 &self.nodes[id.0].children
350 }
351}