1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
//! Concrete Syntax Tree (CST) for `.cabal` files.
//!
//! The CST is a flat arena of nodes that mirrors the exact structure of the
//! `.cabal` file, preserving all formatting details: whitespace, comments,
//! blank lines, indentation style. The `render()` method reproduces the
//! original source byte-for-byte when no edits have been made.
use crate::lexer::TriviaPiece;
use crate::span::{NodeId, Span};
/// The kind of a CST node.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CstNodeKind {
/// The root container. Its children are top-level fields, sections,
/// comments, and blank lines.
Root,
/// A field: `field-name: value` (possibly with multi-line continuation).
Field,
/// A section: `library`, `executable foo`, `common warnings`, etc.
/// Children are the section body (fields, conditionals, imports, comments).
Section,
/// A conditional block: `if condition` + body, with optional `else` block.
Conditional,
/// An `import: stanza-name` directive inside a section.
Import,
/// A standalone comment line.
Comment,
/// A preserved blank line between stanzas or fields.
BlankLine,
/// A value continuation line that is a direct child of its parent field
/// or section. Used to preserve multi-line field values.
ValueLine,
/// An `else` block attached to a `Conditional` node.
ElseBlock,
}
/// A single node in the CST arena.
#[derive(Debug, Clone)]
pub struct CstNode {
/// What kind of syntax element this node represents.
pub kind: CstNodeKind,
/// Full byte span of this node in the source, including leading trivia.
pub span: Span,
/// Span of just the meaningful content (excluding leading/trailing trivia
/// that belongs to this node).
pub content_span: Span,
/// Children of this node (indices into the arena).
pub children: Vec<NodeId>,
/// Parent node (None only for the Root).
pub parent: Option<NodeId>,
// -- Field-specific spans --
/// For `Field` / `Import` nodes: span of the field name.
pub field_name: Option<Span>,
/// For `Field` / `Import` nodes: span of the field value (first line
/// only; continuation lines are child `ValueLine` nodes).
pub field_value: Option<Span>,
// -- Section-specific spans --
/// For `Section` nodes: span of the section keyword.
pub section_keyword: Option<Span>,
/// For `Section` nodes: span of the section argument (e.g. `my-exe`).
pub section_arg: Option<Span>,
// -- Conditional-specific spans --
/// For `Conditional` nodes: span of the keyword (`if` / `elif`).
pub condition_keyword: Option<Span>,
/// For `Conditional` nodes: span of the condition expression text.
pub condition_expr: Option<Span>,
/// Leading trivia pieces (whitespace, newlines, comments) that precede
/// this node's content.
pub leading_trivia: Vec<TriviaPiece>,
/// Trailing trivia pieces (typically a newline at the end of the line).
pub trailing_trivia: Vec<TriviaPiece>,
/// The indentation level (visual column) of this node.
pub indent: usize,
}
impl CstNode {
/// Create a new node with the given kind, defaulting all optional fields
/// to `None` / empty.
pub fn new(kind: CstNodeKind, span: Span) -> Self {
Self {
kind,
span,
content_span: span,
children: Vec::new(),
parent: None,
field_name: None,
field_value: None,
section_keyword: None,
section_arg: None,
condition_keyword: None,
condition_expr: None,
leading_trivia: Vec::new(),
trailing_trivia: Vec::new(),
indent: 0,
}
}
}
/// The concrete syntax tree for a `.cabal` file.
///
/// All [`Span`]s reference byte offsets into [`source`](CabalCst::source).
#[derive(Debug, Clone)]
pub struct CabalCst {
/// The original source text (owned).
pub source: String,
/// Flat arena of all CST nodes.
pub nodes: Vec<CstNode>,
/// Index of the root node (always `NodeId(0)`).
pub root: NodeId,
}
impl CabalCst {
/// Render the CST back to text. When no edits have been made, this must
/// produce byte-identical output to the original source.
pub fn render(&self) -> String {
// Strategy: walk every node in the tree in source order and emit
// their trivia + content spans. Since the arena stores nodes in
// source order (they are added during a left-to-right parse), we
// can do a depth-first traversal from the root and collect spans.
//
// However, the simplest correct approach for an un-edited CST is
// to just return the source. For an edited CST we need the full
// render. We implement the full render so it works in both cases.
let mut out = String::with_capacity(self.source.len());
self.render_node(self.root, &mut out);
out
}
/// Recursively render a single node and its descendants.
fn render_node(&self, node_id: NodeId, out: &mut String) {
let node = &self.nodes[node_id.0];
// Emit leading trivia.
for tp in &node.leading_trivia {
out.push_str(tp.span.slice(&self.source));
}
// Emit the node's own content based on kind.
match node.kind {
CstNodeKind::Root => {
// Root has no content of its own; just render children.
for &child_id in &node.children {
self.render_node(child_id, out);
}
}
CstNodeKind::Field => {
// field name
if let Some(ref name_span) = node.field_name {
out.push_str(name_span.slice(&self.source));
}
// The colon and spacing between name, colon, and value are
// captured in the content_span. We emit the content_span
// region that isn't the field_name or field_value.
//
// Actually, we store the full line content between
// field_name.end and field_value.start (colon + spacing) as
// part of content_span. Let's emit the "middle" region.
let name_end = node
.field_name
.map(|s| s.end)
.unwrap_or(node.content_span.start);
let value_start = node
.field_value
.map(|s| s.start)
.unwrap_or(node.content_span.end);
// Middle: everything between field name and value.
if name_end < value_start {
out.push_str(&self.source[name_end..value_start]);
}
// field value (first line)
if let Some(ref val_span) = node.field_value {
out.push_str(val_span.slice(&self.source));
}
// Trailing trivia (newline).
for tp in &node.trailing_trivia {
out.push_str(tp.span.slice(&self.source));
}
// Children: continuation ValueLine nodes.
for &child_id in &node.children {
self.render_node(child_id, out);
}
}
CstNodeKind::Import => {
// Same structure as Field.
if let Some(ref name_span) = node.field_name {
out.push_str(name_span.slice(&self.source));
}
let name_end = node
.field_name
.map(|s| s.end)
.unwrap_or(node.content_span.start);
let value_start = node
.field_value
.map(|s| s.start)
.unwrap_or(node.content_span.end);
if name_end < value_start {
out.push_str(&self.source[name_end..value_start]);
}
if let Some(ref val_span) = node.field_value {
out.push_str(val_span.slice(&self.source));
}
for tp in &node.trailing_trivia {
out.push_str(tp.span.slice(&self.source));
}
}
CstNodeKind::Section => {
// Section keyword.
if let Some(ref kw_span) = node.section_keyword {
out.push_str(kw_span.slice(&self.source));
}
// Spacing + arg.
let kw_end = node
.section_keyword
.map(|s| s.end)
.unwrap_or(node.content_span.start);
let arg_start = node.section_arg.map(|s| s.start);
let arg_end = node.section_arg.map(|s| s.end);
match (arg_start, arg_end) {
(Some(astart), Some(aend)) => {
// Spacing between keyword and arg.
if kw_end < astart {
out.push_str(&self.source[kw_end..astart]);
}
out.push_str(&self.source[astart..aend]);
// Anything between arg end and content_span end
// (trailing whitespace on the header line).
if aend < node.content_span.end {
out.push_str(&self.source[aend..node.content_span.end]);
}
}
_ => {
// No arg: emit any trailing content.
if kw_end < node.content_span.end {
out.push_str(&self.source[kw_end..node.content_span.end]);
}
}
}
// Trailing trivia (newline after header).
for tp in &node.trailing_trivia {
out.push_str(tp.span.slice(&self.source));
}
// Section body children.
for &child_id in &node.children {
self.render_node(child_id, out);
}
}
CstNodeKind::Conditional => {
// Keyword (if / elif).
if let Some(ref kw_span) = node.condition_keyword {
out.push_str(kw_span.slice(&self.source));
}
let kw_end = node
.condition_keyword
.map(|s| s.end)
.unwrap_or(node.content_span.start);
// Condition expression.
if let Some(ref expr_span) = node.condition_expr {
if kw_end < expr_span.start {
out.push_str(&self.source[kw_end..expr_span.start]);
}
out.push_str(expr_span.slice(&self.source));
if expr_span.end < node.content_span.end {
out.push_str(&self.source[expr_span.end..node.content_span.end]);
}
} else if kw_end < node.content_span.end {
out.push_str(&self.source[kw_end..node.content_span.end]);
}
// Trailing trivia.
for tp in &node.trailing_trivia {
out.push_str(tp.span.slice(&self.source));
}
// Children (then-block fields + optional ElseBlock).
for &child_id in &node.children {
self.render_node(child_id, out);
}
}
CstNodeKind::ElseBlock => {
// The `else` keyword line.
out.push_str(node.content_span.slice(&self.source));
for tp in &node.trailing_trivia {
out.push_str(tp.span.slice(&self.source));
}
// Else body children.
for &child_id in &node.children {
self.render_node(child_id, out);
}
}
CstNodeKind::Comment => {
out.push_str(node.content_span.slice(&self.source));
for tp in &node.trailing_trivia {
out.push_str(tp.span.slice(&self.source));
}
}
CstNodeKind::BlankLine => {
out.push_str(node.content_span.slice(&self.source));
for tp in &node.trailing_trivia {
out.push_str(tp.span.slice(&self.source));
}
}
CstNodeKind::ValueLine => {
out.push_str(node.content_span.slice(&self.source));
for tp in &node.trailing_trivia {
out.push_str(tp.span.slice(&self.source));
}
}
}
}
/// Number of nodes in the arena.
pub fn node_count(&self) -> usize {
self.nodes.len()
}
/// Get a reference to a node by its id.
pub fn node(&self, id: NodeId) -> &CstNode {
&self.nodes[id.0]
}
/// Get a mutable reference to a node by its id.
pub fn node_mut(&mut self, id: NodeId) -> &mut CstNode {
&mut self.nodes[id.0]
}
/// Iterate over the direct children of a node.
pub fn children(&self, id: NodeId) -> &[NodeId] {
&self.nodes[id.0].children
}
}