1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
use std::{cell::RefCell, collections::HashMap, rc::Rc};
use crate::{Declaration, OntologyFile, error::Span};
/// A single comment from the source.
///
/// A value may also stand for several consecutive comment lines that were
/// folded together by `CommentSink::push_and_merge`. In that case `text`
/// holds the individual texts joined with `\n`, `raw` holds the raw pieces
/// concatenated without a separator, `span` is the result of merging both
/// spans, and `line` / `column` are those of the first comment.
#[cfg_attr(feature = "python", pyo3::pyclass(from_py_object, get_all))]
#[derive(Debug, Clone, PartialEq)]
pub struct Comment {
/// The text content, without the leading `#` and without trailing newline.
/// For a merged comment, the lines are separated by `\n`.
pub text: String,
/// Original text including the `#`.
pub raw: String,
/// Position in source.
pub span: Span,
/// Line number (1-based) where this comment appears.
/// For a merged comment this is the line of its first part.
pub line: usize,
/// Column where the `#` starts (1-based).
pub column: usize,
}
/// Where a comment sits relative to a code node.
///
/// The placement is decided by `classify_comment`; the descriptions below
/// summarise the situations each variant is meant for.
#[derive(Debug, Clone, PartialEq)]
pub enum CommentPlacement {
/// Comment on the line(s) directly before the node (optionally separated
/// from it by a single blank line).
///
/// ```dolfin
/// # This describes Person
/// concept Person:
/// ```
Leading,
/// Comment on the same line, after the code, or continuing such a
/// comment on the following line at (roughly) the same column.
///
/// ```dolfin
/// has name: string # required field
/// ```
Trailing,
/// Comment indented deeper than the node it follows or is enclosed by,
/// which can be seen neither as `Leading` nor as `Trailing`.
///
/// ```dolfin
/// has name: string
///     # anything here
/// ```
Inside,
/// Comment inside an empty block, or between nodes where
/// it doesn't clearly belong to either.
///
/// ```dolfin
/// concept Empty:
///     # TODO: add members
/// ```
Dangling,
}
/// A comment attached to a specific AST node.
///
/// Pairs the comment itself with the way it is positioned relative to
/// the node.
#[derive(Debug, Clone)]
pub struct AttachedComment {
/// The comment being attached.
pub comment: Comment,
/// How the comment is placed relative to its node.
pub placement: CommentPlacement,
}
#[derive(Debug, Clone, Default)]
/// Shared comment sink that survives the lexer being consumed.
///
/// The storage is an `Rc<RefCell<Vec<Comment>>>`, so cloning a sink yields
/// another handle onto the same underlying list of comments rather than a
/// copy of it.
pub struct CommentSink {
// Reference-counted, interior-mutable buffer shared by all clones.
inner: Rc<RefCell<Vec<Comment>>>,
}
impl CommentSink {
pub fn new() -> Self {
Self {
inner: Rc::new(RefCell::new(Vec::new())),
}
}
pub fn push(&self, comment: Comment) {
self.inner.borrow_mut().push(comment);
}
pub fn push_and_merge(&self, comment: Comment) {
let mut inner = self.inner.borrow_mut();
if let Some(previous) = inner.pop() {
if previous.span.end.line + 1 == comment.line && previous.column == comment.column {
let new_comment = Comment {
text: format!("{}\n{}", previous.text, comment.text),
raw: format!("{}{}", previous.raw, comment.raw),
span: previous.span.merge(&comment.span),
line: previous.line,
column: previous.column,
};
inner.push(new_comment);
} else {
inner.push(previous);
inner.push(comment);
}
} else {
inner.push(comment);
}
}
/// Extract all collected comments.
pub fn take(&self) -> Vec<Comment> {
std::mem::take(&mut *self.inner.borrow_mut())
}
pub fn comments(&self) -> Vec<Comment> {
self.inner.borrow().clone()
}
}
/// Map from AST node spans to their attached comments.
///
/// The keyed maps are indexed by the span of the node a comment was
/// attached to; `dangling` is a flat list.
#[derive(Debug, Default, Clone)]
pub struct CommentMap {
/// Leading comments: appear on lines before the node.
pub leading: HashMap<Span, Vec<Comment>>,
/// Trailing comments: appear on the same line after the node.
pub trailing: HashMap<Span, Vec<Comment>>,
/// Inside comments: appear at the beginning of an indented block,
/// being neither leading nor trailing.
pub inside: HashMap<Span, Vec<Comment>>,
/// Dangling comments: inside empty blocks or unattachable.
/// Holds both the comments classified as `CommentPlacement::Dangling`
/// and those for which no placement could be determined at all.
pub dangling: Vec<Comment>,
}
impl CommentMap {
/// Build the comment map from an AST and collected comments.
pub fn build(ontology: &OntologyFile, mut comments: Vec<Comment>) -> Self {
if comments.is_empty() {
return Self::default();
}
// Sort comments by position
comments.sort_by_key(|c| c.span);
// collect all node sspans from the AST, sorted
let mut node_spans = Vec::new();
collect_spans(&ontology, &mut node_spans);
node_spans.sort();
let mut map = CommentMap::default();
let mut unattached: Vec<Comment> = Vec::new();
for comment in comments {
match classify_comment(&comment, &node_spans) {
Some((placement, node_span)) => {
let bucket = match placement {
CommentPlacement::Leading => map.leading.entry(node_span).or_default(),
CommentPlacement::Trailing => map.trailing.entry(node_span).or_default(),
CommentPlacement::Inside => map.inside.entry(node_span).or_default(),
CommentPlacement::Dangling => {
map.dangling.push(comment);
continue;
}
};
bucket.push(comment);
}
None => {
unattached.push(comment);
}
}
}
map.dangling.extend(unattached);
map
}
/// Get leading comments for a node.
pub fn leading_comments(&self, span: &Span) -> &[Comment] {
self.leading.get(span).map(|v| v.as_slice()).unwrap_or(&[])
}
pub fn trailing_comments(&self, span: &Span) -> &[Comment] {
self.trailing.get(span).map(|v| v.as_slice()).unwrap_or(&[])
}
pub fn dangling_comments(&self, span: &Span) -> Vec<Comment> {
self.dangling
.iter()
.filter(|v| {
span.start.offset <= v.span.start.offset && v.span.end.offset <= span.end.offset
})
.cloned()
.collect()
}
pub fn inside_comments(&self, span: &Span) -> &[Comment] {
self.inside.get(span).map(|v| v.as_slice()).unwrap_or(&[])
}
}
/// Determine where a comment belongs relative to the known node spans.
///
/// `node_spans` is scanned both forwards and backwards; the caller
/// (`CommentMap::build`) passes it sorted. The rules are tried in this
/// order and the first one that applies wins:
///
/// 1. **Trailing** – walking the nodes from last to first, a node that ends
///    before the comment starts and either ends on the comment's line, or
///    has no other node span ordered between it and the comment while the
///    comment starts 0 to 4 columns to the right of the node's end column.
/// 2. **Leading** – the first node starting after the comment, when it
///    starts on the line right below the comment. If exactly one blank line
///    separates them, this result is only kept as a fallback for rule 4.
/// 3. **Inside** – the first node that starts at a smaller column than the
///    comment and either precedes it with no node span in between, or
///    encloses it with no nested node span ordered before the comment.
/// 4. The postponed **Leading** result from rule 2, if any.
/// 5. **Dangling** – the first node whose offsets strictly contain the
///    comment's start offset.
/// 6. **Trailing** – the last node ending at or before the comment, if it
///    ends on the comment's line or the line just above.
///
/// Returns `None` when no rule applies (a file-level comment).
fn classify_comment(comment: &Comment, node_spans: &[Span]) -> Option<(CommentPlacement, Span)> {
    let line = comment.line;
    let begin = comment.span.start.offset;
    let finish = comment.span.end.offset;
    let begin_col = comment.span.start.column;
    // A merged comment spans several lines; count them to measure gaps.
    let height = comment.text.split('\n').count();

    // True when no node span orders strictly between `node` and the comment.
    let nothing_between =
        |node: &Span| !node_spans.iter().any(|other| node < other && other < &comment.span);

    // 1. Trailing, e.g. `has name: string # required`
    //                   `                 # it means mandatory`
    for node in node_spans.iter().rev() {
        if node.end.offset > begin {
            continue;
        }
        let on_same_line = node.end.line == line;
        if on_same_line
            || (nothing_between(node)
                && node.end.column + 4 >= begin_col
                && node.end.column <= begin_col)
        {
            return Some((CommentPlacement::Trailing, *node));
        }
    }

    // 2. Leading, e.g. `# Describes a person`
    //                  `concept Person:`
    let postponed = match node_spans.iter().find(|node| node.start.offset > finish) {
        Some(next) => {
            let gap = next.start.line.saturating_sub(line);
            if gap == height {
                return Some((CommentPlacement::Leading, *next));
            }
            // One blank line in between: only used if no `Inside` candidate exists.
            if gap == 1 + height {
                Some((CommentPlacement::Leading, *next))
            } else {
                None
            }
        }
        None => None,
    };

    // 3. Inside, e.g. `concept Flower:`
    //                 `    # Spring power !`
    //                 `    has color: Color`
    let inside = node_spans.iter().find(|&node| {
        if node.start.column >= begin_col {
            return false;
        }
        let follows_node = node.end.offset <= begin && nothing_between(node);
        follows_node
            || (node.start.offset <= begin
                && finish <= node.end.offset
                && !node_spans.iter().any(|inner| {
                    node.start.offset < inner.start.offset
                        && inner.end.offset <= node.end.offset
                        && inner < &comment.span
                }))
    });

    // 4. Prefer `Inside`, then the postponed `Leading`.
    let preferred = inside
        .map(|node| (CommentPlacement::Inside, *node))
        .or(postponed);
    if preferred.is_some() {
        return preferred;
    }

    // 5. Is the comment inside a node's span? -> Dangling.
    let enclosing = node_spans
        .iter()
        .find(|node| node.start.offset < begin && begin < node.end.offset);
    if let Some(outer) = enclosing {
        return Some((CommentPlacement::Dangling, *outer));
    }

    // 6. No following node to lead into (e.g. a comment at the very end of
    //    the file): attach to the previous node as trailing when it is close
    //    enough, otherwise the comment is a file-level one.
    node_spans
        .iter()
        .rev()
        .find(|node| node.end.offset <= begin)
        .filter(|prev| line.saturating_sub(prev.end.line) <= 1)
        .map(|prev| (CommentPlacement::Trailing, *prev))
}
/// Walk the AST and collect all node spans into `out`.
///
/// Spans are appended in this order: the file's own span, the span of each
/// prefix, then the spans gathered from each declaration. Nodes without a
/// span are skipped.
fn collect_spans(ontology: &OntologyFile, out: &mut Vec<Span>) {
    out.extend(ontology.span);
    out.extend(ontology.prefixes.iter().filter_map(|prefix| prefix.span));
    ontology
        .declarations
        .iter()
        .for_each(|decl| collect_declaration_spans(decl, out));
}
/// Append the spans found in a single declaration to `out`.
///
/// For a concept: its own span, then the spans of its `has` declarations,
/// of its parent type references and of its `one_of` entries, in that
/// order. For a property or a rule: only the declaration's own span.
/// Anything without a span is skipped.
fn collect_declaration_spans(decl: &Declaration, out: &mut Vec<Span>) {
    match decl {
        Declaration::Concept(concept) => {
            out.extend(concept.span);
            out.extend(concept.has_declarations.iter().filter_map(|has| has.span));
            out.extend(concept.parents.iter().filter_map(|parent| match parent {
                crate::TypeRef::Named { span, .. } | crate::TypeRef::Primitive { span, .. } => {
                    *span
                }
            }));
            out.extend(
                concept
                    .one_of
                    .iter()
                    .flatten()
                    .filter_map(|member| member.span),
            );
        }
        Declaration::Property(property) => out.extend(property.span),
        Declaration::Rule(rule) => {
            // Only the rule itself is recorded for now; its match/then
            // blocks are not descended into yet.
            out.extend(rule.span);
        }
    }
}