Skip to main content

php_ast/
comment_map.rs

//! Maps comments to the AST nodes they logically belong to.
//!
//! Uses a span-proximity heuristic: each comment is associated with the first
//! AST statement whose span starts **at or after** the comment's end. Comments
//! that follow the last statement are collected as "trailing" comments.
//!
//! # Example
//!
//! ```
//! use php_ast::comment_map::CommentMap;
//! use php_ast::ast::Comment;
//! use php_ast::Span;
//!
//! // Given parse result with comments and a program:
//! // let result = php_rs_parser::parse(&arena, source);
//! // let map = CommentMap::build(&result.comments, &result.program.stmts);
//! // let leading = map.leading_comments(some_stmt_span);
//! ```
19use crate::ast::{Comment, Stmt};
20use crate::Span;
21use std::collections::BTreeMap;
22
23/// Associates comments with statement spans using a leading-comment heuristic.
24///
25/// A comment is "leading" for a statement if it appears before the statement
26/// and no other statement is closer. Comments after the last statement are
27/// "trailing".
28pub struct CommentMap<'a, 'src> {
29    /// Maps statement start offset → comments that lead that statement.
30    leading: BTreeMap<u32, Vec<&'a Comment<'src>>>,
31    /// Comments that appear after all statements.
32    trailing: Vec<&'a Comment<'src>>,
33}
34
35impl<'a, 'src> CommentMap<'a, 'src> {
36    /// Build a comment map from a list of comments and statements.
37    ///
38    /// Both `comments` and `stmts` must be in source order (which they are
39    /// as produced by the parser).
40    pub fn build<'arena>(comments: &'a [Comment<'src>], stmts: &[Stmt<'arena, 'src>]) -> Self {
41        let mut leading: BTreeMap<u32, Vec<&'a Comment<'src>>> = BTreeMap::new();
42        let mut trailing: Vec<&'a Comment<'src>> = Vec::new();
43
44        // Collect statement start offsets in sorted order
45        let stmt_starts: Vec<u32> = stmts.iter().map(|s| s.span.start).collect();
46
47        for comment in comments {
48            // Find the first statement that starts at or after this comment ends
49            match stmt_starts.binary_search(&comment.span.end) {
50                Ok(idx) => {
51                    leading.entry(stmt_starts[idx]).or_default().push(comment);
52                }
53                Err(idx) => {
54                    if idx < stmt_starts.len() {
55                        leading.entry(stmt_starts[idx]).or_default().push(comment);
56                    } else {
57                        trailing.push(comment);
58                    }
59                }
60            }
61        }
62
63        Self { leading, trailing }
64    }
65
66    /// Get comments that lead the statement at the given span.
67    pub fn leading_comments(&self, stmt_span: Span) -> &[&'a Comment<'src>] {
68        self.leading
69            .get(&stmt_span.start)
70            .map(|v| v.as_slice())
71            .unwrap_or(&[])
72    }
73
74    /// Get comments that appear after all statements.
75    pub fn trailing_comments(&self) -> &[&'a Comment<'src>] {
76        &self.trailing
77    }
78
79    /// Iterate over all (stmt_start_offset, comments) pairs.
80    pub fn iter_leading(&self) -> impl Iterator<Item = (u32, &[&'a Comment<'src>])> {
81        self.leading.iter().map(|(k, v)| (*k, v.as_slice()))
82    }
83
84    /// Returns true if no comments were mapped.
85    pub fn is_empty(&self) -> bool {
86        self.leading.is_empty() && self.trailing.is_empty()
87    }
88
89    /// Total number of comments in the map.
90    pub fn len(&self) -> usize {
91        self.leading.values().map(|v| v.len()).sum::<usize>() + self.trailing.len()
92    }
93}
94
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::*;

    /// Builds a `CommentKind::Line` comment with span `Span::new(start, end)`.
    fn make_comment(start: u32, end: u32, text: &str) -> Comment<'_> {
        Comment {
            kind: CommentKind::Line,
            text,
            span: Span::new(start, end),
        }
    }

    /// Builds a `StmtKind::Nop` statement with span `Span::new(start, end)`.
    fn make_stmt(start: u32, end: u32) -> Stmt<'static, 'static> {
        Stmt {
            kind: StmtKind::Nop,
            span: Span::new(start, end),
        }
    }

    #[test]
    fn comments_attach_to_following_stmt() {
        let comments = vec![
            make_comment(0, 10, "// first"),
            make_comment(11, 22, "// second"),
        ];
        let stmts = vec![make_stmt(23, 30), make_stmt(31, 40)];

        let map = CommentMap::build(&comments, &stmts);

        // Both comments lead the first statement
        let leading = map.leading_comments(stmts[0].span);
        assert_eq!(leading.len(), 2);
        assert_eq!(leading[0].text, "// first");
        assert_eq!(leading[1].text, "// second");

        // Second statement has no leading comments
        assert_eq!(map.leading_comments(stmts[1].span).len(), 0);
        assert!(map.trailing_comments().is_empty());
    }

    #[test]
    fn comment_ending_at_stmt_start_is_leading() {
        // Exact-match case: the comment ends on the very offset where the
        // statement starts, with no gap between them.
        let comments = vec![make_comment(0, 10, "// touching")];
        let stmts = vec![make_stmt(10, 20), make_stmt(21, 30)];

        let map = CommentMap::build(&comments, &stmts);

        let leading = map.leading_comments(stmts[0].span);
        assert_eq!(leading.len(), 1);
        assert_eq!(leading[0].text, "// touching");
        assert!(map.leading_comments(stmts[1].span).is_empty());
        assert!(map.trailing_comments().is_empty());
    }

    #[test]
    fn trailing_comments() {
        let comments = vec![make_comment(50, 60, "// trailing")];
        let stmts = vec![make_stmt(10, 40)];

        let map = CommentMap::build(&comments, &stmts);
        assert_eq!(map.leading_comments(stmts[0].span).len(), 0);
        assert_eq!(map.trailing_comments().len(), 1);
        assert_eq!(map.trailing_comments()[0].text, "// trailing");
    }

    #[test]
    fn no_comments() {
        let comments: Vec<Comment> = vec![];
        let stmts = vec![make_stmt(0, 10)];

        let map = CommentMap::build(&comments, &stmts);
        assert!(map.is_empty());
        assert_eq!(map.len(), 0);
    }

    #[test]
    fn no_stmts() {
        let comments = vec![make_comment(0, 5, "// alone")];
        let stmts: Vec<Stmt> = vec![];

        let map = CommentMap::build(&comments, &stmts);
        assert_eq!(map.trailing_comments().len(), 1);
        // A map holding only trailing comments is still non-empty.
        assert!(!map.is_empty());
        assert_eq!(map.len(), 1);
    }

    #[test]
    fn interleaved_comments() {
        // comment1  stmt1  comment2  stmt2
        let comments = vec![make_comment(0, 8, "// c1"), make_comment(20, 28, "// c2")];
        let stmts = vec![make_stmt(10, 18), make_stmt(30, 40)];

        let map = CommentMap::build(&comments, &stmts);
        assert_eq!(map.leading_comments(stmts[0].span).len(), 1);
        assert_eq!(map.leading_comments(stmts[0].span)[0].text, "// c1");
        assert_eq!(map.leading_comments(stmts[1].span).len(), 1);
        assert_eq!(map.leading_comments(stmts[1].span)[0].text, "// c2");
    }

    #[test]
    fn iter_leading_yields_groups_in_offset_order() {
        let comments = vec![make_comment(0, 8, "// c1"), make_comment(20, 28, "// c2")];
        let stmts = vec![make_stmt(10, 18), make_stmt(30, 40)];

        let map = CommentMap::build(&comments, &stmts);
        let groups: Vec<_> = map.iter_leading().collect();

        // One group per statement, keyed by the statement's start offset.
        assert_eq!(groups.len(), 2);
        assert_eq!(groups[0].0, 10);
        assert_eq!(groups[0].1.len(), 1);
        assert_eq!(groups[0].1[0].text, "// c1");
        assert_eq!(groups[1].0, 30);
        assert_eq!(groups[1].1.len(), 1);
        assert_eq!(groups[1].1[0].text, "// c2");
    }

    #[test]
    fn len_counts_all() {
        let comments = vec![
            make_comment(0, 5, "// a"),
            make_comment(6, 10, "// b"),
            make_comment(50, 55, "// c"),
        ];
        let stmts = vec![make_stmt(11, 40)];

        let map = CommentMap::build(&comments, &stmts);
        // Two leading comments plus one trailing comment.
        assert_eq!(map.len(), 3);
        assert!(!map.is_empty());
    }
}