tinymist_analysis/syntax/
comment.rs

//! Convenient utilities for matching comments in code.
2
3use crate::prelude::*;
4
5/// Extract the module-level documentation from a source.
6pub fn find_module_level_docs(src: &Source) -> Option<String> {
7    crate::log_debug_ct!("finding docs at: {id:?}", id = src.id());
8
9    let root = LinkedNode::new(src.root());
10    for n in root.children() {
11        if n.kind().is_trivia() {
12            continue;
13        }
14
15        return extract_mod_docs_between(&root, 0..n.offset(), true);
16    }
17
18    extract_mod_docs_between(&root, 0..src.text().len(), true)
19}
20
21fn extract_mod_docs_between(
22    node: &LinkedNode,
23    rng: Range<usize>,
24    first_group: bool,
25) -> Option<String> {
26    let mut matcher = DocCommentMatcher {
27        strict: true,
28        ..Default::default()
29    };
30    let nodes = node.children();
31    'scan_comments: for n in nodes {
32        let offset = n.offset();
33        if offset < rng.start {
34            continue 'scan_comments;
35        }
36        if offset >= rng.end {
37            break 'scan_comments;
38        }
39
40        crate::log_debug_ct!("found comment for docs: {:?}: {:?}", n.kind(), n.text());
41        if matcher.process(n.get()) {
42            if first_group {
43                break 'scan_comments;
44            }
45            matcher.comments.clear();
46        }
47    }
48
49    matcher.collect()
50}
51
/// A signal raised by the comment group matcher for each processed node.
///
/// The signal is a plain, field-less tag, so it can be copied, compared and
/// debug-printed freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommentGroupSignal {
    /// A hash marker is found.
    Hash,
    /// A space is found that does not break the current group.
    Space,
    /// A line comment is found.
    LineComment,
    /// A block comment is found.
    BlockComment,
    /// The comment group should be broken.
    BreakGroup,
}
65
66/// A matcher that groups comments.
67#[derive(Default)]
68pub struct CommentGroupMatcher {
69    newline_count: u32,
70}
71
72impl CommentGroupMatcher {
73    /// Reset the matcher. This usually happens after a group is collected or
74    /// when some other child item is breaking the comment group manually.
75    pub fn reset(&mut self) {
76        self.newline_count = 0;
77    }
78
79    /// Process a child relative to some [`SyntaxNode`].
80    ///
81    /// ## Example
82    ///
83    /// See [`DocCommentMatcher`] for a real-world example.
84    pub fn process(&mut self, n: &SyntaxNode) -> CommentGroupSignal {
85        match n.kind() {
86            SyntaxKind::Hash => {
87                self.newline_count = 0;
88
89                CommentGroupSignal::Hash
90            }
91            SyntaxKind::Space => {
92                if n.text().contains('\n') {
93                    self.newline_count += 1;
94                }
95                if self.newline_count > 1 {
96                    return CommentGroupSignal::BreakGroup;
97                }
98
99                CommentGroupSignal::Space
100            }
101            SyntaxKind::Parbreak => {
102                self.newline_count = 2;
103                CommentGroupSignal::BreakGroup
104            }
105            SyntaxKind::LineComment => {
106                self.newline_count = 0;
107                CommentGroupSignal::LineComment
108            }
109            SyntaxKind::BlockComment => {
110                self.newline_count = 0;
111                CommentGroupSignal::BlockComment
112            }
113            _ => {
114                self.newline_count = 0;
115                CommentGroupSignal::BreakGroup
116            }
117        }
118    }
119}
120enum RawComment {
121    Line(EcoString),
122    Block(EcoString),
123}
124
125/// A matcher that collects documentation comments.
126#[derive(Default)]
127pub struct DocCommentMatcher {
128    comments: Vec<RawComment>,
129    group_matcher: CommentGroupMatcher,
130    strict: bool,
131}
132
133impl DocCommentMatcher {
134    /// Reset the matcher. This usually happens after a group is collected or
135    /// when some other child item is breaking the comment group manually.
136    pub fn reset(&mut self) {
137        self.comments.clear();
138        self.group_matcher.reset();
139    }
140
141    /// Process a child relative to some [`SyntaxNode`].
142    pub fn process(&mut self, n: &SyntaxNode) -> bool {
143        match self.group_matcher.process(n) {
144            CommentGroupSignal::LineComment => {
145                let text = n.text();
146                if !self.strict || text.starts_with("///") {
147                    self.comments.push(RawComment::Line(text.clone()));
148                }
149            }
150            CommentGroupSignal::BlockComment => {
151                let text = n.text();
152                if !self.strict {
153                    self.comments.push(RawComment::Block(text.clone()));
154                }
155            }
156            CommentGroupSignal::BreakGroup => {
157                return true;
158            }
159            CommentGroupSignal::Hash | CommentGroupSignal::Space => {}
160        }
161
162        false
163    }
164
165    /// Collect the comments and return the result.
166    pub fn collect(&mut self) -> Option<String> {
167        let comments = &self.comments;
168        if comments.is_empty() {
169            return None;
170        }
171
172        let comments = comments.iter().map(|comment| match comment {
173            RawComment::Line(line) => {
174                // strip all slash prefix
175                let text = line.trim_start_matches('/');
176                text
177            }
178            RawComment::Block(block) => {
179                fn remove_comment(text: &str) -> Option<&str> {
180                    let mut text = text.strip_prefix("/*")?.strip_suffix("*/")?.trim();
181                    // trip start star
182                    if text.starts_with('*') {
183                        text = text.strip_prefix('*')?.trim();
184                    }
185                    Some(text)
186                }
187
188                remove_comment(block).unwrap_or(block.as_str())
189            }
190        });
191        let comments = comments.collect::<Vec<_>>();
192
193        let dedent = comments.iter().fold(usize::MAX, |acc, content| {
194            let indent = content.chars().take_while(|ch| ch.is_whitespace()).count();
195            acc.min(indent)
196        });
197
198        let size_hint = comments.iter().map(|comment| comment.len()).sum::<usize>();
199        let mut comments = comments
200            .iter()
201            .map(|comment| comment.chars().skip(dedent).collect::<String>());
202
203        let res = comments.try_fold(String::with_capacity(size_hint), |mut acc, comment| {
204            if !acc.is_empty() {
205                acc.push('\n');
206            }
207
208            acc.push_str(&comment);
209            Some(acc)
210        });
211
212        self.comments.clear();
213        res
214    }
215}