Skip to main content

rustpython_ruff_python_trivia/
comment_ranges.rs

1use std::fmt::{Debug, Formatter};
2use std::ops::Deref;
3
4use itertools::Itertools;
5
6use ruff_source_file::LineRanges;
7use ruff_text_size::{Ranged, TextRange, TextSize};
8
9use crate::{has_leading_content, has_trailing_content, is_python_whitespace};
10
11/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
12#[derive(Clone, Default)]
13pub struct CommentRanges {
14    raw: Vec<TextRange>,
15}
16
17impl CommentRanges {
18    pub fn new(ranges: Vec<TextRange>) -> Self {
19        Self { raw: ranges }
20    }
21
22    /// Returns `true` if the given range intersects with any comment range.
23    pub fn intersects(&self, target: TextRange) -> bool {
24        self.raw
25            .binary_search_by(|range| {
26                if target.intersect(*range).is_some() {
27                    std::cmp::Ordering::Equal
28                } else if range.end() < target.start() {
29                    std::cmp::Ordering::Less
30                } else {
31                    std::cmp::Ordering::Greater
32                }
33            })
34            .is_ok()
35    }
36
37    /// Returns the comments who are within the range
38    pub fn comments_in_range(&self, range: TextRange) -> &[TextRange] {
39        let start = self
40            .raw
41            .partition_point(|comment| comment.start() < range.start());
42        // We expect there are few comments, so switching to find should be faster
43        match self.raw[start..]
44            .iter()
45            .find_position(|comment| comment.end() > range.end())
46        {
47            Some((in_range, _element)) => &self.raw[start..start + in_range],
48            None => &self.raw[start..],
49        }
50    }
51
52    /// Returns `true` if a statement or expression includes at least one comment.
53    pub fn has_comments<T>(&self, node: &T, source: &str) -> bool
54    where
55        T: Ranged,
56    {
57        let start = if has_leading_content(node.start(), source) {
58            node.start()
59        } else {
60            source.line_start(node.start())
61        };
62        let end = if has_trailing_content(node.end(), source) {
63            node.end()
64        } else {
65            source.line_end(node.end())
66        };
67
68        self.intersects(TextRange::new(start, end))
69    }
70
71    /// Given a [`CommentRanges`], determine which comments are grouped together
72    /// in "comment blocks". A "comment block" is a sequence of consecutive
73    /// own-line comments in which the comment hash (`#`) appears in the same
74    /// column in each line, and at least one comment is non-empty.
75    ///
76    /// Returns a sorted vector containing the offset of the leading hash (`#`)
77    /// for each comment in any block comment.
78    ///
79    /// ## Examples
80    /// ```python
81    /// # This is a block comment
82    /// # because it spans multiple lines
83    ///
84    ///     # This is also a block comment
85    ///     # even though it is indented
86    ///
87    /// # this is not a block comment
88    ///
89    /// x = 1  # this is not a block comment because
90    /// y = 2  # the lines do not *only* contain comments
91    ///
92    /// # This is not a block comment because
93    ///     # not all consecutive lines have the
94    /// # first `#` character in the same column
95    ///
96    /// """
97    /// # This is not a block comment because it is
98    /// # contained within a multi-line string/comment
99    /// """
100    /// ```
101    pub fn block_comments(&self, source: &str) -> Vec<TextSize> {
102        let mut block_comments: Vec<TextSize> = Vec::new();
103
104        let mut current_block: Vec<TextSize> = Vec::new();
105        let mut current_block_column: Option<TextSize> = None;
106        let mut current_block_non_empty = false;
107
108        let mut prev_line_end = None;
109
110        for comment_range in &self.raw {
111            let offset = comment_range.start();
112            let line_start = source.line_start(offset);
113            let line_end = source.full_line_end(offset);
114            let column = offset - line_start;
115
116            // If this is an end-of-line comment, reset the current block.
117            if !Self::is_own_line(offset, source) {
118                // Push the current block, and reset.
119                if current_block.len() > 1 && current_block_non_empty {
120                    block_comments.extend(current_block);
121                }
122                current_block = vec![];
123                current_block_column = None;
124                current_block_non_empty = false;
125                prev_line_end = Some(line_end);
126                continue;
127            }
128
129            // If there's a blank line between this comment and the previous
130            // comment, reset the current block.
131            if prev_line_end.is_some_and(|prev_line_end| {
132                source.contains_line_break(TextRange::new(prev_line_end, line_start))
133            }) {
134                // Push the current block.
135                if current_block.len() > 1 && current_block_non_empty {
136                    block_comments.extend(current_block);
137                }
138
139                // Reset the block state.
140                current_block = vec![offset];
141                current_block_column = Some(column);
142                current_block_non_empty = !Self::is_empty(*comment_range, source);
143                prev_line_end = Some(line_end);
144                continue;
145            }
146
147            if let Some(current_column) = current_block_column {
148                if column == current_column {
149                    // Add the comment to the current block.
150                    current_block.push(offset);
151                    current_block_non_empty |= !Self::is_empty(*comment_range, source);
152                    prev_line_end = Some(line_end);
153                } else {
154                    // Push the current block.
155                    if current_block.len() > 1 && current_block_non_empty {
156                        block_comments.extend(current_block);
157                    }
158
159                    // Reset the block state.
160                    current_block = vec![offset];
161                    current_block_column = Some(column);
162                    current_block_non_empty = !Self::is_empty(*comment_range, source);
163                    prev_line_end = Some(line_end);
164                }
165            } else {
166                // Push the current block.
167                if current_block.len() > 1 && current_block_non_empty {
168                    block_comments.extend(current_block);
169                }
170
171                // Reset the block state.
172                current_block = vec![offset];
173                current_block_column = Some(column);
174                current_block_non_empty = !Self::is_empty(*comment_range, source);
175                prev_line_end = Some(line_end);
176            }
177        }
178
179        // Push any lingering blocks.
180        if current_block.len() > 1 && current_block_non_empty {
181            block_comments.extend(current_block);
182        }
183
184        block_comments
185    }
186
187    /// Returns `true` if the given range is an empty comment.
188    fn is_empty(range: TextRange, source: &str) -> bool {
189        source[range].chars().skip(1).all(is_python_whitespace)
190    }
191
192    /// Returns `true` if a comment is an own-line comment (as opposed to an end-of-line comment).
193    pub fn is_own_line(offset: TextSize, source: &str) -> bool {
194        let range = TextRange::new(source.line_start(offset), offset);
195        source[range].chars().all(is_python_whitespace)
196    }
197}
198
199impl Deref for CommentRanges {
200    type Target = [TextRange];
201
202    fn deref(&self) -> &Self::Target {
203        self.raw.as_slice()
204    }
205}
206
207impl Debug for CommentRanges {
208    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
209        f.debug_tuple("CommentRanges").field(&self.raw).finish()
210    }
211}
212
213impl<'a> IntoIterator for &'a CommentRanges {
214    type Item = TextRange;
215    type IntoIter = std::iter::Copied<std::slice::Iter<'a, TextRange>>;
216
217    fn into_iter(self) -> Self::IntoIter {
218        self.raw.iter().copied()
219    }
220}