Skip to main content

tsz_common/
comments.rs

1//! Comment Preservation
2//!
3//! This module handles extracting and emitting comments from TypeScript source.
4//! Comments are not part of the AST, so they must be extracted separately
5//! from the source text and associated with nodes for emission.
6
7use serde::{Deserialize, Serialize};
8
9/// A range representing a comment in the source text.
10#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
11pub struct CommentRange {
12    /// Start position (byte offset)
13    pub pos: u32,
14    /// End position (byte offset)
15    pub end: u32,
16    /// Whether this is a multi-line comment
17    pub is_multi_line: bool,
18    /// Whether this comment has a trailing newline
19    pub has_trailing_new_line: bool,
20}
21
22impl CommentRange {
23    /// Create a new comment range.
24    #[must_use]
25    pub const fn new(pos: u32, end: u32, is_multi_line: bool, has_trailing_new_line: bool) -> Self {
26        Self {
27            pos,
28            end,
29            is_multi_line,
30            has_trailing_new_line,
31        }
32    }
33
34    /// Get the comment text from source.
35    #[must_use]
36    pub fn get_text<'a>(&self, source: &'a str) -> &'a str {
37        let start = self.pos as usize;
38        let end = self.end as usize;
39        if end <= source.len() && start < end {
40            &source[start..end]
41        } else {
42            ""
43        }
44    }
45}
46
47/// Extract all comment ranges from source text.
48///
49/// This scans the source text and returns all single-line (//) and
50/// multi-line (/* */) comments with their positions.
51#[must_use]
52pub fn get_comment_ranges(source: &str) -> Vec<CommentRange> {
53    let mut comments = Vec::new();
54    let bytes = source.as_bytes();
55    let len = bytes.len();
56    let mut pos = 0;
57
58    while pos < len {
59        let ch = bytes[pos];
60
61        // Skip whitespace
62        if ch == b' ' || ch == b'\t' || ch == b'\r' || ch == b'\n' {
63            pos += 1;
64            continue;
65        }
66
67        // Check for comment start
68        if ch == b'/' && pos + 1 < len {
69            let next = bytes[pos + 1];
70
71            if next == b'/' {
72                // Single-line comment
73                let Ok(start) = u32::try_from(pos) else {
74                    break;
75                };
76                pos += 2;
77
78                // Scan to end of line
79                while pos < len && bytes[pos] != b'\n' && bytes[pos] != b'\r' {
80                    pos += 1;
81                }
82
83                let has_trailing_new_line = pos < len;
84                comments.push(CommentRange::new(
85                    start,
86                    u32::try_from(pos).unwrap_or(u32::MAX),
87                    false,
88                    has_trailing_new_line,
89                ));
90
91                // Skip the newline
92                if pos < len && bytes[pos] == b'\r' {
93                    pos += 1;
94                }
95                if pos < len && bytes[pos] == b'\n' {
96                    pos += 1;
97                }
98                continue;
99            } else if next == b'*' {
100                // Multi-line comment
101                let Ok(start) = u32::try_from(pos) else {
102                    break;
103                };
104                pos += 2;
105
106                // Scan to closing */
107                let mut closed = false;
108                while pos + 1 < len {
109                    if bytes[pos] == b'*' && bytes[pos + 1] == b'/' {
110                        pos += 2;
111                        closed = true;
112                        break;
113                    }
114                    pos += 1;
115                }
116
117                if !closed {
118                    pos = len; // Unclosed comment - go to end
119                }
120
121                // Check for trailing newline
122                let has_trailing_new_line =
123                    pos < len && (bytes[pos] == b'\n' || bytes[pos] == b'\r');
124
125                comments.push(CommentRange::new(
126                    start,
127                    u32::try_from(pos).unwrap_or(u32::MAX),
128                    true,
129                    has_trailing_new_line,
130                ));
131                continue;
132            }
133        }
134
135        // Not in a comment or whitespace, skip this character
136        // (In practice, we'd stop at actual code, but for simplicity
137        // we're just extracting top-level comments here)
138        pos += 1;
139    }
140
141    comments
142}
143
144/// Get leading comments before a position.
145///
146/// Returns comments that appear before `pos` and after any previous code.
147#[must_use]
148pub fn get_leading_comments(
149    _source: &str,
150    pos: u32,
151    all_comments: &[CommentRange],
152) -> Vec<CommentRange> {
153    all_comments
154        .iter()
155        .filter(|c| c.end <= pos)
156        .cloned()
157        .collect()
158}
159
160/// Get trailing comments after a position.
161///
162/// Returns comments that appear after `pos` on the same line.
163#[must_use]
164pub fn get_trailing_comments(
165    source: &str,
166    pos: u32,
167    all_comments: &[CommentRange],
168) -> Vec<CommentRange> {
169    let bytes = source.as_bytes();
170
171    // Find the next newline after pos
172    let Ok(mut line_end) = usize::try_from(pos) else {
173        return Vec::new();
174    };
175    while line_end < bytes.len() && bytes[line_end] != b'\n' && bytes[line_end] != b'\r' {
176        line_end += 1;
177    }
178
179    let line_end = u32::try_from(line_end).unwrap_or(u32::MAX);
180
181    all_comments
182        .iter()
183        .filter(|c| c.pos >= pos && c.pos < line_end && !c.is_multi_line)
184        .cloned()
185        .collect()
186}
187
/// Format a single-line comment for output.
///
/// The text is expected to already carry its `//` prefix, so it is returned
/// unchanged as an owned string.
#[must_use]
pub fn format_single_line_comment(text: &str) -> String {
    String::from(text)
}
194
/// Format a multi-line comment for output.
///
/// Text with at most one line is returned unchanged. Otherwise the lines are
/// re-joined with `\n`, and every line after the first that is not blank is
/// prefixed with `indent`. The first line is never indented.
#[must_use]
pub fn format_multi_line_comment(text: &str, indent: &str) -> String {
    let mut rows = text.lines();
    let Some(first_row) = rows.next() else {
        return text.to_string();
    };
    let mut continuation = rows.peekable();
    if continuation.peek().is_none() {
        return text.to_string();
    }

    let mut formatted = String::from(first_row);
    for row in continuation {
        formatted.push('\n');
        // Blank continuation lines stay empty rather than gaining trailing
        // whitespace from the indent.
        if !row.trim().is_empty() {
            formatted.push_str(indent);
        }
        formatted.push_str(row);
    }
    formatted
}
217
218/// Check if a comment is a `JSDoc` comment.
219#[must_use]
220pub fn is_jsdoc_comment(comment: &CommentRange, source: &str) -> bool {
221    let text = comment.get_text(source);
222    text.starts_with("/**") && !text.starts_with("/***")
223}
224
225/// Check if a comment is a triple-slash directive.
226#[must_use]
227pub fn is_triple_slash_directive(comment: &CommentRange, source: &str) -> bool {
228    let text = comment.get_text(source);
229    text.starts_with("///")
230}
231
232/// Extract the content of a `JSDoc` comment (without the delimiters).
233#[must_use]
234pub fn get_jsdoc_content(comment: &CommentRange, source: &str) -> String {
235    let text = comment.get_text(source);
236    if text.starts_with("/**") && text.ends_with("*/") {
237        let inner = &text[3..text.len() - 2];
238        // Remove leading * from each line
239        inner
240            .lines()
241            .map(|line| {
242                let trimmed = line.trim_start();
243                if let Some(stripped) = trimmed.strip_prefix('*') {
244                    stripped.trim_start()
245                } else {
246                    trimmed
247                }
248            })
249            .collect::<Vec<_>>()
250            .join("\n")
251            .trim()
252            .to_string()
253    } else {
254        text.to_string()
255    }
256}
257
258/// Get leading comments from cached comment ranges.
259///
260/// This is an optimized version that uses pre-computed comment ranges
261/// instead of rescanning the source. Returns comments that precede the
262/// given position.
263///
264/// # Arguments
265/// * `comments` - The cached comment ranges from `SourceFileData`
266/// * `pos` - The position to find leading comments for
267///
268/// # Returns
269/// Vector of comment ranges that appear before the given position.
270/// Comments are filtered to only include those immediately preceding
271/// the position (with at most one line of whitespace between).
272#[must_use]
273pub fn get_leading_comments_from_cache(
274    comments: &[CommentRange],
275    pos: u32,
276    source: &str,
277) -> Vec<CommentRange> {
278    if comments.is_empty() {
279        return Vec::new();
280    }
281
282    // Binary search to find the partition point where comments end at or before `pos`
283    // Comments are sorted by their start position, but we need ones that *end* before pos
284    let idx = comments.partition_point(|c| c.end <= pos);
285
286    if idx == 0 {
287        return Vec::new(); // No comments before this position
288    }
289
290    let mut result: Vec<CommentRange> = Vec::new();
291
292    // Iterate backwards from the last comment that ends at or before `pos`
293    // Stop when we encounter comments that are too far away (> 2 newlines)
294    for i in (0..idx).rev() {
295        let comment = &comments[i];
296
297        // Check if there's too much whitespace between comment and target position
298        // For the first comment, check against `pos`; for subsequent ones, check against previous comment
299        let check_pos = if result.is_empty() {
300            pos
301        } else {
302            match result.last() {
303                Some(last) => last.pos,
304                None => pos,
305            }
306        };
307        let Ok(start) = usize::try_from(comment.end) else {
308            continue;
309        };
310        let Ok(end) = usize::try_from(check_pos) else {
311            continue;
312        };
313        if start > end || end > source.len() {
314            continue;
315        }
316        let Some(text_between) = source.get(start..end) else {
317            continue;
318        };
319        // Count newlines with early exit — we only need to know if count > 2
320        let mut newline_count = 0usize;
321        for byte in text_between.as_bytes() {
322            if *byte == b'\n' {
323                newline_count += 1;
324                if newline_count > 2 {
325                    break;
326                }
327            }
328        }
329
330        // Allow up to 2 newlines (JSDoc pattern: /** comment */ \n function)
331        if newline_count > 2 {
332            break;
333        }
334
335        result.push(comment.clone());
336
337        // Stop after collecting adjacent comments
338        // (if we've collected some and hit a gap, that's the boundary)
339        if newline_count >= 1 && result.len() > 1 {
340            break;
341        }
342    }
343
344    result.reverse(); // Restore original order
345    result
346}