Skip to main content

mago_syntax/comments/
mod.rs

1use crate::ast::Trivia;
2use crate::ast::TriviaKind;
3
4pub mod docblock;
5
6/// Splits a byte slice into lines on `\n`, returning each line without its trailing
7/// `\n` (and trailing `\r` if present). Empty trailing line after a final newline is
8/// dropped to mirror `str::lines` behaviour.
9fn byte_lines(input: &[u8]) -> impl Iterator<Item = &[u8]> + '_ {
10    let mut rest = input;
11    std::iter::from_fn(move || {
12        if rest.is_empty() {
13            return None;
14        }
15        match memchr::memchr(b'\n', rest) {
16            Some(pos) => {
17                let mut line = &rest[..pos];
18                if line.last() == Some(&b'\r') {
19                    line = &line[..line.len() - 1];
20                }
21
22                rest = &rest[pos + 1..];
23                Some(line)
24            }
25            None => {
26                let line = rest;
27                rest = &[];
28                Some(line)
29            }
30        }
31    })
32}
33
/// Returns `input` with its leading ASCII whitespace removed.
///
/// Whitespace is whatever `u8::is_ascii_whitespace` accepts. The result is always a
/// suffix of `input`; when every byte is whitespace, it is the empty slice at the end
/// of `input`.
fn trim_ascii_whitespace_start(input: &[u8]) -> &[u8] {
    let mut remaining = input;
    while let Some((first, tail)) = remaining.split_first() {
        if !first.is_ascii_whitespace() {
            break;
        }
        remaining = tail;
    }
    remaining
}
38
39/// Splits a comment into lines, preserving the offset of each line from the start of the trivia.
40///
41/// This is crucial for calculating the precise `Span` of pragmas within a comment.
42///
43/// # Returns
44///
45/// A `Vec` of `(u32, &[u8])` tuples, where the `u32` is the byte offset of the
46/// line from the start of the entire trivia text (including `/**`, `//`, etc.),
47/// and the `&[u8]` is the cleaned line content.
48#[inline]
49#[must_use]
50pub fn comment_lines<'arena>(trivia: &Trivia<'arena>) -> Vec<(u32, &'arena [u8])> {
51    let full_text = trivia.value;
52    let (content_start_offset, content_end_offset) = match trivia.kind {
53        TriviaKind::MultiLineComment => (2u32, full_text.len() as u32 - 2),
54        TriviaKind::DocBlockComment => (3u32, full_text.len() as u32 - 2),
55        TriviaKind::SingleLineComment => (2u32, full_text.len() as u32),
56        TriviaKind::HashComment => (1u32, full_text.len() as u32),
57        TriviaKind::WhiteSpace => return vec![],
58    };
59
60    // Handle empty comments like `/**/` to prevent slicing panics.
61    if content_start_offset >= content_end_offset {
62        return vec![];
63    }
64
65    let content = &full_text[content_start_offset as usize..content_end_offset as usize];
66
67    let mut lines = Vec::new();
68
69    for line in byte_lines(content) {
70        // Calculate the offset of the line relative to the start of the `content` slice.
71        let relative_line_offset = (line.as_ptr() as u32) - (content.as_ptr() as u32);
72        // Add the initial offset to get the position from the start of the entire trivia string.
73        let offset_in_trivia = content_start_offset + relative_line_offset;
74
75        let cleaned_line = if trivia.kind.is_block_comment() {
76            let trimmed = trim_ascii_whitespace_start(line);
77            if let Some(stripped) = trimmed.strip_prefix(b"*") { trim_ascii_whitespace_start(stripped) } else { line }
78        } else {
79            line
80        };
81
82        // Calculate how many bytes were trimmed from the start of the original line slice.
83        let trimmed_bytes = (cleaned_line.as_ptr() as u32) - (line.as_ptr() as u32);
84        let final_offset = offset_in_trivia + trimmed_bytes;
85
86        lines.push((final_offset, cleaned_line));
87    }
88
89    lines
90}