Skip to main content

rumdl_lib/
doc_comment_lint.rs

1//! Linting of markdown embedded in Rust doc comments (`///` and `//!`).
2//!
3//! This module provides extraction and check-only logic for line doc comments.
4//! It is used by both the CLI and LSP to lint Rust doc comments.
5//!
6//! **Precondition:** Input content must be LF-normalized (no `\r\n`).
7//! The CLI path handles this via `normalize_line_ending`, but callers using
8//! these functions directly must normalize first.
9//!
10//! **Not supported:** Block doc comments (`/** ... */`) are not extracted.
11
12use crate::config as rumdl_config;
13use crate::lint_context::LintContext;
14use crate::rule::{LintWarning, Rule};
15
/// Distinguishes the two flavours of Rust line doc comments.
///
/// Blocks are grouped by this value, so a run of `///` lines and a run of
/// `//!` lines are never merged into the same block.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocCommentKind {
    /// A `///` line (outer doc comment).
    Outer,
    /// A `//!` line (inner doc comment).
    Inner,
}
24
/// What was removed from the front of one doc comment line.
///
/// Together, the two fields describe everything that precedes the markdown
/// content on the original source line.
#[derive(Debug, Clone)]
pub struct DocCommentLineInfo {
    /// Indentation found before the doc comment marker (e.g. `"    "` for
    /// a comment nested inside indented code).
    pub leading_whitespace: String,
    /// The marker exactly as written in the source, including a single
    /// trailing space or tab when one was present
    /// (e.g. `"/// "`, `"///"`, `"///\t"`).
    pub prefix: String,
}
33
34/// A contiguous block of same-kind doc comments extracted from a Rust source file.
35#[derive(Debug, Clone)]
36pub struct DocCommentBlock {
37    /// Whether this is an outer (`///`) or inner (`//!`) doc comment.
38    pub kind: DocCommentKind,
39    /// 0-indexed line number of the first line in the original file.
40    pub start_line: usize,
41    /// 0-indexed line number of the last line in the original file (inclusive).
42    pub end_line: usize,
43    /// Byte offset of the first character of the first line in the block.
44    pub byte_start: usize,
45    /// Byte offset past the last character (including `\n`) of the last line in the block.
46    pub byte_end: usize,
47    /// Extracted markdown content with prefixes stripped.
48    pub markdown: String,
49    /// Per-line metadata for prefix restoration during fix mode.
50    pub line_metadata: Vec<DocCommentLineInfo>,
51    /// Length of leading whitespace + prefix (in bytes) for column offset remapping.
52    /// Each entry corresponds to a line in `line_metadata`.
53    pub prefix_byte_lengths: Vec<usize>,
54}
55
56/// Classify a line as a doc comment, returning the kind, leading whitespace,
57/// and the full prefix (including the conventional single space if present).
58///
59/// Returns `None` if the line is not a doc comment. A doc comment must start
60/// with optional whitespace followed by `///` or `//!`. Lines starting with
61/// `////` are regular comments (not doc comments).
62///
63/// Handles all valid rustdoc forms:
64///
65/// - `/// content` (space after prefix)
66/// - `///content` (no space — valid rustdoc, content is `content`)
67/// - `///` (bare prefix, empty content)
68/// - `///\tcontent` (tab after prefix)
69fn classify_doc_comment_line(line: &str) -> Option<(DocCommentKind, String, String)> {
70    let trimmed = line.trim_start();
71    let leading_ws = &line[..line.len() - trimmed.len()];
72
73    // `////` is NOT a doc comment (regular comment)
74    if trimmed.starts_with("////") {
75        return None;
76    }
77
78    if trimmed.starts_with("///") {
79        let after = &trimmed[3..];
80        // Determine the prefix: include the conventional space/tab if present
81        let prefix = if after.starts_with(' ') || after.starts_with('\t') {
82            format!("///{}", &after[..1])
83        } else {
84            "///".to_string()
85        };
86        Some((DocCommentKind::Outer, leading_ws.to_string(), prefix))
87    } else if trimmed.starts_with("//!") {
88        let after = &trimmed[3..];
89        let prefix = if after.starts_with(' ') || after.starts_with('\t') {
90            format!("//!{}", &after[..1])
91        } else {
92            "//!".to_string()
93        };
94        Some((DocCommentKind::Inner, leading_ws.to_string(), prefix))
95    } else {
96        None
97    }
98}
99
100/// Extract the markdown content from a doc comment line after stripping the prefix.
101fn extract_markdown_from_line(trimmed: &str, kind: DocCommentKind) -> &str {
102    let prefix = match kind {
103        DocCommentKind::Outer => "///",
104        DocCommentKind::Inner => "//!",
105    };
106
107    let after_prefix = &trimmed[prefix.len()..];
108    // Strip exactly one leading space if present (conventional rustdoc formatting)
109    if after_prefix.starts_with(' ') {
110        &after_prefix[1..]
111    } else {
112        after_prefix
113    }
114}
115
116/// Extract all doc comment blocks from Rust source code.
117///
118/// Groups contiguous same-kind doc comment lines into blocks. A block boundary
119/// occurs when:
120///
121/// - A line is not a doc comment
122/// - The doc comment kind changes (from `///` to `//!` or vice versa)
123///
124/// Each block's `markdown` field contains the extracted markdown with prefixes
125/// stripped. The `line_metadata` field preserves the original indentation and
126/// prefix for each line, enabling faithful restoration during fix mode.
127///
128/// **Precondition:** `content` must be LF-normalized (no `\r\n`).
129pub fn extract_doc_comment_blocks(content: &str) -> Vec<DocCommentBlock> {
130    let mut blocks = Vec::new();
131    let mut current_block: Option<DocCommentBlock> = None;
132    let mut byte_offset = 0;
133
134    let lines: Vec<&str> = content.split('\n').collect();
135    let num_lines = lines.len();
136
137    for (line_idx, line) in lines.iter().enumerate() {
138        let line_byte_start = byte_offset;
139        // Only add 1 for the newline if this is not the last segment
140        let has_newline = line_idx < num_lines - 1 || content.ends_with('\n');
141        let line_byte_end = byte_offset + line.len() + if has_newline { 1 } else { 0 };
142
143        if let Some((kind, leading_ws, prefix)) = classify_doc_comment_line(line) {
144            let trimmed = line.trim_start();
145            let md_content = extract_markdown_from_line(trimmed, kind);
146
147            // Compute column offset: leading whitespace bytes + prefix bytes
148            let prefix_byte_len = leading_ws.len() + prefix.len();
149
150            let line_info = DocCommentLineInfo {
151                leading_whitespace: leading_ws,
152                prefix,
153            };
154
155            match current_block.as_mut() {
156                Some(block) if block.kind == kind => {
157                    // Continue the current block
158                    block.end_line = line_idx;
159                    block.byte_end = line_byte_end;
160                    block.markdown.push('\n');
161                    block.markdown.push_str(md_content);
162                    block.line_metadata.push(line_info);
163                    block.prefix_byte_lengths.push(prefix_byte_len);
164                }
165                _ => {
166                    // Flush any existing block
167                    if let Some(block) = current_block.take() {
168                        blocks.push(block);
169                    }
170                    // Start a new block
171                    current_block = Some(DocCommentBlock {
172                        kind,
173                        start_line: line_idx,
174                        end_line: line_idx,
175                        byte_start: line_byte_start,
176                        byte_end: line_byte_end,
177                        markdown: md_content.to_string(),
178                        line_metadata: vec![line_info],
179                        prefix_byte_lengths: vec![prefix_byte_len],
180                    });
181                }
182            }
183        } else {
184            // Not a doc comment line — flush current block
185            if let Some(block) = current_block.take() {
186                blocks.push(block);
187            }
188        }
189
190        byte_offset = line_byte_end;
191    }
192
193    // Flush final block
194    if let Some(block) = current_block.take() {
195        blocks.push(block);
196    }
197
198    blocks
199}
200
/// Names of the rules that are never run against doc comment blocks.
///
/// Each of these assumes a standalone markdown document or plain markdown
/// link syntax, neither of which holds for rustdoc text.
pub const SKIPPED_RULES: &[&str] = &[
    // Several H1 headings are normal in rustdoc (`# Errors`, `# Examples`, `# Safety`).
    "MD025",
    // "First line should be a heading": a doc block is not a standalone document.
    "MD041",
    // "File should end with a newline": same reason, a doc block is not a file.
    "MD047",
    // Intra-doc links such as `[crate::io]` are rustdoc syntax, not reference links.
    "MD052",
];
208
209/// Check all doc comment blocks in a Rust source file and return lint warnings.
210///
211/// Warnings have their line numbers and column numbers remapped to point to the
212/// correct location in the original Rust file. Fix suggestions are stripped
213/// (fixes are only applied through the fix mode path in the binary crate).
214///
215/// Empty doc comment blocks (only whitespace content) are skipped.
216pub fn check_doc_comment_blocks(
217    content: &str,
218    rules: &[Box<dyn Rule>],
219    config: &rumdl_config::Config,
220) -> Vec<LintWarning> {
221    let blocks = extract_doc_comment_blocks(content);
222    let mut all_warnings = Vec::new();
223
224    for block in &blocks {
225        // Skip empty blocks to avoid spurious warnings
226        if block.markdown.trim().is_empty() {
227            continue;
228        }
229
230        let ctx = LintContext::new(&block.markdown, config.markdown_flavor(), None);
231
232        for rule in rules {
233            if SKIPPED_RULES.contains(&rule.name()) {
234                continue;
235            }
236
237            if let Ok(rule_warnings) = rule.check(&ctx) {
238                for warning in rule_warnings {
239                    // Remap line numbers:
240                    // warning.line is 1-indexed within the block markdown
241                    // block.start_line is 0-indexed in the file
242                    // (1-indexed block) + (0-indexed file start) = 1-indexed file line
243                    let file_line = warning.line + block.start_line;
244                    let file_end_line = warning.end_line + block.start_line;
245
246                    // Remap column: add the prefix byte length for the corresponding line
247                    let block_line_idx = warning.line.saturating_sub(1);
248                    let col_offset = block.prefix_byte_lengths.get(block_line_idx).copied().unwrap_or(0);
249                    let file_column = warning.column + col_offset;
250
251                    let block_end_line_idx = warning.end_line.saturating_sub(1);
252                    let end_col_offset = block.prefix_byte_lengths.get(block_end_line_idx).copied().unwrap_or(0);
253                    let file_end_column = warning.end_column + end_col_offset;
254
255                    all_warnings.push(LintWarning {
256                        line: file_line,
257                        end_line: file_end_line,
258                        column: file_column,
259                        end_column: file_end_column,
260                        fix: None,
261                        ..warning
262                    });
263                }
264            }
265        }
266    }
267
268    all_warnings
269}
270
#[cfg(test)]
mod tests {
    use super::*;

    /// Classify `line`, failing the test if it is not seen as a doc comment.
    fn classified(line: &str) -> (DocCommentKind, String, String) {
        classify_doc_comment_line(line).expect("line should be classified as a doc comment")
    }

    /// Build the tuple a successful classification is expected to produce.
    fn parts(kind: DocCommentKind, ws: &str, prefix: &str) -> (DocCommentKind, String, String) {
        (kind, ws.to_string(), prefix.to_string())
    }

    /// Extract the blocks of `source`, asserting that there is exactly one.
    fn only_block(source: &str) -> DocCommentBlock {
        let mut blocks = extract_doc_comment_blocks(source);
        assert_eq!(blocks.len(), 1);
        blocks.remove(0)
    }

    // --- classify_doc_comment_line ---------------------------------------

    #[test]
    fn test_classify_outer_doc_comment() {
        assert_eq!(classified("/// Hello"), parts(DocCommentKind::Outer, "", "/// "));
    }

    #[test]
    fn test_classify_inner_doc_comment() {
        assert_eq!(classified("//! Module doc"), parts(DocCommentKind::Inner, "", "//! "));
    }

    #[test]
    fn test_classify_empty_outer() {
        assert_eq!(classified("///"), parts(DocCommentKind::Outer, "", "///"));
    }

    #[test]
    fn test_classify_empty_inner() {
        assert_eq!(classified("//!"), parts(DocCommentKind::Inner, "", "//!"));
    }

    #[test]
    fn test_classify_indented() {
        assert_eq!(classified("    /// Indented"), parts(DocCommentKind::Outer, "    ", "/// "));
    }

    #[test]
    fn test_classify_no_space_after_prefix() {
        // `///content` is valid rustdoc; the content is "content".
        assert_eq!(classified("///content"), parts(DocCommentKind::Outer, "", "///"));
    }

    #[test]
    fn test_classify_tab_after_prefix() {
        assert_eq!(classified("///\tcontent"), parts(DocCommentKind::Outer, "", "///\t"));
    }

    #[test]
    fn test_classify_inner_no_space() {
        let (kind, _, prefix) = classified("//!content");
        assert_eq!((kind, prefix.as_str()), (DocCommentKind::Inner, "//!"));
    }

    #[test]
    fn test_classify_four_slashes_is_not_doc() {
        assert!(classify_doc_comment_line("//// Not a doc comment").is_none());
    }

    #[test]
    fn test_classify_regular_comment() {
        assert!(classify_doc_comment_line("// Regular comment").is_none());
    }

    #[test]
    fn test_classify_code_line() {
        assert!(classify_doc_comment_line("let x = 3;").is_none());
    }

    // --- extract_doc_comment_blocks --------------------------------------

    #[test]
    fn test_extract_no_space_content() {
        let block = only_block("///no space here\n");
        assert_eq!(block.markdown, "no space here");
    }

    #[test]
    fn test_extract_basic_outer_block() {
        let block = only_block("/// First line\n/// Second line\nfn foo() {}\n");
        assert_eq!(block.kind, DocCommentKind::Outer);
        assert_eq!(block.start_line, 0);
        assert_eq!(block.end_line, 1);
        assert_eq!(block.markdown, "First line\nSecond line");
        assert_eq!(block.line_metadata.len(), 2);
    }

    #[test]
    fn test_extract_basic_inner_block() {
        let block = only_block("//! Module doc\n//! More info\n\nuse std::io;\n");
        assert_eq!(block.kind, DocCommentKind::Inner);
        assert_eq!(block.markdown, "Module doc\nMore info");
    }

    #[test]
    fn test_extract_multiple_blocks() {
        let blocks = extract_doc_comment_blocks("/// Block 1\nfn foo() {}\n/// Block 2\nfn bar() {}\n");
        assert_eq!(blocks.len(), 2);

        let (first, second) = (&blocks[0], &blocks[1]);
        assert_eq!(first.markdown, "Block 1");
        assert_eq!(first.start_line, 0);
        assert_eq!(second.markdown, "Block 2");
        assert_eq!(second.start_line, 2);
    }

    #[test]
    fn test_extract_mixed_kinds_separate_blocks() {
        let blocks = extract_doc_comment_blocks("//! Inner\n/// Outer\n");
        assert_eq!(blocks.len(), 2);
        assert_eq!(blocks[0].kind, DocCommentKind::Inner);
        assert_eq!(blocks[1].kind, DocCommentKind::Outer);
    }

    #[test]
    fn test_extract_empty_doc_line() {
        let block = only_block("/// First\n///\n/// Third\n");
        assert_eq!(block.markdown, "First\n\nThird");
    }

    #[test]
    fn test_extract_preserves_extra_space() {
        // Only one space is stripped; the second one is markdown content.
        let block = only_block("///  Two spaces\n");
        assert_eq!(block.markdown, " Two spaces");
    }

    #[test]
    fn test_extract_indented_doc_comments() {
        let block = only_block("    /// Indented\n    /// More\n");
        assert_eq!(block.markdown, "Indented\nMore");
        assert_eq!(block.line_metadata[0].leading_whitespace, "    ");
    }

    #[test]
    fn test_no_doc_comments() {
        let blocks = extract_doc_comment_blocks("fn main() {\n    let x = 3;\n}\n");
        assert!(blocks.is_empty());
    }

    // --- byte offsets and prefix lengths ---------------------------------

    #[test]
    fn test_byte_offsets() {
        let blocks = extract_doc_comment_blocks("/// Hello\nfn foo() {}\n/// World\n");
        assert_eq!(blocks.len(), 2);

        // "/// Hello\n" occupies bytes 0..10.
        assert_eq!(blocks[0].byte_start, 0);
        assert_eq!(blocks[0].byte_end, 10);

        // "fn foo() {}\n" is 12 bytes, so the second block spans 22..32.
        assert_eq!(blocks[1].byte_start, 22);
        assert_eq!(blocks[1].byte_end, 32);
    }

    #[test]
    fn test_byte_offsets_no_trailing_newline() {
        let source = "/// Hello";
        let block = only_block(source);
        assert_eq!(block.byte_start, 0);
        // Without a trailing newline the block ends at the end of the input.
        assert_eq!(block.byte_end, source.len());
    }

    #[test]
    fn test_prefix_byte_lengths() {
        let block = only_block("    /// Indented\n/// Top-level\n");
        // First line: "    " (4) + "/// " (4) = 8 bytes.
        assert_eq!(block.prefix_byte_lengths[0], 8);
        // Second line: "" (0) + "/// " (4) = 4 bytes.
        assert_eq!(block.prefix_byte_lengths[1], 4);
    }
}