Skip to main content

rumdl_lib/
doc_comment_lint.rs

1//! Linting of markdown embedded in Rust doc comments (`///` and `//!`).
2//!
3//! This module provides extraction and check-only logic for line doc comments.
4//! It is used by both the CLI and LSP to lint Rust doc comments.
5//!
6//! **Precondition:** Input content must be LF-normalized (no `\r\n`).
7//! The CLI path handles this via `normalize_line_ending`, but callers using
8//! these functions directly must normalize first.
9//!
10//! **Not supported:** Block doc comments (`/** ... */`) are not extracted.
11
12use crate::config as rumdl_config;
13use crate::lint_context::LintContext;
14use crate::rule::{LintWarning, Rule};
15
/// The kind of line doc comment: outer (`///`) or inner (`//!`).
///
/// Contiguous doc comment lines are grouped into blocks by kind; a change of
/// kind starts a new block.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocCommentKind {
    /// Outer doc comment (`///`)
    Outer,
    /// Inner doc comment (`//!`)
    Inner,
}
24
/// Metadata for a single line in a doc comment block.
///
/// One entry is recorded per source line of a block, in source order.
#[derive(Debug, Clone)]
pub struct DocCommentLineInfo {
    /// Leading whitespace before the doc comment prefix (e.g. `"    "` for indented code)
    pub leading_whitespace: String,
    /// The doc comment prefix as it appeared in source (e.g. `"/// "`, `"///"`, `"///\t"`).
    /// It is the three-character marker plus at most one following space or tab.
    pub prefix: String,
}
33
/// A contiguous block of same-kind doc comments extracted from a Rust source file.
///
/// Line numbers and byte offsets refer to the original file, so results
/// computed on `markdown` can be mapped back to the source.
#[derive(Debug, Clone)]
pub struct DocCommentBlock {
    /// Whether this is an outer (`///`) or inner (`//!`) doc comment.
    pub kind: DocCommentKind,
    /// 0-indexed line number of the first line in the original file.
    pub start_line: usize,
    /// 0-indexed line number of the last line in the original file (inclusive).
    pub end_line: usize,
    /// Byte offset of the first character of the first line in the block.
    /// This is the start of the line, i.e. before any leading whitespace.
    pub byte_start: usize,
    /// Byte offset past the last character (including `\n`) of the last line in the block.
    /// When the block's last line is the final, unterminated line of the file,
    /// this equals the file length.
    pub byte_end: usize,
    /// Extracted markdown content with prefixes stripped.
    /// Lines are joined with `\n`; there is no trailing newline.
    pub markdown: String,
    /// Per-line metadata for prefix restoration during fix mode.
    pub line_metadata: Vec<DocCommentLineInfo>,
    /// Length of leading whitespace + prefix (in bytes) for column offset remapping.
    /// Each entry corresponds to a line in `line_metadata`.
    pub prefix_byte_lengths: Vec<usize>,
}
55
56/// Classify a line as a doc comment, returning the kind, leading whitespace,
57/// and the full prefix (including the conventional single space if present).
58///
59/// Returns `None` if the line is not a doc comment. A doc comment must start
60/// with optional whitespace followed by `///` or `//!`. Lines starting with
61/// `////` are regular comments (not doc comments).
62///
63/// Handles all valid rustdoc forms:
64///
65/// - `/// content` (space after prefix)
66/// - `///content` (no space — valid rustdoc, content is `content`)
67/// - `///` (bare prefix, empty content)
68/// - `///\tcontent` (tab after prefix)
69fn classify_doc_comment_line(line: &str) -> Option<(DocCommentKind, String, String)> {
70    let trimmed = line.trim_start();
71    let leading_ws = &line[..line.len() - trimmed.len()];
72
73    // `////` is NOT a doc comment (regular comment)
74    if trimmed.starts_with("////") {
75        return None;
76    }
77
78    if let Some(after) = trimmed.strip_prefix("///") {
79        // Determine the prefix: include the conventional space/tab if present
80        let prefix = if after.starts_with(' ') || after.starts_with('\t') {
81            format!("///{}", &after[..1])
82        } else {
83            "///".to_string()
84        };
85        Some((DocCommentKind::Outer, leading_ws.to_string(), prefix))
86    } else if let Some(after) = trimmed.strip_prefix("//!") {
87        let prefix = if after.starts_with(' ') || after.starts_with('\t') {
88            format!("//!{}", &after[..1])
89        } else {
90            "//!".to_string()
91        };
92        Some((DocCommentKind::Inner, leading_ws.to_string(), prefix))
93    } else {
94        None
95    }
96}
97
98/// Extract the markdown content from a doc comment line after stripping the prefix.
99fn extract_markdown_from_line(trimmed: &str, kind: DocCommentKind) -> &str {
100    let prefix = match kind {
101        DocCommentKind::Outer => "///",
102        DocCommentKind::Inner => "//!",
103    };
104
105    let after_prefix = &trimmed[prefix.len()..];
106    // Strip exactly one leading space if present (conventional rustdoc formatting)
107    if let Some(stripped) = after_prefix.strip_prefix(' ') {
108        stripped
109    } else {
110        after_prefix
111    }
112}
113
114/// Extract all doc comment blocks from Rust source code.
115///
116/// Groups contiguous same-kind doc comment lines into blocks. A block boundary
117/// occurs when:
118///
119/// - A line is not a doc comment
120/// - The doc comment kind changes (from `///` to `//!` or vice versa)
121///
122/// Each block's `markdown` field contains the extracted markdown with prefixes
123/// stripped. The `line_metadata` field preserves the original indentation and
124/// prefix for each line, enabling faithful restoration during fix mode.
125///
126/// **Precondition:** `content` must be LF-normalized (no `\r\n`).
127pub fn extract_doc_comment_blocks(content: &str) -> Vec<DocCommentBlock> {
128    let mut blocks = Vec::new();
129    let mut current_block: Option<DocCommentBlock> = None;
130    let mut byte_offset = 0;
131
132    let lines: Vec<&str> = content.split('\n').collect();
133    let num_lines = lines.len();
134
135    for (line_idx, line) in lines.iter().enumerate() {
136        let line_byte_start = byte_offset;
137        // Only add 1 for the newline if this is not the last segment
138        let has_newline = line_idx < num_lines - 1 || content.ends_with('\n');
139        let line_byte_end = byte_offset + line.len() + if has_newline { 1 } else { 0 };
140
141        if let Some((kind, leading_ws, prefix)) = classify_doc_comment_line(line) {
142            let trimmed = line.trim_start();
143            let md_content = extract_markdown_from_line(trimmed, kind);
144
145            // Compute column offset: leading whitespace bytes + prefix bytes
146            let prefix_byte_len = leading_ws.len() + prefix.len();
147
148            let line_info = DocCommentLineInfo {
149                leading_whitespace: leading_ws,
150                prefix,
151            };
152
153            match current_block.as_mut() {
154                Some(block) if block.kind == kind => {
155                    // Continue the current block
156                    block.end_line = line_idx;
157                    block.byte_end = line_byte_end;
158                    block.markdown.push('\n');
159                    block.markdown.push_str(md_content);
160                    block.line_metadata.push(line_info);
161                    block.prefix_byte_lengths.push(prefix_byte_len);
162                }
163                _ => {
164                    // Flush any existing block
165                    if let Some(block) = current_block.take() {
166                        blocks.push(block);
167                    }
168                    // Start a new block
169                    current_block = Some(DocCommentBlock {
170                        kind,
171                        start_line: line_idx,
172                        end_line: line_idx,
173                        byte_start: line_byte_start,
174                        byte_end: line_byte_end,
175                        markdown: md_content.to_string(),
176                        line_metadata: vec![line_info],
177                        prefix_byte_lengths: vec![prefix_byte_len],
178                    });
179                }
180            }
181        } else {
182            // Not a doc comment line — flush current block
183            if let Some(block) = current_block.take() {
184                blocks.push(block);
185            }
186        }
187
188        byte_offset = line_byte_end;
189    }
190
191    // Flush final block
192    if let Some(block) = current_block.take() {
193        blocks.push(block);
194    }
195
196    blocks
197}
198
/// Rules that should be skipped when linting doc comment blocks.
///
/// Entries are rule names, matched against `Rule::name()` by
/// `check_doc_comment_blocks`.
///
/// - MD025: Multiple H1 headings are standard in rustdoc (`# Errors`, `# Examples`, `# Safety`).
/// - MD033: HTML tags like `<div class="warning">` are required syntax for rustdoc warning blocks.
/// - MD040: Rustdoc assumes unlabeled code blocks are Rust, so requiring language labels is noise.
/// - MD041: "First line should be a heading" doesn't apply — doc blocks aren't standalone documents.
/// - MD047: "File should end with a newline" doesn't apply for the same reason.
/// - MD051: Rustdoc anchors like `#method.bar` and `#structfield.name` aren't document headings.
/// - MD052: Intra-doc links like `[crate::io]` are rustdoc syntax, not markdown reference links.
/// - MD054: Shortcut reference style `[crate::module]` is the canonical intra-doc link syntax.
pub const SKIPPED_RULES: &[&str] = &["MD025", "MD033", "MD040", "MD041", "MD047", "MD051", "MD052", "MD054"];
210
211/// Check all doc comment blocks in a Rust source file and return lint warnings.
212///
213/// Warnings have their line numbers and column numbers remapped to point to the
214/// correct location in the original Rust file. Fix suggestions are stripped
215/// (fixes are only applied through the fix mode path in the binary crate).
216///
217/// Empty doc comment blocks (only whitespace content) are skipped.
218pub fn check_doc_comment_blocks(
219    content: &str,
220    rules: &[Box<dyn Rule>],
221    config: &rumdl_config::Config,
222) -> Vec<LintWarning> {
223    let blocks = extract_doc_comment_blocks(content);
224    let mut all_warnings = Vec::new();
225
226    for block in &blocks {
227        // Skip empty blocks to avoid spurious warnings
228        if block.markdown.trim().is_empty() {
229            continue;
230        }
231
232        let ctx = LintContext::new(&block.markdown, config.markdown_flavor(), None);
233
234        for rule in rules {
235            if SKIPPED_RULES.contains(&rule.name()) {
236                continue;
237            }
238
239            if let Ok(rule_warnings) = rule.check(&ctx) {
240                for warning in rule_warnings {
241                    // Remap line numbers:
242                    // warning.line is 1-indexed within the block markdown
243                    // block.start_line is 0-indexed in the file
244                    // (1-indexed block) + (0-indexed file start) = 1-indexed file line
245                    let file_line = warning.line + block.start_line;
246                    let file_end_line = warning.end_line + block.start_line;
247
248                    // Remap column: add the prefix byte length for the corresponding line
249                    let block_line_idx = warning.line.saturating_sub(1);
250                    let col_offset = block.prefix_byte_lengths.get(block_line_idx).copied().unwrap_or(0);
251                    let file_column = warning.column + col_offset;
252
253                    let block_end_line_idx = warning.end_line.saturating_sub(1);
254                    let end_col_offset = block.prefix_byte_lengths.get(block_end_line_idx).copied().unwrap_or(0);
255                    let file_end_column = warning.end_column + end_col_offset;
256
257                    all_warnings.push(LintWarning {
258                        line: file_line,
259                        end_line: file_end_line,
260                        column: file_column,
261                        end_column: file_end_column,
262                        fix: None,
263                        ..warning
264                    });
265                }
266            }
267        }
268    }
269
270    all_warnings
271}
272
// Unit tests for line classification and block extraction.
#[cfg(test)]
mod tests {
    use super::*;

    // --- classify_doc_comment_line: recognised doc comment forms ---

    #[test]
    fn test_classify_outer_doc_comment() {
        let (kind, ws, prefix) = classify_doc_comment_line("/// Hello").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "");
        assert_eq!(prefix, "/// ");
    }

    #[test]
    fn test_classify_inner_doc_comment() {
        let (kind, ws, prefix) = classify_doc_comment_line("//! Module doc").unwrap();
        assert_eq!(kind, DocCommentKind::Inner);
        assert_eq!(ws, "");
        assert_eq!(prefix, "//! ");
    }

    #[test]
    fn test_classify_empty_outer() {
        let (kind, ws, prefix) = classify_doc_comment_line("///").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "");
        assert_eq!(prefix, "///");
    }

    #[test]
    fn test_classify_empty_inner() {
        let (kind, ws, prefix) = classify_doc_comment_line("//!").unwrap();
        assert_eq!(kind, DocCommentKind::Inner);
        assert_eq!(ws, "");
        assert_eq!(prefix, "//!");
    }

    #[test]
    fn test_classify_indented() {
        let (kind, ws, prefix) = classify_doc_comment_line("    /// Indented").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "    ");
        assert_eq!(prefix, "/// ");
    }

    #[test]
    fn test_classify_no_space_after_prefix() {
        // `///content` is valid rustdoc — content is "content"
        let (kind, ws, prefix) = classify_doc_comment_line("///content").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "");
        assert_eq!(prefix, "///");
    }

    #[test]
    fn test_classify_tab_after_prefix() {
        // A single tab after the marker is recorded as part of the prefix
        let (kind, ws, prefix) = classify_doc_comment_line("///\tcontent").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "");
        assert_eq!(prefix, "///\t");
    }

    #[test]
    fn test_classify_inner_no_space() {
        let (kind, _, prefix) = classify_doc_comment_line("//!content").unwrap();
        assert_eq!(kind, DocCommentKind::Inner);
        assert_eq!(prefix, "//!");
    }

    // --- classify_doc_comment_line: lines that are not doc comments ---

    #[test]
    fn test_classify_four_slashes_is_not_doc() {
        assert!(classify_doc_comment_line("//// Not a doc comment").is_none());
    }

    #[test]
    fn test_classify_regular_comment() {
        assert!(classify_doc_comment_line("// Regular comment").is_none());
    }

    #[test]
    fn test_classify_code_line() {
        assert!(classify_doc_comment_line("let x = 3;").is_none());
    }

    // --- extract_doc_comment_blocks: content and grouping ---

    #[test]
    fn test_extract_no_space_content() {
        let content = "///no space here\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].markdown, "no space here");
    }

    #[test]
    fn test_extract_basic_outer_block() {
        let content = "/// First line\n/// Second line\nfn foo() {}\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].kind, DocCommentKind::Outer);
        assert_eq!(blocks[0].start_line, 0);
        assert_eq!(blocks[0].end_line, 1);
        assert_eq!(blocks[0].markdown, "First line\nSecond line");
        assert_eq!(blocks[0].line_metadata.len(), 2);
    }

    #[test]
    fn test_extract_basic_inner_block() {
        let content = "//! Module doc\n//! More info\n\nuse std::io;\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].kind, DocCommentKind::Inner);
        assert_eq!(blocks[0].markdown, "Module doc\nMore info");
    }

    #[test]
    fn test_extract_multiple_blocks() {
        // A non-doc line between doc comments splits them into separate blocks
        let content = "/// Block 1\nfn foo() {}\n/// Block 2\nfn bar() {}\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 2);
        assert_eq!(blocks[0].markdown, "Block 1");
        assert_eq!(blocks[0].start_line, 0);
        assert_eq!(blocks[1].markdown, "Block 2");
        assert_eq!(blocks[1].start_line, 2);
    }

    #[test]
    fn test_extract_mixed_kinds_separate_blocks() {
        // Adjacent lines of different kinds never share a block
        let content = "//! Inner\n/// Outer\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 2);
        assert_eq!(blocks[0].kind, DocCommentKind::Inner);
        assert_eq!(blocks[1].kind, DocCommentKind::Outer);
    }

    #[test]
    fn test_extract_empty_doc_line() {
        // A bare `///` contributes an empty markdown line and keeps the block open
        let content = "/// First\n///\n/// Third\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].markdown, "First\n\nThird");
    }

    #[test]
    fn test_extract_preserves_extra_space() {
        // Only one space after the marker is stripped; the second is content
        let content = "///  Two spaces\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].markdown, " Two spaces");
    }

    #[test]
    fn test_extract_indented_doc_comments() {
        let content = "    /// Indented\n    /// More\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].markdown, "Indented\nMore");
        assert_eq!(blocks[0].line_metadata[0].leading_whitespace, "    ");
    }

    #[test]
    fn test_no_doc_comments() {
        let content = "fn main() {\n    let x = 3;\n}\n";
        let blocks = extract_doc_comment_blocks(content);
        assert!(blocks.is_empty());
    }

    // --- extract_doc_comment_blocks: byte offsets and prefix lengths ---

    #[test]
    fn test_byte_offsets() {
        let content = "/// Hello\nfn foo() {}\n/// World\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 2);
        // First block: "/// Hello\n" = 10 bytes
        assert_eq!(blocks[0].byte_start, 0);
        assert_eq!(blocks[0].byte_end, 10);
        // Second block starts after "fn foo() {}\n" (12 bytes), at offset 22
        assert_eq!(blocks[1].byte_start, 22);
        assert_eq!(blocks[1].byte_end, 32);
    }

    #[test]
    fn test_byte_offsets_no_trailing_newline() {
        let content = "/// Hello";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].byte_start, 0);
        // No trailing newline, so byte_end == content.len()
        assert_eq!(blocks[0].byte_end, content.len());
    }

    #[test]
    fn test_prefix_byte_lengths() {
        let content = "    /// Indented\n/// Top-level\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        // "    " (4) + "/// " (4) = 8 bytes for first line
        assert_eq!(blocks[0].prefix_byte_lengths[0], 8);
        // "" (0) + "/// " (4) = 4 bytes for second line
        assert_eq!(blocks[0].prefix_byte_lengths[1], 4);
    }
}