rumdl_lib/
doc_comment_lint.rs

1//! Linting of markdown embedded in Rust doc comments (`///` and `//!`).
2//!
3//! This module provides extraction and check-only logic for line doc comments.
4//! It is used by both the CLI and LSP to lint Rust doc comments.
5//!
6//! **Precondition:** Input content must be LF-normalized (no `\r\n`).
7//! The CLI path handles this via `normalize_line_ending`, but callers using
8//! these functions directly must normalize first.
9//!
//! **Not supported:** Block doc comments (`/** ... */` and `/*! ... */`) are not extracted.
11
12use crate::config as rumdl_config;
13use crate::lint_context::LintContext;
14use crate::rule::{LintWarning, Rule};
15use crate::rules::md013_line_length::MD013LineLength;
16
/// Which of the two line doc comment markers a line (or block) uses.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DocCommentKind {
    /// A `///` line (outer doc comment).
    Outer,
    /// A `//!` line (inner doc comment).
    Inner,
}
25
/// How one doc comment line was introduced in the original source.
///
/// Stores the indentation and the comment marker exactly as written, so the
/// text in front of the markdown content can be reproduced later.
#[derive(Clone, Debug)]
pub struct DocCommentLineInfo {
    /// Whitespace found before the comment marker (e.g. `"    "` when the
    /// comment sits inside an indented item).
    pub leading_whitespace: String,
    /// The comment marker as it appeared in the source, together with the
    /// single space or tab that follows it when present
    /// (e.g. `"/// "`, `"///"`, `"///\t"`).
    pub prefix: String,
}
34
35/// A contiguous block of same-kind doc comments extracted from a Rust source file.
36#[derive(Debug, Clone)]
37pub struct DocCommentBlock {
38    /// Whether this is an outer (`///`) or inner (`//!`) doc comment.
39    pub kind: DocCommentKind,
40    /// 0-indexed line number of the first line in the original file.
41    pub start_line: usize,
42    /// 0-indexed line number of the last line in the original file (inclusive).
43    pub end_line: usize,
44    /// Byte offset of the first character of the first line in the block.
45    pub byte_start: usize,
46    /// Byte offset past the last character (including `\n`) of the last line in the block.
47    pub byte_end: usize,
48    /// Extracted markdown content with prefixes stripped.
49    pub markdown: String,
50    /// Per-line metadata for prefix restoration during fix mode.
51    pub line_metadata: Vec<DocCommentLineInfo>,
52    /// Length of leading whitespace + prefix (in bytes) for column offset remapping.
53    /// Each entry corresponds to a line in `line_metadata`.
54    pub prefix_byte_lengths: Vec<usize>,
55}
56
57/// Classify a line as a doc comment, returning the kind, leading whitespace,
58/// and the full prefix (including the conventional single space if present).
59///
60/// Returns `None` if the line is not a doc comment. A doc comment must start
61/// with optional whitespace followed by `///` or `//!`. Lines starting with
62/// `////` are regular comments (not doc comments).
63///
64/// Handles all valid rustdoc forms:
65///
66/// - `/// content` (space after prefix)
67/// - `///content` (no space — valid rustdoc, content is `content`)
68/// - `///` (bare prefix, empty content)
69/// - `///\tcontent` (tab after prefix)
70fn classify_doc_comment_line(line: &str) -> Option<(DocCommentKind, String, String)> {
71    let trimmed = line.trim_start();
72    let leading_ws = &line[..line.len() - trimmed.len()];
73
74    // `////` is NOT a doc comment (regular comment)
75    if trimmed.starts_with("////") {
76        return None;
77    }
78
79    if let Some(after) = trimmed.strip_prefix("///") {
80        // Determine the prefix: include the conventional space/tab if present
81        let prefix = if after.starts_with(' ') || after.starts_with('\t') {
82            format!("///{}", &after[..1])
83        } else {
84            "///".to_string()
85        };
86        Some((DocCommentKind::Outer, leading_ws.to_string(), prefix))
87    } else if let Some(after) = trimmed.strip_prefix("//!") {
88        let prefix = if after.starts_with(' ') || after.starts_with('\t') {
89            format!("//!{}", &after[..1])
90        } else {
91            "//!".to_string()
92        };
93        Some((DocCommentKind::Inner, leading_ws.to_string(), prefix))
94    } else {
95        None
96    }
97}
98
99/// Extract the markdown content from a doc comment line after stripping the prefix.
100fn extract_markdown_from_line(trimmed: &str, kind: DocCommentKind) -> &str {
101    let prefix = match kind {
102        DocCommentKind::Outer => "///",
103        DocCommentKind::Inner => "//!",
104    };
105
106    let after_prefix = &trimmed[prefix.len()..];
107    // Strip exactly one leading space if present (conventional rustdoc formatting)
108    if let Some(stripped) = after_prefix.strip_prefix(' ') {
109        stripped
110    } else {
111        after_prefix
112    }
113}
114
115/// Extract all doc comment blocks from Rust source code.
116///
117/// Groups contiguous same-kind doc comment lines into blocks. A block boundary
118/// occurs when:
119///
120/// - A line is not a doc comment
121/// - The doc comment kind changes (from `///` to `//!` or vice versa)
122///
123/// Each block's `markdown` field contains the extracted markdown with prefixes
124/// stripped. The `line_metadata` field preserves the original indentation and
125/// prefix for each line, enabling faithful restoration during fix mode.
126///
127/// **Precondition:** `content` must be LF-normalized (no `\r\n`).
128pub fn extract_doc_comment_blocks(content: &str) -> Vec<DocCommentBlock> {
129    let mut blocks = Vec::new();
130    let mut current_block: Option<DocCommentBlock> = None;
131    let mut byte_offset = 0;
132
133    let lines: Vec<&str> = content.split('\n').collect();
134    let num_lines = lines.len();
135
136    for (line_idx, line) in lines.iter().enumerate() {
137        let line_byte_start = byte_offset;
138        // Only add 1 for the newline if this is not the last segment
139        let has_newline = line_idx < num_lines - 1 || content.ends_with('\n');
140        let line_byte_end = byte_offset + line.len() + if has_newline { 1 } else { 0 };
141
142        if let Some((kind, leading_ws, prefix)) = classify_doc_comment_line(line) {
143            let trimmed = line.trim_start();
144            let md_content = extract_markdown_from_line(trimmed, kind);
145
146            // Compute column offset: leading whitespace bytes + prefix bytes
147            let prefix_byte_len = leading_ws.len() + prefix.len();
148
149            let line_info = DocCommentLineInfo {
150                leading_whitespace: leading_ws,
151                prefix,
152            };
153
154            match current_block.as_mut() {
155                Some(block) if block.kind == kind => {
156                    // Continue the current block
157                    block.end_line = line_idx;
158                    block.byte_end = line_byte_end;
159                    block.markdown.push('\n');
160                    block.markdown.push_str(md_content);
161                    block.line_metadata.push(line_info);
162                    block.prefix_byte_lengths.push(prefix_byte_len);
163                }
164                _ => {
165                    // Flush any existing block
166                    if let Some(block) = current_block.take() {
167                        blocks.push(block);
168                    }
169                    // Start a new block
170                    current_block = Some(DocCommentBlock {
171                        kind,
172                        start_line: line_idx,
173                        end_line: line_idx,
174                        byte_start: line_byte_start,
175                        byte_end: line_byte_end,
176                        markdown: md_content.to_string(),
177                        line_metadata: vec![line_info],
178                        prefix_byte_lengths: vec![prefix_byte_len],
179                    });
180                }
181            }
182        } else {
183            // Not a doc comment line — flush current block
184            if let Some(block) = current_block.take() {
185                blocks.push(block);
186            }
187        }
188
189        byte_offset = line_byte_end;
190    }
191
192    // Flush final block
193    if let Some(block) = current_block.take() {
194        blocks.push(block);
195    }
196
197    blocks
198}
199
/// Names of the rules that are never run on doc comment blocks.
///
/// Each one conflicts with normal rustdoc usage:
///
/// - MD025: rustdoc sections such as `# Errors`, `# Examples` and `# Safety` mean several H1 headings are normal.
/// - MD033: rustdoc warning blocks need raw HTML such as `<div class="warning">`.
/// - MD040: rustdoc treats an unlabeled code block as Rust, so demanding a language label is noise.
/// - MD041: a doc block is not a standalone document, so "first line should be a heading" does not apply.
/// - MD047: likewise, "file should end with a newline" does not apply to a doc block.
/// - MD051: rustdoc anchors like `#method.bar` and `#structfield.name` are not document headings.
/// - MD052: intra-doc links such as `[crate::io]` are rustdoc syntax, not markdown reference links.
/// - MD054: the shortcut reference form `[crate::module]` is the canonical intra-doc link syntax.
pub const SKIPPED_RULES: &[&str] = &[
    "MD025",
    "MD033",
    "MD040",
    "MD041",
    "MD047",
    "MD051",
    "MD052",
    "MD054",
];
211
212/// Check all doc comment blocks in a Rust source file and return lint warnings.
213///
214/// Warnings have their line numbers and column numbers remapped to point to the
215/// correct location in the original Rust file. Fix suggestions are stripped
216/// (fixes are only applied through the fix mode path in the binary crate).
217///
218/// Empty doc comment blocks (only whitespace content) are skipped.
219pub fn check_doc_comment_blocks(
220    content: &str,
221    rules: &[Box<dyn Rule>],
222    config: &rumdl_config::Config,
223) -> Vec<LintWarning> {
224    let blocks = extract_doc_comment_blocks(content);
225    let mut all_warnings = Vec::new();
226
227    for block in &blocks {
228        // Skip empty blocks to avoid spurious warnings
229        if block.markdown.trim().is_empty() {
230            continue;
231        }
232
233        let ctx = LintContext::new(&block.markdown, config.markdown_flavor(), None);
234
235        for rule in rules {
236            if SKIPPED_RULES.contains(&rule.name()) {
237                continue;
238            }
239
240            // For MD013 in doc comments, disable code block checking.
241            // Code blocks contain Rust code formatted by rustfmt (max_width = 100),
242            // not prose governed by markdown line length limits.
243            let doc_rule: Box<dyn Rule>;
244            let effective_rule: &dyn Rule = if rule.name() == "MD013" {
245                if let Some(md013) = rule.as_any().downcast_ref::<MD013LineLength>() {
246                    doc_rule = Box::new(md013.with_code_blocks_disabled());
247                    doc_rule.as_ref()
248                } else {
249                    rule.as_ref()
250                }
251            } else {
252                rule.as_ref()
253            };
254
255            if let Ok(rule_warnings) = effective_rule.check(&ctx) {
256                for warning in rule_warnings {
257                    // Remap line numbers:
258                    // warning.line is 1-indexed within the block markdown
259                    // block.start_line is 0-indexed in the file
260                    // (1-indexed block) + (0-indexed file start) = 1-indexed file line
261                    let file_line = warning.line + block.start_line;
262                    let file_end_line = warning.end_line + block.start_line;
263
264                    // Remap column: add the prefix byte length for the corresponding line
265                    let block_line_idx = warning.line.saturating_sub(1);
266                    let col_offset = block.prefix_byte_lengths.get(block_line_idx).copied().unwrap_or(0);
267                    let file_column = warning.column + col_offset;
268
269                    let block_end_line_idx = warning.end_line.saturating_sub(1);
270                    let end_col_offset = block.prefix_byte_lengths.get(block_end_line_idx).copied().unwrap_or(0);
271                    let file_end_column = warning.end_column + end_col_offset;
272
273                    all_warnings.push(LintWarning {
274                        line: file_line,
275                        end_line: file_end_line,
276                        column: file_column,
277                        end_column: file_end_column,
278                        fix: None,
279                        ..warning
280                    });
281                }
282            }
283        }
284    }
285
286    all_warnings
287}
288
// Unit tests for line classification and block extraction.
#[cfg(test)]
mod tests {
    use super::*;

    // --- classify_doc_comment_line: recognised doc comment forms ---

    // `/// ` with a space: outer kind, no indentation, prefix includes the space.
    #[test]
    fn test_classify_outer_doc_comment() {
        let (kind, ws, prefix) = classify_doc_comment_line("/// Hello").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "");
        assert_eq!(prefix, "/// ");
    }

    // `//! ` with a space: inner kind, prefix includes the space.
    #[test]
    fn test_classify_inner_doc_comment() {
        let (kind, ws, prefix) = classify_doc_comment_line("//! Module doc").unwrap();
        assert_eq!(kind, DocCommentKind::Inner);
        assert_eq!(ws, "");
        assert_eq!(prefix, "//! ");
    }

    // A bare `///` is still a doc comment; the prefix has no separator.
    #[test]
    fn test_classify_empty_outer() {
        let (kind, ws, prefix) = classify_doc_comment_line("///").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "");
        assert_eq!(prefix, "///");
    }

    // A bare `//!` is still a doc comment; the prefix has no separator.
    #[test]
    fn test_classify_empty_inner() {
        let (kind, ws, prefix) = classify_doc_comment_line("//!").unwrap();
        assert_eq!(kind, DocCommentKind::Inner);
        assert_eq!(ws, "");
        assert_eq!(prefix, "//!");
    }

    // Indentation is reported separately from the prefix.
    #[test]
    fn test_classify_indented() {
        let (kind, ws, prefix) = classify_doc_comment_line("    /// Indented").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "    ");
        assert_eq!(prefix, "/// ");
    }

    #[test]
    fn test_classify_no_space_after_prefix() {
        // `///content` is valid rustdoc — content is "content"
        let (kind, ws, prefix) = classify_doc_comment_line("///content").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "");
        assert_eq!(prefix, "///");
    }

    // A tab directly after the marker is kept as part of the prefix.
    #[test]
    fn test_classify_tab_after_prefix() {
        let (kind, ws, prefix) = classify_doc_comment_line("///\tcontent").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "");
        assert_eq!(prefix, "///\t");
    }

    // `//!content` without a separator: prefix is the bare marker.
    #[test]
    fn test_classify_inner_no_space() {
        let (kind, _, prefix) = classify_doc_comment_line("//!content").unwrap();
        assert_eq!(kind, DocCommentKind::Inner);
        assert_eq!(prefix, "//!");
    }

    // --- classify_doc_comment_line: lines that are not doc comments ---

    // Four slashes make a regular comment.
    #[test]
    fn test_classify_four_slashes_is_not_doc() {
        assert!(classify_doc_comment_line("//// Not a doc comment").is_none());
    }

    // Two slashes make a regular comment.
    #[test]
    fn test_classify_regular_comment() {
        assert!(classify_doc_comment_line("// Regular comment").is_none());
    }

    // Plain code is not a doc comment.
    #[test]
    fn test_classify_code_line() {
        assert!(classify_doc_comment_line("let x = 3;").is_none());
    }

    // --- extract_doc_comment_blocks: markdown content and grouping ---

    // Content directly after the marker is extracted unchanged.
    #[test]
    fn test_extract_no_space_content() {
        let content = "///no space here\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].markdown, "no space here");
    }

    // Two adjacent `///` lines form one block; the code line ends it.
    #[test]
    fn test_extract_basic_outer_block() {
        let content = "/// First line\n/// Second line\nfn foo() {}\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].kind, DocCommentKind::Outer);
        assert_eq!(blocks[0].start_line, 0);
        assert_eq!(blocks[0].end_line, 1);
        assert_eq!(blocks[0].markdown, "First line\nSecond line");
        assert_eq!(blocks[0].line_metadata.len(), 2);
    }

    // Two adjacent `//!` lines form one inner block.
    #[test]
    fn test_extract_basic_inner_block() {
        let content = "//! Module doc\n//! More info\n\nuse std::io;\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].kind, DocCommentKind::Inner);
        assert_eq!(blocks[0].markdown, "Module doc\nMore info");
    }

    // A non-doc line between doc comments yields separate blocks with file-relative start lines.
    #[test]
    fn test_extract_multiple_blocks() {
        let content = "/// Block 1\nfn foo() {}\n/// Block 2\nfn bar() {}\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 2);
        assert_eq!(blocks[0].markdown, "Block 1");
        assert_eq!(blocks[0].start_line, 0);
        assert_eq!(blocks[1].markdown, "Block 2");
        assert_eq!(blocks[1].start_line, 2);
    }

    // A change of kind splits the block even without an intervening line.
    #[test]
    fn test_extract_mixed_kinds_separate_blocks() {
        let content = "//! Inner\n/// Outer\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 2);
        assert_eq!(blocks[0].kind, DocCommentKind::Inner);
        assert_eq!(blocks[1].kind, DocCommentKind::Outer);
    }

    // A bare `///` line becomes an empty markdown line inside the block.
    #[test]
    fn test_extract_empty_doc_line() {
        let content = "/// First\n///\n/// Third\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].markdown, "First\n\nThird");
    }

    // Only one space after the marker is stripped; the second is kept.
    #[test]
    fn test_extract_preserves_extra_space() {
        let content = "///  Two spaces\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].markdown, " Two spaces");
    }

    // Indentation is removed from the markdown and recorded in the line metadata.
    #[test]
    fn test_extract_indented_doc_comments() {
        let content = "    /// Indented\n    /// More\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].markdown, "Indented\nMore");
        assert_eq!(blocks[0].line_metadata[0].leading_whitespace, "    ");
    }

    // Source without doc comments produces no blocks.
    #[test]
    fn test_no_doc_comments() {
        let content = "fn main() {\n    let x = 3;\n}\n";
        let blocks = extract_doc_comment_blocks(content);
        assert!(blocks.is_empty());
    }

    // --- extract_doc_comment_blocks: byte offsets and prefix lengths ---

    // `byte_start`/`byte_end` cover whole lines, including the trailing newline.
    #[test]
    fn test_byte_offsets() {
        let content = "/// Hello\nfn foo() {}\n/// World\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 2);
        // First block: "/// Hello\n" = 10 bytes
        assert_eq!(blocks[0].byte_start, 0);
        assert_eq!(blocks[0].byte_end, 10);
        // Second block starts after "fn foo() {}\n" (12 bytes), at offset 22
        assert_eq!(blocks[1].byte_start, 22);
        assert_eq!(blocks[1].byte_end, 32);
    }

    // Without a final newline, `byte_end` stops at the end of the input.
    #[test]
    fn test_byte_offsets_no_trailing_newline() {
        let content = "/// Hello";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].byte_start, 0);
        // No trailing newline, so byte_end == content.len()
        assert_eq!(blocks[0].byte_end, content.len());
    }

    // Lines with different indentation share a block but keep their own prefix lengths.
    #[test]
    fn test_prefix_byte_lengths() {
        let content = "    /// Indented\n/// Top-level\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        // "    " (4) + "/// " (4) = 8 bytes for first line
        assert_eq!(blocks[0].prefix_byte_lengths[0], 8);
        // "" (0) + "/// " (4) = 4 bytes for second line
        assert_eq!(blocks[0].prefix_byte_lengths[1], 4);
    }
}
rumdl_lib/doc_comment_lint.rs

rumdl_lib/
doc_comment_lint.rs