Skip to main content

rumdl_lib/
doc_comment_lint.rs

//! Linting of markdown embedded in Rust doc comments (`///` and `//!`).
//!
//! This module provides extraction and check-only logic for line doc comments.
//! It is used by both the CLI and LSP to lint Rust doc comments.
//!
//! **Precondition:** Input content must be LF-normalized (no `\r\n`).
//! The CLI path handles this via `normalize_line_ending`, but callers using
//! these functions directly must normalize first.
//!
//! **Not supported:** Block doc comments (`/** ... */`) are not extracted.
11
12use crate::config as rumdl_config;
13use crate::lint_context::LintContext;
14use crate::rule::{LintWarning, Rule};
15
/// The kind of a line doc comment: outer (`///`) or inner (`//!`).
///
/// Consecutive doc comment lines are only grouped into one block when they
/// share the same kind.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocCommentKind {
    /// Outer doc comment (`///`).
    Outer,
    /// Inner doc comment (`//!`).
    Inner,
}
24
/// Metadata for a single line in a doc comment block.
///
/// Records what preceded the markdown content on the source line so the
/// original indentation and prefix can be restored later.
#[derive(Debug, Clone)]
pub struct DocCommentLineInfo {
    /// Leading whitespace before the doc comment prefix (e.g. `"    "` for indented code).
    pub leading_whitespace: String,
    /// The doc comment prefix as it appeared in source (e.g. `"/// "`, `"///"`, `"///\t"`).
    pub prefix: String,
}
33
34/// A contiguous block of same-kind doc comments extracted from a Rust source file.
35#[derive(Debug, Clone)]
36pub struct DocCommentBlock {
37    /// Whether this is an outer (`///`) or inner (`//!`) doc comment.
38    pub kind: DocCommentKind,
39    /// 0-indexed line number of the first line in the original file.
40    pub start_line: usize,
41    /// 0-indexed line number of the last line in the original file (inclusive).
42    pub end_line: usize,
43    /// Byte offset of the first character of the first line in the block.
44    pub byte_start: usize,
45    /// Byte offset past the last character (including `\n`) of the last line in the block.
46    pub byte_end: usize,
47    /// Extracted markdown content with prefixes stripped.
48    pub markdown: String,
49    /// Per-line metadata for prefix restoration during fix mode.
50    pub line_metadata: Vec<DocCommentLineInfo>,
51    /// Length of leading whitespace + prefix (in bytes) for column offset remapping.
52    /// Each entry corresponds to a line in `line_metadata`.
53    pub prefix_byte_lengths: Vec<usize>,
54}
55
56/// Classify a line as a doc comment, returning the kind, leading whitespace,
57/// and the full prefix (including the conventional single space if present).
58///
59/// Returns `None` if the line is not a doc comment. A doc comment must start
60/// with optional whitespace followed by `///` or `//!`. Lines starting with
61/// `////` are regular comments (not doc comments).
62///
63/// Handles all valid rustdoc forms:
64///
65/// - `/// content` (space after prefix)
66/// - `///content` (no space — valid rustdoc, content is `content`)
67/// - `///` (bare prefix, empty content)
68/// - `///\tcontent` (tab after prefix)
69fn classify_doc_comment_line(line: &str) -> Option<(DocCommentKind, String, String)> {
70    let trimmed = line.trim_start();
71    let leading_ws = &line[..line.len() - trimmed.len()];
72
73    // `////` is NOT a doc comment (regular comment)
74    if trimmed.starts_with("////") {
75        return None;
76    }
77
78    if let Some(after) = trimmed.strip_prefix("///") {
79        // Determine the prefix: include the conventional space/tab if present
80        let prefix = if after.starts_with(' ') || after.starts_with('\t') {
81            format!("///{}", &after[..1])
82        } else {
83            "///".to_string()
84        };
85        Some((DocCommentKind::Outer, leading_ws.to_string(), prefix))
86    } else if let Some(after) = trimmed.strip_prefix("//!") {
87        let prefix = if after.starts_with(' ') || after.starts_with('\t') {
88            format!("//!{}", &after[..1])
89        } else {
90            "//!".to_string()
91        };
92        Some((DocCommentKind::Inner, leading_ws.to_string(), prefix))
93    } else {
94        None
95    }
96}
97
98/// Extract the markdown content from a doc comment line after stripping the prefix.
99fn extract_markdown_from_line(trimmed: &str, kind: DocCommentKind) -> &str {
100    let prefix = match kind {
101        DocCommentKind::Outer => "///",
102        DocCommentKind::Inner => "//!",
103    };
104
105    let after_prefix = &trimmed[prefix.len()..];
106    // Strip exactly one leading space if present (conventional rustdoc formatting)
107    if let Some(stripped) = after_prefix.strip_prefix(' ') {
108        stripped
109    } else {
110        after_prefix
111    }
112}
113
114/// Extract all doc comment blocks from Rust source code.
115///
116/// Groups contiguous same-kind doc comment lines into blocks. A block boundary
117/// occurs when:
118///
119/// - A line is not a doc comment
120/// - The doc comment kind changes (from `///` to `//!` or vice versa)
121///
122/// Each block's `markdown` field contains the extracted markdown with prefixes
123/// stripped. The `line_metadata` field preserves the original indentation and
124/// prefix for each line, enabling faithful restoration during fix mode.
125///
126/// **Precondition:** `content` must be LF-normalized (no `\r\n`).
127pub fn extract_doc_comment_blocks(content: &str) -> Vec<DocCommentBlock> {
128    let mut blocks = Vec::new();
129    let mut current_block: Option<DocCommentBlock> = None;
130    let mut byte_offset = 0;
131
132    let lines: Vec<&str> = content.split('\n').collect();
133    let num_lines = lines.len();
134
135    for (line_idx, line) in lines.iter().enumerate() {
136        let line_byte_start = byte_offset;
137        // Only add 1 for the newline if this is not the last segment
138        let has_newline = line_idx < num_lines - 1 || content.ends_with('\n');
139        let line_byte_end = byte_offset + line.len() + if has_newline { 1 } else { 0 };
140
141        if let Some((kind, leading_ws, prefix)) = classify_doc_comment_line(line) {
142            let trimmed = line.trim_start();
143            let md_content = extract_markdown_from_line(trimmed, kind);
144
145            // Compute column offset: leading whitespace bytes + prefix bytes
146            let prefix_byte_len = leading_ws.len() + prefix.len();
147
148            let line_info = DocCommentLineInfo {
149                leading_whitespace: leading_ws,
150                prefix,
151            };
152
153            match current_block.as_mut() {
154                Some(block) if block.kind == kind => {
155                    // Continue the current block
156                    block.end_line = line_idx;
157                    block.byte_end = line_byte_end;
158                    block.markdown.push('\n');
159                    block.markdown.push_str(md_content);
160                    block.line_metadata.push(line_info);
161                    block.prefix_byte_lengths.push(prefix_byte_len);
162                }
163                _ => {
164                    // Flush any existing block
165                    if let Some(block) = current_block.take() {
166                        blocks.push(block);
167                    }
168                    // Start a new block
169                    current_block = Some(DocCommentBlock {
170                        kind,
171                        start_line: line_idx,
172                        end_line: line_idx,
173                        byte_start: line_byte_start,
174                        byte_end: line_byte_end,
175                        markdown: md_content.to_string(),
176                        line_metadata: vec![line_info],
177                        prefix_byte_lengths: vec![prefix_byte_len],
178                    });
179                }
180            }
181        } else {
182            // Not a doc comment line — flush current block
183            if let Some(block) = current_block.take() {
184                blocks.push(block);
185            }
186        }
187
188        byte_offset = line_byte_end;
189    }
190
191    // Flush final block
192    if let Some(block) = current_block.take() {
193        blocks.push(block);
194    }
195
196    blocks
197}
198
/// Names of the rules that are skipped when linting doc comment blocks.
///
/// - MD025: Multiple H1 headings are standard in rustdoc (`# Errors`, `# Examples`, `# Safety`).
/// - MD041: "First line should be a heading" doesn't apply — doc blocks aren't standalone documents.
/// - MD047: "File should end with a newline" doesn't apply for the same reason.
/// - MD052: Intra-doc links like `[crate::io]` are rustdoc syntax, not markdown reference links.
pub const SKIPPED_RULES: &[&str] = &["MD025", "MD041", "MD047", "MD052"];
206
207/// Check all doc comment blocks in a Rust source file and return lint warnings.
208///
209/// Warnings have their line numbers and column numbers remapped to point to the
210/// correct location in the original Rust file. Fix suggestions are stripped
211/// (fixes are only applied through the fix mode path in the binary crate).
212///
213/// Empty doc comment blocks (only whitespace content) are skipped.
214pub fn check_doc_comment_blocks(
215    content: &str,
216    rules: &[Box<dyn Rule>],
217    config: &rumdl_config::Config,
218) -> Vec<LintWarning> {
219    let blocks = extract_doc_comment_blocks(content);
220    let mut all_warnings = Vec::new();
221
222    for block in &blocks {
223        // Skip empty blocks to avoid spurious warnings
224        if block.markdown.trim().is_empty() {
225            continue;
226        }
227
228        let ctx = LintContext::new(&block.markdown, config.markdown_flavor(), None);
229
230        for rule in rules {
231            if SKIPPED_RULES.contains(&rule.name()) {
232                continue;
233            }
234
235            if let Ok(rule_warnings) = rule.check(&ctx) {
236                for warning in rule_warnings {
237                    // Remap line numbers:
238                    // warning.line is 1-indexed within the block markdown
239                    // block.start_line is 0-indexed in the file
240                    // (1-indexed block) + (0-indexed file start) = 1-indexed file line
241                    let file_line = warning.line + block.start_line;
242                    let file_end_line = warning.end_line + block.start_line;
243
244                    // Remap column: add the prefix byte length for the corresponding line
245                    let block_line_idx = warning.line.saturating_sub(1);
246                    let col_offset = block.prefix_byte_lengths.get(block_line_idx).copied().unwrap_or(0);
247                    let file_column = warning.column + col_offset;
248
249                    let block_end_line_idx = warning.end_line.saturating_sub(1);
250                    let end_col_offset = block.prefix_byte_lengths.get(block_end_line_idx).copied().unwrap_or(0);
251                    let file_end_column = warning.end_column + end_col_offset;
252
253                    all_warnings.push(LintWarning {
254                        line: file_line,
255                        end_line: file_end_line,
256                        column: file_column,
257                        end_column: file_end_column,
258                        fix: None,
259                        ..warning
260                    });
261                }
262            }
263        }
264    }
265
266    all_warnings
267}
268
/// Unit tests for doc comment classification and block extraction.
#[cfg(test)]
mod tests {
    use super::*;

    // --- classify_doc_comment_line ---

    #[test]
    fn test_classify_outer_doc_comment() {
        let (kind, ws, prefix) = classify_doc_comment_line("/// Hello").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "");
        assert_eq!(prefix, "/// ");
    }

    #[test]
    fn test_classify_inner_doc_comment() {
        let (kind, ws, prefix) = classify_doc_comment_line("//! Module doc").unwrap();
        assert_eq!(kind, DocCommentKind::Inner);
        assert_eq!(ws, "");
        assert_eq!(prefix, "//! ");
    }

    #[test]
    fn test_classify_empty_outer() {
        let (kind, ws, prefix) = classify_doc_comment_line("///").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "");
        assert_eq!(prefix, "///");
    }

    #[test]
    fn test_classify_empty_inner() {
        let (kind, ws, prefix) = classify_doc_comment_line("//!").unwrap();
        assert_eq!(kind, DocCommentKind::Inner);
        assert_eq!(ws, "");
        assert_eq!(prefix, "//!");
    }

    #[test]
    fn test_classify_indented() {
        let (kind, ws, prefix) = classify_doc_comment_line("    /// Indented").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "    ");
        assert_eq!(prefix, "/// ");
    }

    #[test]
    fn test_classify_no_space_after_prefix() {
        // `///content` is valid rustdoc — content is "content"
        let (kind, ws, prefix) = classify_doc_comment_line("///content").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "");
        assert_eq!(prefix, "///");
    }

    #[test]
    fn test_classify_tab_after_prefix() {
        let (kind, ws, prefix) = classify_doc_comment_line("///\tcontent").unwrap();
        assert_eq!(kind, DocCommentKind::Outer);
        assert_eq!(ws, "");
        assert_eq!(prefix, "///\t");
    }

    #[test]
    fn test_classify_inner_no_space() {
        let (kind, _, prefix) = classify_doc_comment_line("//!content").unwrap();
        assert_eq!(kind, DocCommentKind::Inner);
        assert_eq!(prefix, "//!");
    }

    #[test]
    fn test_classify_four_slashes_is_not_doc() {
        assert!(classify_doc_comment_line("//// Not a doc comment").is_none());
    }

    #[test]
    fn test_classify_regular_comment() {
        assert!(classify_doc_comment_line("// Regular comment").is_none());
    }

    #[test]
    fn test_classify_code_line() {
        assert!(classify_doc_comment_line("let x = 3;").is_none());
    }

    // --- extract_doc_comment_blocks ---

    #[test]
    fn test_extract_no_space_content() {
        let content = "///no space here\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].markdown, "no space here");
    }

    #[test]
    fn test_extract_basic_outer_block() {
        let content = "/// First line\n/// Second line\nfn foo() {}\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].kind, DocCommentKind::Outer);
        assert_eq!(blocks[0].start_line, 0);
        assert_eq!(blocks[0].end_line, 1);
        assert_eq!(blocks[0].markdown, "First line\nSecond line");
        assert_eq!(blocks[0].line_metadata.len(), 2);
    }

    #[test]
    fn test_extract_basic_inner_block() {
        let content = "//! Module doc\n//! More info\n\nuse std::io;\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].kind, DocCommentKind::Inner);
        assert_eq!(blocks[0].markdown, "Module doc\nMore info");
    }

    #[test]
    fn test_extract_multiple_blocks() {
        let content = "/// Block 1\nfn foo() {}\n/// Block 2\nfn bar() {}\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 2);
        assert_eq!(blocks[0].markdown, "Block 1");
        assert_eq!(blocks[0].start_line, 0);
        assert_eq!(blocks[1].markdown, "Block 2");
        assert_eq!(blocks[1].start_line, 2);
    }

    #[test]
    fn test_extract_mixed_kinds_separate_blocks() {
        let content = "//! Inner\n/// Outer\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 2);
        assert_eq!(blocks[0].kind, DocCommentKind::Inner);
        assert_eq!(blocks[1].kind, DocCommentKind::Outer);
    }

    #[test]
    fn test_extract_empty_doc_line() {
        let content = "/// First\n///\n/// Third\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].markdown, "First\n\nThird");
    }

    #[test]
    fn test_extract_preserves_extra_space() {
        let content = "///  Two spaces\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].markdown, " Two spaces");
    }

    #[test]
    fn test_extract_indented_doc_comments() {
        let content = "    /// Indented\n    /// More\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].markdown, "Indented\nMore");
        assert_eq!(blocks[0].line_metadata[0].leading_whitespace, "    ");
    }

    #[test]
    fn test_no_doc_comments() {
        let content = "fn main() {\n    let x = 3;\n}\n";
        let blocks = extract_doc_comment_blocks(content);
        assert!(blocks.is_empty());
    }

    #[test]
    fn test_extract_empty_input() {
        assert!(extract_doc_comment_blocks("").is_empty());
    }

    #[test]
    fn test_extract_four_slashes_splits_block() {
        // A `////` line is a regular comment, so it ends the surrounding doc block.
        let content = "/// Before\n//// Plain comment\n/// After\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 2);
        assert_eq!(blocks[0].markdown, "Before");
        assert_eq!(blocks[0].end_line, 0);
        assert_eq!(blocks[1].markdown, "After");
        assert_eq!(blocks[1].start_line, 2);
    }

    // --- byte offsets and prefix lengths ---

    #[test]
    fn test_byte_offsets() {
        let content = "/// Hello\nfn foo() {}\n/// World\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 2);
        // First block: "/// Hello\n" = 10 bytes
        assert_eq!(blocks[0].byte_start, 0);
        assert_eq!(blocks[0].byte_end, 10);
        // Second block starts after "fn foo() {}\n" (12 bytes), at offset 22
        assert_eq!(blocks[1].byte_start, 22);
        assert_eq!(blocks[1].byte_end, 32);
    }

    #[test]
    fn test_byte_offsets_no_trailing_newline() {
        let content = "/// Hello";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].byte_start, 0);
        // No trailing newline, so byte_end == content.len()
        assert_eq!(blocks[0].byte_end, content.len());
    }

    #[test]
    fn test_prefix_byte_lengths() {
        let content = "    /// Indented\n/// Top-level\n";
        let blocks = extract_doc_comment_blocks(content);
        assert_eq!(blocks.len(), 1);
        // "    " (4) + "/// " (4) = 8 bytes for first line
        assert_eq!(blocks[0].prefix_byte_lengths[0], 8);
        // "" (0) + "/// " (4) = 4 bytes for second line
        assert_eq!(blocks[0].prefix_byte_lengths[1], 4);
    }
}