rumdl_lib/rules/md041_first_line_heading/
mod.rs

1mod md041_config;
2
3pub use md041_config::MD041Config;
4
5use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
6use crate::rules::front_matter_utils::FrontMatterUtils;
7use crate::utils::range_utils::calculate_line_range;
8use crate::utils::regex_cache::HTML_HEADING_PATTERN;
9use regex::Regex;
10
11/// Rule MD041: First line in file should be a top-level heading
12///
13/// See [docs/md041.md](../../docs/md041.md) for full documentation, configuration, and examples.
14
15#[derive(Clone)]
16pub struct MD041FirstLineHeading {
17    pub level: usize,
18    pub front_matter_title: bool,
19    pub front_matter_title_pattern: Option<Regex>,
20}
21
22impl Default for MD041FirstLineHeading {
23    fn default() -> Self {
24        Self {
25            level: 1,
26            front_matter_title: true,
27            front_matter_title_pattern: None,
28        }
29    }
30}
31
32impl MD041FirstLineHeading {
33    pub fn new(level: usize, front_matter_title: bool) -> Self {
34        Self {
35            level,
36            front_matter_title,
37            front_matter_title_pattern: None,
38        }
39    }
40
41    pub fn with_pattern(level: usize, front_matter_title: bool, pattern: Option<String>) -> Self {
42        let front_matter_title_pattern = pattern.and_then(|p| match Regex::new(&p) {
43            Ok(regex) => Some(regex),
44            Err(e) => {
45                log::warn!("Invalid front_matter_title_pattern regex: {e}");
46                None
47            }
48        });
49
50        Self {
51            level,
52            front_matter_title,
53            front_matter_title_pattern,
54        }
55    }
56
57    fn has_front_matter_title(&self, content: &str) -> bool {
58        if !self.front_matter_title {
59            return false;
60        }
61
62        // If we have a custom pattern, use it to search front matter content
63        if let Some(ref pattern) = self.front_matter_title_pattern {
64            let front_matter_lines = FrontMatterUtils::extract_front_matter(content);
65            for line in front_matter_lines {
66                if pattern.is_match(line) {
67                    return true;
68                }
69            }
70            return false;
71        }
72
73        // Default behavior: check for "title:" field
74        FrontMatterUtils::has_front_matter_field(content, "title:")
75    }
76
77    /// Check if a line is a non-content token that should be skipped
78    fn is_non_content_line(line: &str) -> bool {
79        let trimmed = line.trim();
80
81        // Skip reference definitions
82        if trimmed.starts_with('[') && trimmed.contains("]: ") {
83            return true;
84        }
85
86        // Skip abbreviation definitions
87        if trimmed.starts_with('*') && trimmed.contains("]: ") {
88            return true;
89        }
90
91        false
92    }
93
94    /// Check if a line is an HTML heading
95    fn is_html_heading(line: &str, level: usize) -> bool {
96        if let Ok(Some(captures)) = HTML_HEADING_PATTERN.captures(line.trim())
97            && let Some(h_level) = captures.get(1)
98        {
99            return h_level.as_str().parse::<usize>().unwrap_or(0) == level;
100        }
101        false
102    }
103}
104
105impl Rule for MD041FirstLineHeading {
106    fn name(&self) -> &'static str {
107        "MD041"
108    }
109
110    fn description(&self) -> &'static str {
111        "First line in file should be a top level heading"
112    }
113
114    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
115        let mut warnings = Vec::new();
116
117        // Check if we should skip this file
118        if self.should_skip(ctx) {
119            return Ok(warnings);
120        }
121
122        // Find the first non-blank line after front matter using cached info
123        let mut first_content_line_num = None;
124        let mut skip_lines = 0;
125
126        // Check for front matter
127        if ctx.lines.first().map(|l| l.content(ctx.content).trim()) == Some("---") {
128            // Skip front matter
129            for (idx, line_info) in ctx.lines.iter().enumerate().skip(1) {
130                if line_info.content(ctx.content).trim() == "---" {
131                    skip_lines = idx + 1;
132                    break;
133                }
134            }
135        }
136
137        for (line_num, line_info) in ctx.lines.iter().enumerate().skip(skip_lines) {
138            let line_content = line_info.content(ctx.content).trim();
139            // Skip ESM blocks in MDX files (import/export statements)
140            if line_info.in_esm_block {
141                continue;
142            }
143            if !line_content.is_empty() && !Self::is_non_content_line(line_info.content(ctx.content)) {
144                first_content_line_num = Some(line_num);
145                break;
146            }
147        }
148
149        if first_content_line_num.is_none() {
150            // No non-blank lines after front matter
151            return Ok(warnings);
152        }
153
154        let first_line_idx = first_content_line_num.unwrap();
155
156        // Check if the first non-blank line is a heading of the required level
157        let first_line_info = &ctx.lines[first_line_idx];
158        let is_correct_heading = if let Some(heading) = &first_line_info.heading {
159            heading.level as usize == self.level
160        } else {
161            // Check for HTML heading
162            Self::is_html_heading(first_line_info.content(ctx.content), self.level)
163        };
164
165        if !is_correct_heading {
166            // Calculate precise character range for the entire first line
167            let first_line = first_line_idx + 1; // Convert to 1-indexed
168            let first_line_content = first_line_info.content(ctx.content);
169            let (start_line, start_col, end_line, end_col) = calculate_line_range(first_line, first_line_content);
170
171            warnings.push(LintWarning {
172                rule_name: Some(self.name().to_string()),
173                line: start_line,
174                column: start_col,
175                end_line,
176                end_column: end_col,
177                message: format!("First line in file should be a level {} heading", self.level),
178                severity: Severity::Warning,
179                fix: None, // MD041 no longer provides auto-fix suggestions
180            });
181        }
182        Ok(warnings)
183    }
184
185    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
186        // MD041 should not auto-fix - adding content/titles is a decision that should be made by the document author
187        // This rule now only detects and warns about missing titles, but does not automatically add them
188        Ok(ctx.content.to_string())
189    }
190
191    /// Check if this rule should be skipped
192    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
193        // Skip files that are purely preprocessor directives (e.g., mdBook includes).
194        // These files are composition/routing metadata, not standalone content.
195        // Example: A file containing only "{{#include ../../README.md}}" is a
196        // pointer to content, not content itself, and shouldn't need a heading.
197        let only_directives = !ctx.content.is_empty()
198            && ctx.content.lines().filter(|l| !l.trim().is_empty()).all(|l| {
199                let t = l.trim();
200                // mdBook directives: {{#include}}, {{#playground}}, {{#rustdoc_include}}, etc.
201                (t.starts_with("{{#") && t.ends_with("}}"))
202                        // HTML comments often accompany directives
203                        || (t.starts_with("<!--") && t.ends_with("-->"))
204            });
205
206        ctx.content.is_empty()
207            || (self.front_matter_title && self.has_front_matter_title(ctx.content))
208            || only_directives
209    }
210
211    fn as_any(&self) -> &dyn std::any::Any {
212        self
213    }
214
215    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
216    where
217        Self: Sized,
218    {
219        // Load config using serde with kebab-case support
220        let md041_config = crate::rule_config_serde::load_rule_config::<MD041Config>(config);
221
222        let use_front_matter = !md041_config.front_matter_title.is_empty();
223
224        Box::new(MD041FirstLineHeading::with_pattern(
225            md041_config.level.as_usize(),
226            use_front_matter,
227            md041_config.front_matter_title_pattern,
228        ))
229    }
230
231    fn default_config_section(&self) -> Option<(String, toml::Value)> {
232        Some((
233            "MD041".to_string(),
234            toml::toml! {
235                level = 1
236                front-matter-title = "title"
237                front-matter-title-pattern = ""
238            }
239            .into(),
240        ))
241    }
242}
243
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Lints `$content` as standard-flavor Markdown with `$rule` and evaluates
    /// to the resulting warnings, panicking if the check itself fails.
    macro_rules! lint {
        ($rule:expr, $content:expr) => {{
            let ctx = LintContext::new($content, crate::config::MarkdownFlavor::Standard);
            $rule.check(&ctx).unwrap()
        }};
    }

    #[test]
    fn test_first_line_is_heading_correct_level() {
        // A document opening with a level 1 heading satisfies the default rule.
        let warnings = lint!(
            MD041FirstLineHeading::default(),
            "# My Document\n\nSome content here."
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings when first line is a level 1 heading"
        );
    }

    #[test]
    fn test_first_line_is_heading_wrong_level() {
        // A level 2 heading does not meet the default level 1 requirement.
        let warnings = lint!(
            MD041FirstLineHeading::default(),
            "## My Document\n\nSome content here."
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_first_line_not_heading() {
        // Plain text on the first line is reported even if a heading follows.
        let warnings = lint!(
            MD041FirstLineHeading::default(),
            "This is not a heading\n\n# This is a heading"
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_empty_lines_before_heading() {
        let rule = MD041FirstLineHeading::default();

        // Leading blank lines are skipped, so the heading still counts as first.
        let warnings = lint!(rule, "\n\n# My Document\n\nSome content.");
        assert!(
            warnings.is_empty(),
            "Expected no warnings when empty lines precede a valid heading"
        );

        // The same blank lines followed by plain text are reported at that text.
        let warnings = lint!(rule, "\n\nNot a heading\n\nSome content.");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 3); // First non-empty line
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_front_matter_with_title() {
        // A title field in the front matter stands in for the heading.
        let warnings = lint!(
            MD041FirstLineHeading::new(1, true),
            "---\ntitle: My Document\nauthor: John Doe\n---\n\nSome content here."
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings when front matter has title field"
        );
    }

    #[test]
    fn test_front_matter_without_title() {
        // Front matter lacking a title does not excuse the missing heading.
        let warnings = lint!(
            MD041FirstLineHeading::new(1, true),
            "---\nauthor: John Doe\ndate: 2024-01-01\n---\n\nSome content here."
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 6); // First content line after front matter
    }

    #[test]
    fn test_front_matter_disabled() {
        // With front matter titles disabled, the title field is ignored.
        let warnings = lint!(
            MD041FirstLineHeading::new(1, false),
            "---\ntitle: My Document\n---\n\nSome content here."
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 5); // First content line after front matter
    }

    #[test]
    fn test_html_comments_before_heading() {
        // An HTML comment ahead of the heading is itself the first content line.
        let warnings = lint!(
            MD041FirstLineHeading::default(),
            "<!-- This is a comment -->\n# My Document\n\nContent."
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1); // HTML comment is the first line
    }

    #[test]
    fn test_different_heading_levels() {
        // Require level 2 instead of the default.
        let rule = MD041FirstLineHeading::new(2, false);

        let warnings = lint!(rule, "## Second Level Heading\n\nContent.");
        assert!(warnings.is_empty(), "Expected no warnings for correct level 2 heading");

        // A level 1 heading is now the wrong level.
        let warnings = lint!(rule, "# First Level Heading\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 2 heading"));
    }

    #[test]
    fn test_setext_headings() {
        let rule = MD041FirstLineHeading::default();

        // `===` underline: setext level 1 heading, accepted.
        let warnings = lint!(rule, "My Document\n===========\n\nContent.");
        assert!(warnings.is_empty(), "Expected no warnings for setext level 1 heading");

        // `---` underline: setext level 2 heading, rejected for level 1.
        let warnings = lint!(rule, "My Document\n-----------\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_empty_document() {
        // Nothing to check in an empty document.
        let warnings = lint!(MD041FirstLineHeading::default(), "");
        assert!(warnings.is_empty(), "Expected no warnings for empty document");
    }

    #[test]
    fn test_whitespace_only_document() {
        // Whitespace alone is not content.
        let warnings = lint!(MD041FirstLineHeading::default(), "   \n\n   \t\n");
        assert!(warnings.is_empty(), "Expected no warnings for whitespace-only document");
    }

    #[test]
    fn test_front_matter_then_whitespace() {
        // Front matter followed only by whitespace leaves nothing to flag.
        let warnings = lint!(
            MD041FirstLineHeading::default(),
            "---\ntitle: Test\n---\n\n   \n\n"
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings when no content after front matter"
        );
    }

    #[test]
    fn test_multiple_front_matter_types() {
        let rule = MD041FirstLineHeading::new(1, true);

        // TOML front matter uses `title =`, which is not the "title:" pattern.
        let warnings = lint!(rule, "+++\ntitle = \"My Document\"\n+++\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 1 heading"));

        // JSON front matter has "\"title\":", which is not the "title:" pattern either.
        let warnings = lint!(rule, "{\n\"title\": \"My Document\"\n}\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 1 heading"));

        // YAML front matter with a title field is the standard case.
        let warnings = lint!(rule, "---\ntitle: My Document\n---\n\nContent.");
        assert!(
            warnings.is_empty(),
            "Expected no warnings for YAML front matter with title"
        );

        // Mixed-format edge case: YAML-style field inside TOML fences.
        let warnings = lint!(rule, "+++\ntitle: My Document\n+++\n\nContent.");
        assert!(warnings.is_empty(), "Expected no warnings when title: pattern is found");
    }

    #[test]
    fn test_malformed_front_matter() {
        // Malformed fences around a title field.
        let warnings = lint!(
            MD041FirstLineHeading::new(1, true),
            "- --\ntitle: My Document\n- --\n\nContent."
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings for malformed front matter with title"
        );
    }

    #[test]
    fn test_front_matter_with_heading() {
        // No title in the front matter, but the body opens with the right heading.
        let warnings = lint!(
            MD041FirstLineHeading::default(),
            "---\nauthor: John Doe\n---\n\n# My Document\n\nContent."
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings when first line after front matter is correct heading"
        );
    }

    #[test]
    fn test_no_fix_suggestion() {
        // MD041 is detection-only: the warning must not carry a fix.
        let warnings = lint!(MD041FirstLineHeading::default(), "Not a heading\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].fix.is_none(), "MD041 should not provide fix suggestions");
    }

    #[test]
    fn test_complex_document_structure() {
        // Front matter, then a comment, then blank lines, then headings.
        let warnings = lint!(
            MD041FirstLineHeading::default(),
            "---\nauthor: John\n---\n\n<!-- Comment -->\n\n\n# Valid Heading\n\n## Subheading\n\nContent here."
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 5); // The comment line
    }

    #[test]
    fn test_heading_with_special_characters() {
        // Inline formatting inside the heading does not affect its level.
        let warnings = lint!(
            MD041FirstLineHeading::default(),
            "# Welcome to **My** _Document_ with `code`\n\nContent."
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings for heading with inline formatting"
        );
    }

    #[test]
    fn test_level_configuration() {
        // Every configurable level accepts its own heading and rejects another.
        for level in 1..=6 {
            let rule = MD041FirstLineHeading::new(level, false);

            let matching = format!("{} Heading at Level {}\n\nContent.", "#".repeat(level), level);
            let warnings = lint!(rule, &matching);
            assert!(
                warnings.is_empty(),
                "Expected no warnings for correct level {level} heading"
            );

            let wrong_level = if level == 1 { 2 } else { 1 };
            let mismatched = format!("{} Wrong Level Heading\n\nContent.", "#".repeat(wrong_level));
            let warnings = lint!(rule, &mismatched);
            assert_eq!(warnings.len(), 1);
            assert!(warnings[0].message.contains(&format!("level {level} heading")));
        }
    }
}