rumdl_lib/rules/
md041_first_line_heading.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::range_utils::calculate_line_range;
4use crate::utils::regex_cache::HTML_HEADING_PATTERN;
5use regex::Regex;
6
7/// Rule MD041: First line in file should be a top-level heading
8///
9/// See [docs/md041.md](../../docs/md041.md) for full documentation, configuration, and examples.
10
11#[derive(Clone)]
12pub struct MD041FirstLineHeading {
13    pub level: usize,
14    pub front_matter_title: bool,
15    pub front_matter_title_pattern: Option<Regex>,
16}
17
18impl Default for MD041FirstLineHeading {
19    fn default() -> Self {
20        Self {
21            level: 1,
22            front_matter_title: true,
23            front_matter_title_pattern: None,
24        }
25    }
26}
27
28impl MD041FirstLineHeading {
29    pub fn new(level: usize, front_matter_title: bool) -> Self {
30        Self {
31            level,
32            front_matter_title,
33            front_matter_title_pattern: None,
34        }
35    }
36
37    pub fn with_pattern(level: usize, front_matter_title: bool, pattern: Option<String>) -> Self {
38        let front_matter_title_pattern = pattern.and_then(|p| match Regex::new(&p) {
39            Ok(regex) => Some(regex),
40            Err(e) => {
41                log::warn!("Invalid front_matter_title_pattern regex: {e}");
42                None
43            }
44        });
45
46        Self {
47            level,
48            front_matter_title,
49            front_matter_title_pattern,
50        }
51    }
52
53    fn has_front_matter_title(&self, content: &str) -> bool {
54        if !self.front_matter_title {
55            return false;
56        }
57
58        // If we have a custom pattern, use it to search front matter content
59        if let Some(ref pattern) = self.front_matter_title_pattern {
60            let front_matter_lines = FrontMatterUtils::extract_front_matter(content);
61            for line in front_matter_lines {
62                if pattern.is_match(line) {
63                    return true;
64                }
65            }
66            return false;
67        }
68
69        // Default behavior: check for "title:" field
70        FrontMatterUtils::has_front_matter_field(content, "title:")
71    }
72
73    /// Check if a line is a non-content token that should be skipped
74    fn is_non_content_line(line: &str) -> bool {
75        let trimmed = line.trim();
76
77        // Skip reference definitions
78        if trimmed.starts_with('[') && trimmed.contains("]: ") {
79            return true;
80        }
81
82        // Skip abbreviation definitions
83        if trimmed.starts_with('*') && trimmed.contains("]: ") {
84            return true;
85        }
86
87        false
88    }
89
90    /// Check if a line is an HTML heading
91    fn is_html_heading(line: &str, level: usize) -> bool {
92        if let Ok(Some(captures)) = HTML_HEADING_PATTERN.captures(line.trim())
93            && let Some(h_level) = captures.get(1)
94        {
95            return h_level.as_str().parse::<usize>().unwrap_or(0) == level;
96        }
97        false
98    }
99}
100
101impl Rule for MD041FirstLineHeading {
102    fn name(&self) -> &'static str {
103        "MD041"
104    }
105
106    fn description(&self) -> &'static str {
107        "First line in file should be a top level heading"
108    }
109
110    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
111        let content = ctx.content;
112        let mut warnings = Vec::new();
113        if content.trim().is_empty() {
114            return Ok(warnings);
115        }
116        if self.has_front_matter_title(content) {
117            return Ok(warnings);
118        }
119
120        // Find the first non-blank line after front matter using cached info
121        let mut first_content_line_num = None;
122        let mut skip_lines = 0;
123
124        // Check for front matter
125        if ctx.lines.first().map(|l| l.content.trim()) == Some("---") {
126            // Skip front matter
127            for (idx, line_info) in ctx.lines.iter().enumerate().skip(1) {
128                if line_info.content.trim() == "---" {
129                    skip_lines = idx + 1;
130                    break;
131                }
132            }
133        }
134
135        for (line_num, line_info) in ctx.lines.iter().enumerate().skip(skip_lines) {
136            let line_content = line_info.content.trim();
137            if !line_content.is_empty() && !Self::is_non_content_line(&line_info.content) {
138                first_content_line_num = Some(line_num);
139                break;
140            }
141        }
142
143        if first_content_line_num.is_none() {
144            // No non-blank lines after front matter
145            return Ok(warnings);
146        }
147
148        let first_line_idx = first_content_line_num.unwrap();
149
150        // Check if the first non-blank line is a heading of the required level
151        let first_line_info = &ctx.lines[first_line_idx];
152        let is_correct_heading = if let Some(heading) = &first_line_info.heading {
153            heading.level as usize == self.level
154        } else {
155            // Check for HTML heading
156            Self::is_html_heading(&first_line_info.content, self.level)
157        };
158
159        if !is_correct_heading {
160            // Calculate precise character range for the entire first line
161            let first_line = first_line_idx + 1; // Convert to 1-indexed
162            let first_line_content = &first_line_info.content;
163            let (start_line, start_col, end_line, end_col) = calculate_line_range(first_line, first_line_content);
164
165            warnings.push(LintWarning {
166                rule_name: Some(self.name().to_string()),
167                line: start_line,
168                column: start_col,
169                end_line,
170                end_column: end_col,
171                message: format!("First line in file should be a level {} heading", self.level),
172                severity: Severity::Warning,
173                fix: None, // MD041 no longer provides auto-fix suggestions
174            });
175        }
176        Ok(warnings)
177    }
178
179    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
180        // MD041 should not auto-fix - adding content/titles is a decision that should be made by the document author
181        // This rule now only detects and warns about missing titles, but does not automatically add them
182        Ok(ctx.content.to_string())
183    }
184
185    /// Check if this rule should be skipped
186    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
187        ctx.content.is_empty()
188            || !ctx.likely_has_headings()
189            || (self.front_matter_title && self.has_front_matter_title(ctx.content))
190    }
191
192    fn as_any(&self) -> &dyn std::any::Any {
193        self
194    }
195
196    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
197    where
198        Self: Sized,
199    {
200        let level = crate::config::get_rule_config_value::<u32>(config, "MD041", "level").unwrap_or(1);
201        let front_matter_title = crate::config::get_rule_config_value::<String>(config, "MD041", "front_matter_title")
202            .unwrap_or_else(|| "title".to_string());
203        let front_matter_title_pattern =
204            crate::config::get_rule_config_value::<String>(config, "MD041", "front_matter_title_pattern");
205
206        let level_usize = level as usize;
207        let use_front_matter = !front_matter_title.is_empty();
208
209        Box::new(MD041FirstLineHeading::with_pattern(
210            level_usize,
211            use_front_matter,
212            front_matter_title_pattern,
213        ))
214    }
215
216    fn default_config_section(&self) -> Option<(String, toml::Value)> {
217        Some((
218            "MD041".to_string(),
219            toml::toml! {
220                level = 1
221                // Pattern for matching title in front matter (regex)
222                // front_matter_title_pattern = "^(title|header):"
223            }
224            .into(),
225        ))
226    }
227}
228
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs `rule` over `content` using the standard Markdown flavor and returns
    /// the produced warnings.
    fn lint(rule: &MD041FirstLineHeading, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    #[test]
    fn test_first_line_is_heading_correct_level() {
        let rule = MD041FirstLineHeading::default();

        // First line is a level 1 heading (should pass)
        let result = lint(&rule, "# My Document\n\nSome content here.");
        assert!(
            result.is_empty(),
            "Expected no warnings when first line is a level 1 heading"
        );
    }

    #[test]
    fn test_first_line_is_heading_wrong_level() {
        let rule = MD041FirstLineHeading::default();

        // First line is a level 2 heading (should fail with level 1 requirement)
        let result = lint(&rule, "## My Document\n\nSome content here.");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);
        assert!(result[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_first_line_not_heading() {
        let rule = MD041FirstLineHeading::default();

        // First line is plain text (should fail)
        let result = lint(&rule, "This is not a heading\n\n# This is a heading");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);
        assert!(result[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_empty_lines_before_heading() {
        let rule = MD041FirstLineHeading::default();

        // Empty lines before first heading (should pass - rule skips empty lines)
        let result = lint(&rule, "\n\n# My Document\n\nSome content.");
        assert!(
            result.is_empty(),
            "Expected no warnings when empty lines precede a valid heading"
        );

        // Empty lines before non-heading content (should fail)
        let result = lint(&rule, "\n\nNot a heading\n\nSome content.");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 3); // First non-empty line
        assert!(result[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_front_matter_with_title() {
        let rule = MD041FirstLineHeading::new(1, true);

        // Front matter with title field (should pass)
        let result = lint(
            &rule,
            "---\ntitle: My Document\nauthor: John Doe\n---\n\nSome content here.",
        );
        assert!(
            result.is_empty(),
            "Expected no warnings when front matter has title field"
        );
    }

    #[test]
    fn test_front_matter_without_title() {
        let rule = MD041FirstLineHeading::new(1, true);

        // Front matter without title field (should fail)
        let result = lint(
            &rule,
            "---\nauthor: John Doe\ndate: 2024-01-01\n---\n\nSome content here.",
        );
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 6); // First content line after front matter
    }

    #[test]
    fn test_front_matter_disabled() {
        let rule = MD041FirstLineHeading::new(1, false);

        // Front matter with title field but front_matter_title is false (should fail)
        let result = lint(&rule, "---\ntitle: My Document\n---\n\nSome content here.");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 5); // First content line after front matter
    }

    #[test]
    fn test_html_comments_before_heading() {
        let rule = MD041FirstLineHeading::default();

        // HTML comment before heading (should fail)
        let result = lint(&rule, "<!-- This is a comment -->\n# My Document\n\nContent.");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1); // HTML comment is the first line
    }

    #[test]
    fn test_different_heading_levels() {
        // Test with level 2 requirement
        let rule = MD041FirstLineHeading::new(2, false);

        let result = lint(&rule, "## Second Level Heading\n\nContent.");
        assert!(result.is_empty(), "Expected no warnings for correct level 2 heading");

        // Wrong level
        let result = lint(&rule, "# First Level Heading\n\nContent.");
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("level 2 heading"));
    }

    #[test]
    fn test_setext_headings() {
        let rule = MD041FirstLineHeading::default();

        // Setext style level 1 heading (should pass)
        let result = lint(&rule, "My Document\n===========\n\nContent.");
        assert!(result.is_empty(), "Expected no warnings for setext level 1 heading");

        // Setext style level 2 heading (should fail with level 1 requirement)
        let result = lint(&rule, "My Document\n-----------\n\nContent.");
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_empty_document() {
        let rule = MD041FirstLineHeading::default();

        // Empty document (should pass - no warnings)
        let result = lint(&rule, "");
        assert!(result.is_empty(), "Expected no warnings for empty document");
    }

    #[test]
    fn test_whitespace_only_document() {
        let rule = MD041FirstLineHeading::default();

        // Document with only whitespace (should pass - no warnings)
        let result = lint(&rule, "   \n\n   \t\n");
        assert!(result.is_empty(), "Expected no warnings for whitespace-only document");
    }

    #[test]
    fn test_front_matter_then_whitespace() {
        let rule = MD041FirstLineHeading::default();

        // Front matter followed by only whitespace (should pass - no warnings)
        let result = lint(&rule, "---\ntitle: Test\n---\n\n   \n\n");
        assert!(
            result.is_empty(),
            "Expected no warnings when no content after front matter"
        );
    }

    #[test]
    fn test_multiple_front_matter_types() {
        let rule = MD041FirstLineHeading::new(1, true);

        // TOML front matter with title (should fail - rule only checks for "title:" pattern)
        let result = lint(&rule, "+++\ntitle = \"My Document\"\n+++\n\nContent.");
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("level 1 heading"));

        // JSON front matter with title (should fail - doesn't have "title:" pattern, has "\"title\":")
        let result = lint(&rule, "{\n\"title\": \"My Document\"\n}\n\nContent.");
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("level 1 heading"));

        // YAML front matter with title field (standard case)
        let result = lint(&rule, "---\ntitle: My Document\n---\n\nContent.");
        assert!(
            result.is_empty(),
            "Expected no warnings for YAML front matter with title"
        );

        // Test mixed format edge case - YAML-style in TOML
        let result = lint(&rule, "+++\ntitle: My Document\n+++\n\nContent.");
        assert!(result.is_empty(), "Expected no warnings when title: pattern is found");
    }

    #[test]
    fn test_malformed_front_matter() {
        let rule = MD041FirstLineHeading::new(1, true);

        // Malformed front matter with title
        let result = lint(&rule, "- --\ntitle: My Document\n- --\n\nContent.");
        assert!(
            result.is_empty(),
            "Expected no warnings for malformed front matter with title"
        );
    }

    #[test]
    fn test_front_matter_with_heading() {
        let rule = MD041FirstLineHeading::default();

        // Front matter without title field followed by correct heading
        let result = lint(&rule, "---\nauthor: John Doe\n---\n\n# My Document\n\nContent.");
        assert!(
            result.is_empty(),
            "Expected no warnings when first line after front matter is correct heading"
        );
    }

    #[test]
    fn test_no_fix_suggestion() {
        let rule = MD041FirstLineHeading::default();

        // Check that NO fix suggestion is provided (MD041 is now detection-only)
        let result = lint(&rule, "Not a heading\n\nContent.");
        assert_eq!(result.len(), 1);
        assert!(result[0].fix.is_none(), "MD041 should not provide fix suggestions");
    }

    #[test]
    fn test_complex_document_structure() {
        let rule = MD041FirstLineHeading::default();

        // Complex document with various elements
        let content =
            "---\nauthor: John\n---\n\n<!-- Comment -->\n\n\n# Valid Heading\n\n## Subheading\n\nContent here.";
        let result = lint(&rule, content);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 5); // The comment line
    }

    #[test]
    fn test_heading_with_special_characters() {
        let rule = MD041FirstLineHeading::default();

        // Heading with special characters and formatting
        let result = lint(&rule, "# Welcome to **My** _Document_ with `code`\n\nContent.");
        assert!(
            result.is_empty(),
            "Expected no warnings for heading with inline formatting"
        );
    }

    #[test]
    fn test_level_configuration() {
        // Test various level configurations
        for level in 1..=6 {
            let rule = MD041FirstLineHeading::new(level, false);

            // Correct level
            let content = format!("{} Heading at Level {}\n\nContent.", "#".repeat(level), level);
            let result = lint(&rule, &content);
            assert!(
                result.is_empty(),
                "Expected no warnings for correct level {level} heading"
            );

            // Wrong level
            let wrong_level = if level == 1 { 2 } else { 1 };
            let content = format!("{} Wrong Level Heading\n\nContent.", "#".repeat(wrong_level));
            let result = lint(&rule, &content);
            assert_eq!(result.len(), 1);
            assert!(result[0].message.contains(&format!("level {level} heading")));
        }
    }

    #[test]
    fn test_with_pattern_regex_handling() {
        // An invalid regex is discarded instead of failing construction
        let rule = MD041FirstLineHeading::with_pattern(1, true, Some("(".to_string()));
        assert!(
            rule.front_matter_title_pattern.is_none(),
            "Expected an invalid pattern to be dropped"
        );
        assert_eq!(rule.level, 1);
        assert!(rule.front_matter_title);

        // A valid regex is kept
        let rule = MD041FirstLineHeading::with_pattern(2, true, Some("^(title|header):".to_string()));
        assert!(
            rule.front_matter_title_pattern.is_some(),
            "Expected a valid pattern to be compiled"
        );
        assert_eq!(rule.level, 2);

        // No pattern supplied
        let rule = MD041FirstLineHeading::with_pattern(1, false, None);
        assert!(rule.front_matter_title_pattern.is_none());
        assert!(!rule.front_matter_title);
    }

    #[test]
    fn test_definition_lines_are_non_content() {
        // Reference and abbreviation definitions are skipped
        assert!(MD041FirstLineHeading::is_non_content_line("[ref]: https://example.com"));
        assert!(MD041FirstLineHeading::is_non_content_line(
            "*[HTML]: Hyper Text Markup Language"
        ));

        // Ordinary content is not
        assert!(!MD041FirstLineHeading::is_non_content_line("Just a paragraph"));
        assert!(!MD041FirstLineHeading::is_non_content_line("# Heading"));
    }

    #[test]
    fn test_reference_definition_before_heading() {
        let rule = MD041FirstLineHeading::default();

        // A reference definition ahead of the heading is not treated as content
        let result = lint(&rule, "[ref]: https://example.com\n\n# My Document\n\nContent.");
        assert!(
            result.is_empty(),
            "Expected no warnings when only a reference definition precedes the heading"
        );
    }
}