rumdl_lib/rules/
md041_first_line_heading.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::range_utils::calculate_line_range;
4use crate::utils::regex_cache::HTML_HEADING_PATTERN;
5use regex::Regex;
6
7/// Rule MD041: First line in file should be a top-level heading
8///
9/// See [docs/md041.md](../../docs/md041.md) for full documentation, configuration, and examples.
10
11#[derive(Clone)]
12pub struct MD041FirstLineHeading {
13    pub level: usize,
14    pub front_matter_title: bool,
15    pub front_matter_title_pattern: Option<Regex>,
16}
17
18impl Default for MD041FirstLineHeading {
19    fn default() -> Self {
20        Self {
21            level: 1,
22            front_matter_title: true,
23            front_matter_title_pattern: None,
24        }
25    }
26}
27
28impl MD041FirstLineHeading {
29    pub fn new(level: usize, front_matter_title: bool) -> Self {
30        Self {
31            level,
32            front_matter_title,
33            front_matter_title_pattern: None,
34        }
35    }
36
37    pub fn with_pattern(level: usize, front_matter_title: bool, pattern: Option<String>) -> Self {
38        let front_matter_title_pattern = pattern.and_then(|p| match Regex::new(&p) {
39            Ok(regex) => Some(regex),
40            Err(e) => {
41                log::warn!("Invalid front_matter_title_pattern regex: {e}");
42                None
43            }
44        });
45
46        Self {
47            level,
48            front_matter_title,
49            front_matter_title_pattern,
50        }
51    }
52
53    fn has_front_matter_title(&self, content: &str) -> bool {
54        if !self.front_matter_title {
55            return false;
56        }
57
58        // If we have a custom pattern, use it to search front matter content
59        if let Some(ref pattern) = self.front_matter_title_pattern {
60            let front_matter_lines = FrontMatterUtils::extract_front_matter(content);
61            for line in front_matter_lines {
62                if pattern.is_match(line) {
63                    return true;
64                }
65            }
66            return false;
67        }
68
69        // Default behavior: check for "title:" field
70        FrontMatterUtils::has_front_matter_field(content, "title:")
71    }
72
73    /// Check if a line is a non-content token that should be skipped
74    fn is_non_content_line(line: &str) -> bool {
75        let trimmed = line.trim();
76
77        // Skip reference definitions
78        if trimmed.starts_with('[') && trimmed.contains("]: ") {
79            return true;
80        }
81
82        // Skip abbreviation definitions
83        if trimmed.starts_with('*') && trimmed.contains("]: ") {
84            return true;
85        }
86
87        false
88    }
89
90    /// Check if a line is an HTML heading
91    fn is_html_heading(line: &str, level: usize) -> bool {
92        if let Ok(Some(captures)) = HTML_HEADING_PATTERN.captures(line.trim())
93            && let Some(h_level) = captures.get(1)
94        {
95            return h_level.as_str().parse::<usize>().unwrap_or(0) == level;
96        }
97        false
98    }
99}
100
101impl Rule for MD041FirstLineHeading {
102    fn name(&self) -> &'static str {
103        "MD041"
104    }
105
106    fn description(&self) -> &'static str {
107        "First line in file should be a top level heading"
108    }
109
110    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
111        let content = ctx.content;
112        let mut warnings = Vec::new();
113        if content.trim().is_empty() {
114            return Ok(warnings);
115        }
116        if self.has_front_matter_title(content) {
117            return Ok(warnings);
118        }
119
120        // Find the first non-blank line after front matter using cached info
121        let mut first_content_line_num = None;
122        let mut skip_lines = 0;
123
124        // Check for front matter
125        if ctx.lines.first().map(|l| l.content.trim()) == Some("---") {
126            // Skip front matter
127            for (idx, line_info) in ctx.lines.iter().enumerate().skip(1) {
128                if line_info.content.trim() == "---" {
129                    skip_lines = idx + 1;
130                    break;
131                }
132            }
133        }
134
135        for (line_num, line_info) in ctx.lines.iter().enumerate().skip(skip_lines) {
136            let line_content = line_info.content.trim();
137            // Skip ESM blocks in MDX files (import/export statements)
138            if line_info.in_esm_block {
139                continue;
140            }
141            if !line_content.is_empty() && !Self::is_non_content_line(&line_info.content) {
142                first_content_line_num = Some(line_num);
143                break;
144            }
145        }
146
147        if first_content_line_num.is_none() {
148            // No non-blank lines after front matter
149            return Ok(warnings);
150        }
151
152        let first_line_idx = first_content_line_num.unwrap();
153
154        // Check if the first non-blank line is a heading of the required level
155        let first_line_info = &ctx.lines[first_line_idx];
156        let is_correct_heading = if let Some(heading) = &first_line_info.heading {
157            heading.level as usize == self.level
158        } else {
159            // Check for HTML heading
160            Self::is_html_heading(&first_line_info.content, self.level)
161        };
162
163        if !is_correct_heading {
164            // Calculate precise character range for the entire first line
165            let first_line = first_line_idx + 1; // Convert to 1-indexed
166            let first_line_content = &first_line_info.content;
167            let (start_line, start_col, end_line, end_col) = calculate_line_range(first_line, first_line_content);
168
169            warnings.push(LintWarning {
170                rule_name: Some(self.name().to_string()),
171                line: start_line,
172                column: start_col,
173                end_line,
174                end_column: end_col,
175                message: format!("First line in file should be a level {} heading", self.level),
176                severity: Severity::Warning,
177                fix: None, // MD041 no longer provides auto-fix suggestions
178            });
179        }
180        Ok(warnings)
181    }
182
183    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
184        // MD041 should not auto-fix - adding content/titles is a decision that should be made by the document author
185        // This rule now only detects and warns about missing titles, but does not automatically add them
186        Ok(ctx.content.to_string())
187    }
188
189    /// Check if this rule should be skipped
190    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
191        ctx.content.is_empty()
192            || !ctx.likely_has_headings()
193            || (self.front_matter_title && self.has_front_matter_title(ctx.content))
194    }
195
196    fn as_any(&self) -> &dyn std::any::Any {
197        self
198    }
199
200    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
201    where
202        Self: Sized,
203    {
204        let level = crate::config::get_rule_config_value::<u32>(config, "MD041", "level").unwrap_or(1);
205        let front_matter_title = crate::config::get_rule_config_value::<String>(config, "MD041", "front_matter_title")
206            .unwrap_or_else(|| "title".to_string());
207        let front_matter_title_pattern =
208            crate::config::get_rule_config_value::<String>(config, "MD041", "front_matter_title_pattern");
209
210        let level_usize = level as usize;
211        let use_front_matter = !front_matter_title.is_empty();
212
213        Box::new(MD041FirstLineHeading::with_pattern(
214            level_usize,
215            use_front_matter,
216            front_matter_title_pattern,
217        ))
218    }
219
220    fn default_config_section(&self) -> Option<(String, toml::Value)> {
221        Some((
222            "MD041".to_string(),
223            toml::toml! {
224                level = 1
225                // Pattern for matching title in front matter (regex)
226                // front_matter_title_pattern = "^(title|header):"
227            }
228            .into(),
229        ))
230    }
231}
232
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs `rule.check` on `content` with the standard Markdown flavor and
    /// returns the warnings, panicking if the check itself fails.
    fn lint(rule: &MD041FirstLineHeading, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    #[test]
    fn test_first_line_is_heading_correct_level() {
        // A level 1 heading on the first line satisfies the default rule.
        let warnings = lint(&MD041FirstLineHeading::default(), "# My Document\n\nSome content here.");
        assert!(
            warnings.is_empty(),
            "Expected no warnings when first line is a level 1 heading"
        );
    }

    #[test]
    fn test_first_line_is_heading_wrong_level() {
        // A level 2 heading does not satisfy the level 1 requirement.
        let warnings = lint(&MD041FirstLineHeading::default(), "## My Document\n\nSome content here.");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_first_line_not_heading() {
        // Plain text on the first line is reported even if a heading follows.
        let warnings = lint(
            &MD041FirstLineHeading::default(),
            "This is not a heading\n\n# This is a heading",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_empty_lines_before_heading() {
        let rule = MD041FirstLineHeading::default();

        // Leading blank lines are skipped, so the heading still counts as first.
        let warnings = lint(&rule, "\n\n# My Document\n\nSome content.");
        assert!(
            warnings.is_empty(),
            "Expected no warnings when empty lines precede a valid heading"
        );

        // Leading blank lines followed by plain text: reported on that text line.
        let warnings = lint(&rule, "\n\nNot a heading\n\nSome content.");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 3); // First non-empty line
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_front_matter_with_title() {
        // A title field in the front matter stands in for the heading.
        let warnings = lint(
            &MD041FirstLineHeading::new(1, true),
            "---\ntitle: My Document\nauthor: John Doe\n---\n\nSome content here.",
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings when front matter has title field"
        );
    }

    #[test]
    fn test_front_matter_without_title() {
        // Front matter lacking a title does not excuse the missing heading.
        let warnings = lint(
            &MD041FirstLineHeading::new(1, true),
            "---\nauthor: John Doe\ndate: 2024-01-01\n---\n\nSome content here.",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 6); // First content line after front matter
    }

    #[test]
    fn test_front_matter_disabled() {
        // With front matter titles disabled, the title field is ignored.
        let warnings = lint(
            &MD041FirstLineHeading::new(1, false),
            "---\ntitle: My Document\n---\n\nSome content here.",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 5); // First content line after front matter
    }

    #[test]
    fn test_html_comments_before_heading() {
        // An HTML comment counts as content, so it is reported as the first line.
        let warnings = lint(
            &MD041FirstLineHeading::default(),
            "<!-- This is a comment -->\n# My Document\n\nContent.",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1); // HTML comment is the first line
    }

    #[test]
    fn test_different_heading_levels() {
        // Rule configured to require a level 2 heading.
        let rule = MD041FirstLineHeading::new(2, false);

        let warnings = lint(&rule, "## Second Level Heading\n\nContent.");
        assert!(warnings.is_empty(), "Expected no warnings for correct level 2 heading");

        // A level 1 heading is the wrong level here.
        let warnings = lint(&rule, "# First Level Heading\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 2 heading"));
    }

    #[test]
    fn test_setext_headings() {
        let rule = MD041FirstLineHeading::default();

        // Setext level 1 (`===` underline) is accepted.
        let warnings = lint(&rule, "My Document\n===========\n\nContent.");
        assert!(warnings.is_empty(), "Expected no warnings for setext level 1 heading");

        // Setext level 2 (`---` underline) is the wrong level.
        let warnings = lint(&rule, "My Document\n-----------\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_empty_document() {
        // An empty document produces no warnings.
        let warnings = lint(&MD041FirstLineHeading::default(), "");
        assert!(warnings.is_empty(), "Expected no warnings for empty document");
    }

    #[test]
    fn test_whitespace_only_document() {
        // A document containing only whitespace produces no warnings.
        let warnings = lint(&MD041FirstLineHeading::default(), "   \n\n   \t\n");
        assert!(warnings.is_empty(), "Expected no warnings for whitespace-only document");
    }

    #[test]
    fn test_front_matter_then_whitespace() {
        // Front matter followed only by whitespace produces no warnings.
        let warnings = lint(&MD041FirstLineHeading::default(), "---\ntitle: Test\n---\n\n   \n\n");
        assert!(
            warnings.is_empty(),
            "Expected no warnings when no content after front matter"
        );
    }

    #[test]
    fn test_multiple_front_matter_types() {
        let rule = MD041FirstLineHeading::new(1, true);

        // TOML front matter uses `title =`, which the `title:` lookup does not match.
        let warnings = lint(&rule, "+++\ntitle = \"My Document\"\n+++\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 1 heading"));

        // JSON front matter uses `"title":`, which is not matched either.
        let warnings = lint(&rule, "{\n\"title\": \"My Document\"\n}\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 1 heading"));

        // YAML front matter with a title field is the standard accepted case.
        let warnings = lint(&rule, "---\ntitle: My Document\n---\n\nContent.");
        assert!(
            warnings.is_empty(),
            "Expected no warnings for YAML front matter with title"
        );

        // Mixed-format edge case: YAML-style `title:` inside TOML fences.
        let warnings = lint(&rule, "+++\ntitle: My Document\n+++\n\nContent.");
        assert!(warnings.is_empty(), "Expected no warnings when title: pattern is found");
    }

    #[test]
    fn test_malformed_front_matter() {
        // Malformed front matter fences around a title field.
        let warnings = lint(
            &MD041FirstLineHeading::new(1, true),
            "- --\ntitle: My Document\n- --\n\nContent.",
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings for malformed front matter with title"
        );
    }

    #[test]
    fn test_front_matter_with_heading() {
        // No title in the front matter, but a correct heading follows it.
        let warnings = lint(
            &MD041FirstLineHeading::default(),
            "---\nauthor: John Doe\n---\n\n# My Document\n\nContent.",
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings when first line after front matter is correct heading"
        );
    }

    #[test]
    fn test_no_fix_suggestion() {
        // MD041 is detection-only: the warning must carry no fix.
        let warnings = lint(&MD041FirstLineHeading::default(), "Not a heading\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].fix.is_none(), "MD041 should not provide fix suggestions");
    }

    #[test]
    fn test_complex_document_structure() {
        // Front matter, then an HTML comment, then the heading.
        let warnings = lint(
            &MD041FirstLineHeading::default(),
            "---\nauthor: John\n---\n\n<!-- Comment -->\n\n\n# Valid Heading\n\n## Subheading\n\nContent here.",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 5); // The comment line
    }

    #[test]
    fn test_heading_with_special_characters() {
        // Inline formatting inside the heading does not affect the check.
        let warnings = lint(
            &MD041FirstLineHeading::default(),
            "# Welcome to **My** _Document_ with `code`\n\nContent.",
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings for heading with inline formatting"
        );
    }

    #[test]
    fn test_level_configuration() {
        // Every supported heading level, checked with a matching and a mismatching heading.
        for level in 1..=6 {
            let rule = MD041FirstLineHeading::new(level, false);

            let matching = format!("{} Heading at Level {}\n\nContent.", "#".repeat(level), level);
            let warnings = lint(&rule, &matching);
            assert!(
                warnings.is_empty(),
                "Expected no warnings for correct level {level} heading"
            );

            let wrong_level = if level == 1 { 2 } else { 1 };
            let mismatching = format!("{} Wrong Level Heading\n\nContent.", "#".repeat(wrong_level));
            let warnings = lint(&rule, &mismatching);
            assert_eq!(warnings.len(), 1);
            assert!(warnings[0].message.contains(&format!("level {level} heading")));
        }
    }
}