// rumdl_lib/rules/md041_first_line_heading.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::range_utils::calculate_line_range;
4use crate::utils::regex_cache::HTML_HEADING_PATTERN;
5use regex::Regex;
6
7/// Rule MD041: First line in file should be a top-level heading
8///
9/// See [docs/md041.md](../../docs/md041.md) for full documentation, configuration, and examples.
10
11#[derive(Clone)]
12pub struct MD041FirstLineHeading {
13    pub level: usize,
14    pub front_matter_title: bool,
15    pub front_matter_title_pattern: Option<Regex>,
16}
17
18impl Default for MD041FirstLineHeading {
19    fn default() -> Self {
20        Self {
21            level: 1,
22            front_matter_title: true,
23            front_matter_title_pattern: None,
24        }
25    }
26}
27
28impl MD041FirstLineHeading {
29    pub fn new(level: usize, front_matter_title: bool) -> Self {
30        Self {
31            level,
32            front_matter_title,
33            front_matter_title_pattern: None,
34        }
35    }
36
37    pub fn with_pattern(level: usize, front_matter_title: bool, pattern: Option<String>) -> Self {
38        let front_matter_title_pattern = pattern.and_then(|p| match Regex::new(&p) {
39            Ok(regex) => Some(regex),
40            Err(e) => {
41                log::warn!("Invalid front_matter_title_pattern regex: {e}");
42                None
43            }
44        });
45
46        Self {
47            level,
48            front_matter_title,
49            front_matter_title_pattern,
50        }
51    }
52
53    fn has_front_matter_title(&self, content: &str) -> bool {
54        if !self.front_matter_title {
55            return false;
56        }
57
58        // If we have a custom pattern, use it to search front matter content
59        if let Some(ref pattern) = self.front_matter_title_pattern {
60            let front_matter_lines = FrontMatterUtils::extract_front_matter(content);
61            for line in front_matter_lines {
62                if pattern.is_match(line) {
63                    return true;
64                }
65            }
66            return false;
67        }
68
69        // Default behavior: check for "title:" field
70        FrontMatterUtils::has_front_matter_field(content, "title:")
71    }
72
73    /// Check if a line is a non-content token that should be skipped
74    fn is_non_content_line(line: &str) -> bool {
75        let trimmed = line.trim();
76
77        // Skip reference definitions
78        if trimmed.starts_with('[') && trimmed.contains("]: ") {
79            return true;
80        }
81
82        // Skip abbreviation definitions
83        if trimmed.starts_with('*') && trimmed.contains("]: ") {
84            return true;
85        }
86
87        false
88    }
89
90    /// Check if a line is an HTML heading
91    fn is_html_heading(line: &str, level: usize) -> bool {
92        if let Ok(Some(captures)) = HTML_HEADING_PATTERN.captures(line.trim())
93            && let Some(h_level) = captures.get(1)
94        {
95            return h_level.as_str().parse::<usize>().unwrap_or(0) == level;
96        }
97        false
98    }
99}
100
101impl Rule for MD041FirstLineHeading {
102    fn name(&self) -> &'static str {
103        "MD041"
104    }
105
106    fn description(&self) -> &'static str {
107        "First line in file should be a top level heading"
108    }
109
110    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
111        let content = ctx.content;
112        let mut warnings = Vec::new();
113        if content.trim().is_empty() {
114            return Ok(warnings);
115        }
116        if self.has_front_matter_title(content) {
117            return Ok(warnings);
118        }
119
120        // Find the first non-blank line after front matter using cached info
121        let mut first_content_line_num = None;
122        let mut skip_lines = 0;
123
124        // Check for front matter
125        if ctx.lines.first().map(|l| l.content.trim()) == Some("---") {
126            // Skip front matter
127            for (idx, line_info) in ctx.lines.iter().enumerate().skip(1) {
128                if line_info.content.trim() == "---" {
129                    skip_lines = idx + 1;
130                    break;
131                }
132            }
133        }
134
135        for (line_num, line_info) in ctx.lines.iter().enumerate().skip(skip_lines) {
136            let line_content = line_info.content.trim();
137            if !line_content.is_empty() && !Self::is_non_content_line(&line_info.content) {
138                first_content_line_num = Some(line_num);
139                break;
140            }
141        }
142
143        if first_content_line_num.is_none() {
144            // No non-blank lines after front matter
145            return Ok(warnings);
146        }
147
148        let first_line_idx = first_content_line_num.unwrap();
149
150        // Check if the first non-blank line is a heading of the required level
151        let first_line_info = &ctx.lines[first_line_idx];
152        let is_correct_heading = if let Some(heading) = &first_line_info.heading {
153            heading.level as usize == self.level
154        } else {
155            // Check for HTML heading
156            Self::is_html_heading(&first_line_info.content, self.level)
157        };
158
159        if !is_correct_heading {
160            // Calculate precise character range for the entire first line
161            let first_line = first_line_idx + 1; // Convert to 1-indexed
162            let first_line_content = &first_line_info.content;
163            let (start_line, start_col, end_line, end_col) = calculate_line_range(first_line, first_line_content);
164
165            warnings.push(LintWarning {
166                rule_name: Some(self.name()),
167                line: start_line,
168                column: start_col,
169                end_line,
170                end_column: end_col,
171                message: format!("First line in file should be a level {} heading", self.level),
172                severity: Severity::Warning,
173                fix: None, // MD041 no longer provides auto-fix suggestions
174            });
175        }
176        Ok(warnings)
177    }
178
179    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
180        // MD041 should not auto-fix - adding content/titles is a decision that should be made by the document author
181        // This rule now only detects and warns about missing titles, but does not automatically add them
182        Ok(ctx.content.to_string())
183    }
184
185    /// Check if this rule should be skipped
186    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
187        ctx.content.is_empty() || (self.front_matter_title && self.has_front_matter_title(ctx.content))
188    }
189
190    fn as_any(&self) -> &dyn std::any::Any {
191        self
192    }
193
194    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
195    where
196        Self: Sized,
197    {
198        let level = crate::config::get_rule_config_value::<u32>(config, "MD041", "level").unwrap_or(1);
199        let front_matter_title = crate::config::get_rule_config_value::<String>(config, "MD041", "front_matter_title")
200            .unwrap_or_else(|| "title".to_string());
201        let front_matter_title_pattern =
202            crate::config::get_rule_config_value::<String>(config, "MD041", "front_matter_title_pattern");
203
204        let level_usize = level as usize;
205        let use_front_matter = !front_matter_title.is_empty();
206
207        Box::new(MD041FirstLineHeading::with_pattern(
208            level_usize,
209            use_front_matter,
210            front_matter_title_pattern,
211        ))
212    }
213
214    fn default_config_section(&self) -> Option<(String, toml::Value)> {
215        Some((
216            "MD041".to_string(),
217            toml::toml! {
218                level = 1
219                // Pattern for matching title in front matter (regex)
220                // front_matter_title_pattern = "^(title|header):"
221            }
222            .into(),
223        ))
224    }
225}
226
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Lints `content` with `rule` under the standard Markdown flavor and returns the warnings.
    fn lint(rule: &MD041FirstLineHeading, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        rule.check(&ctx).expect("MD041 check should not fail")
    }

    #[test]
    fn test_first_line_is_heading_correct_level() {
        let rule = MD041FirstLineHeading::default();

        // First line is a level 1 heading (should pass)
        let result = lint(&rule, "# My Document\n\nSome content here.");
        assert!(
            result.is_empty(),
            "Expected no warnings when first line is a level 1 heading"
        );
    }

    #[test]
    fn test_first_line_is_heading_wrong_level() {
        let rule = MD041FirstLineHeading::default();

        // First line is a level 2 heading (should fail with level 1 requirement)
        let result = lint(&rule, "## My Document\n\nSome content here.");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);
        assert!(result[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_first_line_not_heading() {
        let rule = MD041FirstLineHeading::default();

        // First line is plain text (should fail)
        let result = lint(&rule, "This is not a heading\n\n# This is a heading");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);
        assert!(result[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_empty_lines_before_heading() {
        let rule = MD041FirstLineHeading::default();

        // Empty lines before first heading (should pass - rule skips empty lines)
        let result = lint(&rule, "\n\n# My Document\n\nSome content.");
        assert!(
            result.is_empty(),
            "Expected no warnings when empty lines precede a valid heading"
        );

        // Empty lines before non-heading content (should fail)
        let result = lint(&rule, "\n\nNot a heading\n\nSome content.");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 3); // First non-empty line
        assert!(result[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_front_matter_with_title() {
        let rule = MD041FirstLineHeading::new(1, true);

        // Front matter with title field (should pass)
        let result = lint(
            &rule,
            "---\ntitle: My Document\nauthor: John Doe\n---\n\nSome content here.",
        );
        assert!(
            result.is_empty(),
            "Expected no warnings when front matter has title field"
        );
    }

    #[test]
    fn test_front_matter_without_title() {
        let rule = MD041FirstLineHeading::new(1, true);

        // Front matter without title field (should fail)
        let result = lint(
            &rule,
            "---\nauthor: John Doe\ndate: 2024-01-01\n---\n\nSome content here.",
        );
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 6); // First content line after front matter
    }

    #[test]
    fn test_front_matter_disabled() {
        let rule = MD041FirstLineHeading::new(1, false);

        // Front matter with title field but front_matter_title is false (should fail)
        let result = lint(&rule, "---\ntitle: My Document\n---\n\nSome content here.");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 5); // First content line after front matter
    }

    #[test]
    fn test_html_comments_before_heading() {
        let rule = MD041FirstLineHeading::default();

        // HTML comment before heading (should fail)
        let result = lint(&rule, "<!-- This is a comment -->\n# My Document\n\nContent.");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1); // HTML comment is the first line
    }

    #[test]
    fn test_different_heading_levels() {
        // Test with level 2 requirement
        let rule = MD041FirstLineHeading::new(2, false);

        let result = lint(&rule, "## Second Level Heading\n\nContent.");
        assert!(result.is_empty(), "Expected no warnings for correct level 2 heading");

        // Wrong level
        let result = lint(&rule, "# First Level Heading\n\nContent.");
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("level 2 heading"));
    }

    #[test]
    fn test_setext_headings() {
        let rule = MD041FirstLineHeading::default();

        // Setext style level 1 heading (should pass)
        let result = lint(&rule, "My Document\n===========\n\nContent.");
        assert!(result.is_empty(), "Expected no warnings for setext level 1 heading");

        // Setext style level 2 heading (should fail with level 1 requirement)
        let result = lint(&rule, "My Document\n-----------\n\nContent.");
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_empty_document() {
        let rule = MD041FirstLineHeading::default();

        // Empty document (should pass - no warnings)
        let result = lint(&rule, "");
        assert!(result.is_empty(), "Expected no warnings for empty document");
    }

    #[test]
    fn test_whitespace_only_document() {
        let rule = MD041FirstLineHeading::default();

        // Document with only whitespace (should pass - no warnings)
        let result = lint(&rule, "   \n\n   \t\n");
        assert!(result.is_empty(), "Expected no warnings for whitespace-only document");
    }

    #[test]
    fn test_front_matter_then_whitespace() {
        let rule = MD041FirstLineHeading::default();

        // Front matter followed by only whitespace (should pass - no warnings)
        let result = lint(&rule, "---\ntitle: Test\n---\n\n   \n\n");
        assert!(
            result.is_empty(),
            "Expected no warnings when no content after front matter"
        );
    }

    #[test]
    fn test_multiple_front_matter_types() {
        let rule = MD041FirstLineHeading::new(1, true);

        // TOML front matter with title (should fail - rule only checks for "title:" pattern)
        let result = lint(&rule, "+++\ntitle = \"My Document\"\n+++\n\nContent.");
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("level 1 heading"));

        // JSON front matter with title (should fail - doesn't have "title:" pattern, has "\"title\":")
        let result = lint(&rule, "{\n\"title\": \"My Document\"\n}\n\nContent.");
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("level 1 heading"));

        // YAML front matter with title field (standard case)
        let result = lint(&rule, "---\ntitle: My Document\n---\n\nContent.");
        assert!(
            result.is_empty(),
            "Expected no warnings for YAML front matter with title"
        );

        // Test mixed format edge case - YAML-style in TOML
        let result = lint(&rule, "+++\ntitle: My Document\n+++\n\nContent.");
        assert!(result.is_empty(), "Expected no warnings when title: pattern is found");
    }

    #[test]
    fn test_malformed_front_matter() {
        let rule = MD041FirstLineHeading::new(1, true);

        // Malformed front matter with title
        let result = lint(&rule, "- --\ntitle: My Document\n- --\n\nContent.");
        assert!(
            result.is_empty(),
            "Expected no warnings for malformed front matter with title"
        );
    }

    #[test]
    fn test_front_matter_with_heading() {
        let rule = MD041FirstLineHeading::default();

        // Front matter without title field followed by correct heading
        let result = lint(&rule, "---\nauthor: John Doe\n---\n\n# My Document\n\nContent.");
        assert!(
            result.is_empty(),
            "Expected no warnings when first line after front matter is correct heading"
        );
    }

    #[test]
    fn test_no_fix_suggestion() {
        let rule = MD041FirstLineHeading::default();

        // Check that NO fix suggestion is provided (MD041 is now detection-only)
        let result = lint(&rule, "Not a heading\n\nContent.");
        assert_eq!(result.len(), 1);
        assert!(result[0].fix.is_none(), "MD041 should not provide fix suggestions");
    }

    #[test]
    fn test_complex_document_structure() {
        let rule = MD041FirstLineHeading::default();

        // Complex document with various elements
        let content =
            "---\nauthor: John\n---\n\n<!-- Comment -->\n\n\n# Valid Heading\n\n## Subheading\n\nContent here.";
        let result = lint(&rule, content);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 5); // The comment line
    }

    #[test]
    fn test_heading_with_special_characters() {
        let rule = MD041FirstLineHeading::default();

        // Heading with special characters and formatting
        let result = lint(&rule, "# Welcome to **My** _Document_ with `code`\n\nContent.");
        assert!(
            result.is_empty(),
            "Expected no warnings for heading with inline formatting"
        );
    }

    #[test]
    fn test_level_configuration() {
        // Test various level configurations
        for level in 1..=6 {
            let rule = MD041FirstLineHeading::new(level, false);

            // Correct level
            let content = format!("{} Heading at Level {}\n\nContent.", "#".repeat(level), level);
            let result = lint(&rule, &content);
            assert!(
                result.is_empty(),
                "Expected no warnings for correct level {level} heading"
            );

            // Wrong level
            let wrong_level = if level == 1 { 2 } else { 1 };
            let content = format!("{} Wrong Level Heading\n\nContent.", "#".repeat(wrong_level));
            let result = lint(&rule, &content);
            assert_eq!(result.len(), 1);
            assert!(result[0].message.contains(&format!("level {level} heading")));
        }
    }
}