rumdl_lib/rules/
md041_first_line_heading.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::range_utils::{LineIndex, calculate_line_range};
4use crate::utils::regex_cache::HTML_HEADING_PATTERN;
5use regex::Regex;
6
7/// Rule MD041: First line in file should be a top-level heading
8///
9/// See [docs/md041.md](../../docs/md041.md) for full documentation, configuration, and examples.
10
#[derive(Clone)]
pub struct MD041FirstLineHeading {
    /// Heading level that the first content line of the document must have.
    pub level: usize,
    /// When `true`, a title found in the document's front matter satisfies the rule.
    pub front_matter_title: bool,
    /// Optional regex matched against each front matter line; when set it replaces
    /// the default lookup of a `title:` field.
    pub front_matter_title_pattern: Option<Regex>,
}
17
18impl Default for MD041FirstLineHeading {
19    fn default() -> Self {
20        Self {
21            level: 1,
22            front_matter_title: true,
23            front_matter_title_pattern: None,
24        }
25    }
26}
27
28impl MD041FirstLineHeading {
29    pub fn new(level: usize, front_matter_title: bool) -> Self {
30        Self {
31            level,
32            front_matter_title,
33            front_matter_title_pattern: None,
34        }
35    }
36
37    pub fn with_pattern(level: usize, front_matter_title: bool, pattern: Option<String>) -> Self {
38        let front_matter_title_pattern = pattern.and_then(|p| match Regex::new(&p) {
39            Ok(regex) => Some(regex),
40            Err(e) => {
41                log::warn!("Invalid front_matter_title_pattern regex: {e}");
42                None
43            }
44        });
45
46        Self {
47            level,
48            front_matter_title,
49            front_matter_title_pattern,
50        }
51    }
52
53    fn has_front_matter_title(&self, content: &str) -> bool {
54        if !self.front_matter_title {
55            return false;
56        }
57
58        // If we have a custom pattern, use it to search front matter content
59        if let Some(ref pattern) = self.front_matter_title_pattern {
60            let front_matter_lines = FrontMatterUtils::extract_front_matter(content);
61            for line in front_matter_lines {
62                if pattern.is_match(line) {
63                    return true;
64                }
65            }
66            return false;
67        }
68
69        // Default behavior: check for "title:" field
70        FrontMatterUtils::has_front_matter_field(content, "title:")
71    }
72
73    /// Check if a line is a non-content token that should be skipped
74    fn is_non_content_line(line: &str) -> bool {
75        let trimmed = line.trim();
76
77        // Skip reference definitions
78        if trimmed.starts_with('[') && trimmed.contains("]: ") {
79            return true;
80        }
81
82        // Skip abbreviation definitions
83        if trimmed.starts_with('*') && trimmed.contains("]: ") {
84            return true;
85        }
86
87        false
88    }
89
90    /// Check if a line is an HTML heading
91    fn is_html_heading(line: &str, level: usize) -> bool {
92        if let Ok(Some(captures)) = HTML_HEADING_PATTERN.captures(line.trim())
93            && let Some(h_level) = captures.get(1)
94        {
95            return h_level.as_str().parse::<usize>().unwrap_or(0) == level;
96        }
97        false
98    }
99}
100
101impl Rule for MD041FirstLineHeading {
    /// Identifier of this rule.
    fn name(&self) -> &'static str {
        "MD041"
    }
105
    /// One-line summary of what this rule enforces.
    fn description(&self) -> &'static str {
        "First line in file should be a top level heading"
    }
109
110    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
111        let content = ctx.content;
112        let mut warnings = Vec::new();
113        if content.trim().is_empty() {
114            return Ok(warnings);
115        }
116        if self.has_front_matter_title(content) {
117            return Ok(warnings);
118        }
119
120        // Find the first non-blank line after front matter using cached info
121        let mut first_content_line_num = None;
122        let mut skip_lines = 0;
123
124        // Check for front matter
125        if ctx.lines.first().map(|l| l.content.trim()) == Some("---") {
126            // Skip front matter
127            for (idx, line_info) in ctx.lines.iter().enumerate().skip(1) {
128                if line_info.content.trim() == "---" {
129                    skip_lines = idx + 1;
130                    break;
131                }
132            }
133        }
134
135        for (line_num, line_info) in ctx.lines.iter().enumerate().skip(skip_lines) {
136            let line_content = line_info.content.trim();
137            if !line_content.is_empty() && !Self::is_non_content_line(&line_info.content) {
138                first_content_line_num = Some(line_num);
139                break;
140            }
141        }
142
143        if first_content_line_num.is_none() {
144            // No non-blank lines after front matter
145            return Ok(warnings);
146        }
147
148        let first_line_idx = first_content_line_num.unwrap();
149
150        // Check if the first non-blank line is a heading of the required level
151        let first_line_info = &ctx.lines[first_line_idx];
152        let is_correct_heading = if let Some(heading) = &first_line_info.heading {
153            heading.level as usize == self.level
154        } else {
155            // Check for HTML heading
156            Self::is_html_heading(&first_line_info.content, self.level)
157        };
158
159        if !is_correct_heading {
160            // Calculate precise character range for the entire first line
161            let first_line = first_line_idx + 1; // Convert to 1-indexed
162            let first_line_content = &first_line_info.content;
163            let (start_line, start_col, end_line, end_col) = calculate_line_range(first_line, first_line_content);
164
165            // Detect line ending style for the fix
166            let line_ending = crate::utils::detect_line_ending(content);
167
168            warnings.push(LintWarning {
169                rule_name: Some(self.name()),
170                line: start_line,
171                column: start_col,
172                end_line,
173                end_column: end_col,
174                message: format!("First line in file should be a level {} heading", self.level),
175                severity: Severity::Warning,
176                fix: Some(Fix {
177                    range: LineIndex::new(content.to_string()).line_col_to_byte_range_with_length(first_line, 1, 0),
178                    replacement: format!("{} Title{}{}", "#".repeat(self.level), line_ending, line_ending),
179                }),
180            });
181        }
182        Ok(warnings)
183    }
184
185    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
186        let content = ctx.content;
187        let content = crate::rules::front_matter_utils::FrontMatterUtils::fix_malformed_front_matter(content);
188        if content.trim().is_empty() || self.has_front_matter_title(&content) {
189            return Ok(content.to_string());
190        }
191
192        // Detect the line ending style to use
193        let line_ending = crate::utils::detect_line_ending(&content);
194
195        // Re-create context for the potentially fixed content
196        let fixed_ctx = crate::lint_context::LintContext::new(&content, crate::config::MarkdownFlavor::Standard);
197
198        // Find the first non-blank line after front matter
199        let mut first_content_line_num = None;
200        let mut skip_lines = 0;
201
202        // Check for front matter
203        if fixed_ctx.lines.first().map(|l| l.content.trim()) == Some("---") {
204            // Skip front matter
205            for (idx, line_info) in fixed_ctx.lines.iter().enumerate().skip(1) {
206                if line_info.content.trim() == "---" {
207                    skip_lines = idx + 1;
208                    break;
209                }
210            }
211        }
212
213        for (line_num, line_info) in fixed_ctx.lines.iter().enumerate().skip(skip_lines) {
214            let line_content = line_info.content.trim();
215            if !line_content.is_empty() && !Self::is_non_content_line(&line_info.content) {
216                first_content_line_num = Some(line_num);
217                break;
218            }
219        }
220
221        let mut result = String::new();
222        let lines: Vec<&str> = content.lines().collect();
223
224        // Check if we have any headings at all
225        let has_any_heading = fixed_ctx.lines.iter().any(|line| line.heading.is_some());
226
227        if !has_any_heading {
228            // Add a new title at the beginning
229            result.push_str(&format!(
230                "{} Title{}{}{}",
231                "#".repeat(self.level),
232                line_ending,
233                line_ending,
234                content
235            ));
236        } else if let Some(first_line_idx) = first_content_line_num {
237            // Check if first content line is a heading of correct level
238            let first_line_info = &fixed_ctx.lines[first_line_idx];
239
240            if let Some(heading) = &first_line_info.heading {
241                if heading.level as usize != self.level {
242                    // Fix the existing heading level
243                    for (i, line) in lines.iter().enumerate() {
244                        if i == first_line_idx {
245                            result.push_str(&format!("{} {}", "#".repeat(self.level), heading.text));
246                        } else {
247                            result.push_str(line);
248                        }
249                        if i < lines.len() - 1 {
250                            result.push_str(line_ending);
251                        }
252                    }
253                } else {
254                    // No fix needed, return original
255                    return Ok(content.to_string());
256                }
257            } else if Self::is_html_heading(&first_line_info.content, self.level) {
258                // HTML heading with correct level, no fix needed
259                return Ok(content.to_string());
260            } else {
261                // First line is not a heading, add a new title before it
262                for (i, line) in lines.iter().enumerate() {
263                    if i == first_line_idx {
264                        result.push_str(&format!(
265                            "{} Title{}{}",
266                            "#".repeat(self.level),
267                            line_ending,
268                            line_ending
269                        ));
270                    }
271                    result.push_str(line);
272                    if i < lines.len() - 1 {
273                        result.push_str(line_ending);
274                    }
275                }
276            }
277        } else {
278            // No content after front matter
279            return Ok(content.to_string());
280        }
281
282        Ok(result)
283    }
284
285    /// Check if this rule should be skipped
286    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
287        ctx.content.is_empty() || (self.front_matter_title && self.has_front_matter_title(ctx.content))
288    }
289
    /// Expose this rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
293
294    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
295    where
296        Self: Sized,
297    {
298        let level = crate::config::get_rule_config_value::<u32>(config, "MD041", "level").unwrap_or(1);
299        let front_matter_title = crate::config::get_rule_config_value::<String>(config, "MD041", "front_matter_title")
300            .unwrap_or_else(|| "title".to_string());
301        let front_matter_title_pattern =
302            crate::config::get_rule_config_value::<String>(config, "MD041", "front_matter_title_pattern");
303
304        let level_usize = level as usize;
305        let use_front_matter = !front_matter_title.is_empty();
306
307        Box::new(MD041FirstLineHeading::with_pattern(
308            level_usize,
309            use_front_matter,
310            front_matter_title_pattern,
311        ))
312    }
313
314    fn default_config_section(&self) -> Option<(String, toml::Value)> {
315        Some((
316            "MD041".to_string(),
317            toml::toml! {
318                level = 1
319                // Pattern for matching title in front matter (regex)
320                // front_matter_title_pattern = "^(title|header):"
321            }
322            .into(),
323        ))
324    }
325}
326
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Lint `content` with `rule` under the standard Markdown flavor and return the warnings.
    fn lint(rule: &MD041FirstLineHeading, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    #[test]
    fn test_first_line_is_heading_correct_level() {
        // A level 1 heading on the first line satisfies the default rule.
        let warnings = lint(&MD041FirstLineHeading::default(), "# My Document\n\nSome content here.");
        assert!(
            warnings.is_empty(),
            "Expected no warnings when first line is a level 1 heading"
        );
    }

    #[test]
    fn test_first_line_is_heading_wrong_level() {
        // A level 2 heading does not satisfy a level 1 requirement.
        let warnings = lint(&MD041FirstLineHeading::default(), "## My Document\n\nSome content here.");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_first_line_not_heading() {
        // Plain text on the first line is flagged even if a heading follows later.
        let warnings = lint(
            &MD041FirstLineHeading::default(),
            "This is not a heading\n\n# This is a heading",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_empty_lines_before_heading() {
        let rule = MD041FirstLineHeading::default();

        // Leading blank lines are skipped, so the heading counts as the first line.
        let warnings = lint(&rule, "\n\n# My Document\n\nSome content.");
        assert!(
            warnings.is_empty(),
            "Expected no warnings when empty lines precede a valid heading"
        );

        // Leading blank lines followed by non-heading content are flagged at that content.
        let warnings = lint(&rule, "\n\nNot a heading\n\nSome content.");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 3); // First non-empty line
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_front_matter_with_title() {
        // A title field in front matter satisfies the rule.
        let warnings = lint(
            &MD041FirstLineHeading::new(1, true),
            "---\ntitle: My Document\nauthor: John Doe\n---\n\nSome content here.",
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings when front matter has title field"
        );
    }

    #[test]
    fn test_front_matter_without_title() {
        // Front matter lacking a title field does not satisfy the rule.
        let warnings = lint(
            &MD041FirstLineHeading::new(1, true),
            "---\nauthor: John Doe\ndate: 2024-01-01\n---\n\nSome content here.",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 6); // First content line after front matter
    }

    #[test]
    fn test_front_matter_disabled() {
        // With front_matter_title disabled, a title field is ignored.
        let warnings = lint(
            &MD041FirstLineHeading::new(1, false),
            "---\ntitle: My Document\n---\n\nSome content here.",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 5); // First content line after front matter
    }

    #[test]
    fn test_html_comments_before_heading() {
        // An HTML comment ahead of the heading is itself the first content line.
        let warnings = lint(
            &MD041FirstLineHeading::default(),
            "<!-- This is a comment -->\n# My Document\n\nContent.",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1); // HTML comment is the first line
    }

    #[test]
    fn test_different_heading_levels() {
        // Rule configured to require level 2.
        let rule = MD041FirstLineHeading::new(2, false);

        let warnings = lint(&rule, "## Second Level Heading\n\nContent.");
        assert!(warnings.is_empty(), "Expected no warnings for correct level 2 heading");

        // Wrong level
        let warnings = lint(&rule, "# First Level Heading\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 2 heading"));
    }

    #[test]
    fn test_setext_headings() {
        let rule = MD041FirstLineHeading::default();

        // `===` underline makes a level 1 heading.
        let warnings = lint(&rule, "My Document\n===========\n\nContent.");
        assert!(warnings.is_empty(), "Expected no warnings for setext level 1 heading");

        // `---` underline makes a level 2 heading, which fails a level 1 requirement.
        let warnings = lint(&rule, "My Document\n-----------\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_empty_document() {
        // An empty document produces no warnings.
        let warnings = lint(&MD041FirstLineHeading::default(), "");
        assert!(warnings.is_empty(), "Expected no warnings for empty document");
    }

    #[test]
    fn test_whitespace_only_document() {
        // A document containing only whitespace produces no warnings.
        let warnings = lint(&MD041FirstLineHeading::default(), "   \n\n   \t\n");
        assert!(warnings.is_empty(), "Expected no warnings for whitespace-only document");
    }

    #[test]
    fn test_front_matter_then_whitespace() {
        // Front matter followed only by whitespace produces no warnings.
        let warnings = lint(&MD041FirstLineHeading::default(), "---\ntitle: Test\n---\n\n   \n\n");
        assert!(
            warnings.is_empty(),
            "Expected no warnings when no content after front matter"
        );
    }

    #[test]
    fn test_multiple_front_matter_types() {
        let rule = MD041FirstLineHeading::new(1, true);

        // TOML front matter with title (should fail - rule only checks for "title:" pattern)
        let warnings = lint(&rule, "+++\ntitle = \"My Document\"\n+++\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 1 heading"));

        // JSON front matter with title (should fail - doesn't have "title:" pattern, has "\"title\":")
        let warnings = lint(&rule, "{\n\"title\": \"My Document\"\n}\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 1 heading"));

        // YAML front matter with title field (standard case)
        let warnings = lint(&rule, "---\ntitle: My Document\n---\n\nContent.");
        assert!(
            warnings.is_empty(),
            "Expected no warnings for YAML front matter with title"
        );

        // Mixed format edge case - YAML-style field inside TOML delimiters
        let warnings = lint(&rule, "+++\ntitle: My Document\n+++\n\nContent.");
        assert!(warnings.is_empty(), "Expected no warnings when title: pattern is found");
    }

    #[test]
    fn test_malformed_front_matter() {
        // Malformed front matter delimiters that still carry a title field.
        let warnings = lint(
            &MD041FirstLineHeading::new(1, true),
            "- --\ntitle: My Document\n- --\n\nContent.",
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings for malformed front matter with title"
        );
    }

    #[test]
    fn test_front_matter_with_heading() {
        // Front matter without a title, followed by a correct heading.
        let warnings = lint(
            &MD041FirstLineHeading::default(),
            "---\nauthor: John Doe\n---\n\n# My Document\n\nContent.",
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings when first line after front matter is correct heading"
        );
    }

    #[test]
    fn test_fix_suggestion() {
        // A warning carries a fix suggestion that inserts a title.
        let warnings = lint(&MD041FirstLineHeading::default(), "Not a heading\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].fix.is_some());

        let suggested = warnings[0].fix.as_ref().unwrap();
        assert!(suggested.replacement.contains("# Title"));
    }

    #[test]
    fn test_complex_document_structure() {
        // Front matter, a comment, blank lines, then headings: the comment is flagged.
        let content =
            "---\nauthor: John\n---\n\n<!-- Comment -->\n\n\n# Valid Heading\n\n## Subheading\n\nContent here.";
        let warnings = lint(&MD041FirstLineHeading::default(), content);
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 5); // The comment line
    }

    #[test]
    fn test_heading_with_special_characters() {
        // Inline formatting inside the heading does not matter.
        let warnings = lint(
            &MD041FirstLineHeading::default(),
            "# Welcome to **My** _Document_ with `code`\n\nContent.",
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings for heading with inline formatting"
        );
    }

    #[test]
    fn test_level_configuration() {
        // Every configurable level accepts its own heading and rejects another one.
        for level in 1..=6 {
            let rule = MD041FirstLineHeading::new(level, false);

            // Correct level
            let matching = format!("{} Heading at Level {}\n\nContent.", "#".repeat(level), level);
            let warnings = lint(&rule, &matching);
            assert!(
                warnings.is_empty(),
                "Expected no warnings for correct level {level} heading"
            );

            // Wrong level
            let wrong_level = if level == 1 { 2 } else { 1 };
            let mismatching = format!("{} Wrong Level Heading\n\nContent.", "#".repeat(wrong_level));
            let warnings = lint(&rule, &mismatching);
            assert_eq!(warnings.len(), 1);
            assert!(warnings[0].message.contains(&format!("level {level} heading")));
        }
    }
}