// rumdl_lib/rules/md041_first_line_heading.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::range_utils::{LineIndex, calculate_line_range};
4use crate::utils::regex_cache::HTML_HEADING_PATTERN;
5use regex::Regex;
6
7/// Rule MD041: First line in file should be a top-level heading
8///
9/// See [docs/md041.md](../../docs/md041.md) for full documentation, configuration, and examples.
10
11#[derive(Clone)]
12pub struct MD041FirstLineHeading {
13    pub level: usize,
14    pub front_matter_title: bool,
15    pub front_matter_title_pattern: Option<Regex>,
16}
17
18impl Default for MD041FirstLineHeading {
19    fn default() -> Self {
20        Self {
21            level: 1,
22            front_matter_title: true,
23            front_matter_title_pattern: None,
24        }
25    }
26}
27
28impl MD041FirstLineHeading {
29    pub fn new(level: usize, front_matter_title: bool) -> Self {
30        Self {
31            level,
32            front_matter_title,
33            front_matter_title_pattern: None,
34        }
35    }
36
37    pub fn with_pattern(level: usize, front_matter_title: bool, pattern: Option<String>) -> Self {
38        let front_matter_title_pattern = pattern.and_then(|p| match Regex::new(&p) {
39            Ok(regex) => Some(regex),
40            Err(e) => {
41                log::warn!("Invalid front_matter_title_pattern regex: {e}");
42                None
43            }
44        });
45
46        Self {
47            level,
48            front_matter_title,
49            front_matter_title_pattern,
50        }
51    }
52
53    fn has_front_matter_title(&self, content: &str) -> bool {
54        if !self.front_matter_title {
55            return false;
56        }
57
58        // If we have a custom pattern, use it to search front matter content
59        if let Some(ref pattern) = self.front_matter_title_pattern {
60            let front_matter_lines = FrontMatterUtils::extract_front_matter(content);
61            for line in front_matter_lines {
62                if pattern.is_match(line) {
63                    return true;
64                }
65            }
66            return false;
67        }
68
69        // Default behavior: check for "title:" field
70        FrontMatterUtils::has_front_matter_field(content, "title:")
71    }
72
73    /// Check if a line is a non-content token that should be skipped
74    fn is_non_content_line(line: &str) -> bool {
75        let trimmed = line.trim();
76
77        // Skip reference definitions
78        if trimmed.starts_with('[') && trimmed.contains("]: ") {
79            return true;
80        }
81
82        // Skip abbreviation definitions
83        if trimmed.starts_with('*') && trimmed.contains("]: ") {
84            return true;
85        }
86
87        false
88    }
89
90    /// Check if a line is an HTML heading
91    fn is_html_heading(line: &str, level: usize) -> bool {
92        if let Ok(Some(captures)) = HTML_HEADING_PATTERN.captures(line.trim())
93            && let Some(h_level) = captures.get(1)
94        {
95            return h_level.as_str().parse::<usize>().unwrap_or(0) == level;
96        }
97        false
98    }
99}
100
101impl Rule for MD041FirstLineHeading {
102    fn name(&self) -> &'static str {
103        "MD041"
104    }
105
106    fn description(&self) -> &'static str {
107        "First line in file should be a top level heading"
108    }
109
110    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
111        let content = ctx.content;
112        let mut warnings = Vec::new();
113        if content.trim().is_empty() {
114            return Ok(warnings);
115        }
116        if self.has_front_matter_title(content) {
117            return Ok(warnings);
118        }
119
120        // Find the first non-blank line after front matter using cached info
121        let mut first_content_line_num = None;
122        let mut skip_lines = 0;
123
124        // Check for front matter
125        if ctx.lines.first().map(|l| l.content.trim()) == Some("---") {
126            // Skip front matter
127            for (idx, line_info) in ctx.lines.iter().enumerate().skip(1) {
128                if line_info.content.trim() == "---" {
129                    skip_lines = idx + 1;
130                    break;
131                }
132            }
133        }
134
135        for (line_num, line_info) in ctx.lines.iter().enumerate().skip(skip_lines) {
136            let line_content = line_info.content.trim();
137            if !line_content.is_empty() && !Self::is_non_content_line(&line_info.content) {
138                first_content_line_num = Some(line_num);
139                break;
140            }
141        }
142
143        if first_content_line_num.is_none() {
144            // No non-blank lines after front matter
145            return Ok(warnings);
146        }
147
148        let first_line_idx = first_content_line_num.unwrap();
149
150        // Check if the first non-blank line is a heading of the required level
151        let first_line_info = &ctx.lines[first_line_idx];
152        let is_correct_heading = if let Some(heading) = &first_line_info.heading {
153            heading.level as usize == self.level
154        } else {
155            // Check for HTML heading
156            Self::is_html_heading(&first_line_info.content, self.level)
157        };
158
159        if !is_correct_heading {
160            // Calculate precise character range for the entire first line
161            let first_line = first_line_idx + 1; // Convert to 1-indexed
162            let first_line_content = &first_line_info.content;
163            let (start_line, start_col, end_line, end_col) = calculate_line_range(first_line, first_line_content);
164
165            warnings.push(LintWarning {
166                rule_name: Some(self.name()),
167                line: start_line,
168                column: start_col,
169                end_line,
170                end_column: end_col,
171                message: format!("First line in file should be a level {} heading", self.level),
172                severity: Severity::Warning,
173                fix: Some(Fix {
174                    range: LineIndex::new(content.to_string()).line_col_to_byte_range_with_length(first_line, 1, 0),
175                    replacement: format!("{} Title\n\n", "#".repeat(self.level)),
176                }),
177            });
178        }
179        Ok(warnings)
180    }
181
182    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
183        let content = ctx.content;
184        let content = crate::rules::front_matter_utils::FrontMatterUtils::fix_malformed_front_matter(content);
185        if content.trim().is_empty() || self.has_front_matter_title(&content) {
186            return Ok(content.to_string());
187        }
188
189        // Re-create context for the potentially fixed content
190        let fixed_ctx = crate::lint_context::LintContext::new(&content, crate::config::MarkdownFlavor::Standard);
191
192        // Find the first non-blank line after front matter
193        let mut first_content_line_num = None;
194        let mut skip_lines = 0;
195
196        // Check for front matter
197        if fixed_ctx.lines.first().map(|l| l.content.trim()) == Some("---") {
198            // Skip front matter
199            for (idx, line_info) in fixed_ctx.lines.iter().enumerate().skip(1) {
200                if line_info.content.trim() == "---" {
201                    skip_lines = idx + 1;
202                    break;
203                }
204            }
205        }
206
207        for (line_num, line_info) in fixed_ctx.lines.iter().enumerate().skip(skip_lines) {
208            let line_content = line_info.content.trim();
209            if !line_content.is_empty() && !Self::is_non_content_line(&line_info.content) {
210                first_content_line_num = Some(line_num);
211                break;
212            }
213        }
214
215        let mut result = String::new();
216        let lines: Vec<&str> = content.lines().collect();
217
218        // Check if we have any headings at all
219        let has_any_heading = fixed_ctx.lines.iter().any(|line| line.heading.is_some());
220
221        if !has_any_heading {
222            // Add a new title at the beginning
223            result.push_str(&format!("{} Title\n\n{}", "#".repeat(self.level), content));
224        } else if let Some(first_line_idx) = first_content_line_num {
225            // Check if first content line is a heading of correct level
226            let first_line_info = &fixed_ctx.lines[first_line_idx];
227
228            if let Some(heading) = &first_line_info.heading {
229                if heading.level as usize != self.level {
230                    // Fix the existing heading level
231                    for (i, line) in lines.iter().enumerate() {
232                        if i == first_line_idx {
233                            result.push_str(&format!("{} {}", "#".repeat(self.level), heading.text));
234                        } else {
235                            result.push_str(line);
236                        }
237                        if i < lines.len() - 1 {
238                            result.push('\n');
239                        }
240                    }
241                } else {
242                    // No fix needed, return original
243                    return Ok(content.to_string());
244                }
245            } else if Self::is_html_heading(&first_line_info.content, self.level) {
246                // HTML heading with correct level, no fix needed
247                return Ok(content.to_string());
248            } else {
249                // First line is not a heading, add a new title before it
250                for (i, line) in lines.iter().enumerate() {
251                    if i == first_line_idx {
252                        result.push_str(&format!("{} Title\n\n", "#".repeat(self.level)));
253                    }
254                    result.push_str(line);
255                    if i < lines.len() - 1 {
256                        result.push('\n');
257                    }
258                }
259            }
260        } else {
261            // No content after front matter
262            return Ok(content.to_string());
263        }
264
265        Ok(result)
266    }
267
268    /// Check if this rule should be skipped
269    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
270        ctx.content.is_empty() || (self.front_matter_title && self.has_front_matter_title(ctx.content))
271    }
272
273    fn as_any(&self) -> &dyn std::any::Any {
274        self
275    }
276
277    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
278    where
279        Self: Sized,
280    {
281        let level = crate::config::get_rule_config_value::<u32>(config, "MD041", "level").unwrap_or(1);
282        let front_matter_title = crate::config::get_rule_config_value::<String>(config, "MD041", "front_matter_title")
283            .unwrap_or_else(|| "title".to_string());
284        let front_matter_title_pattern =
285            crate::config::get_rule_config_value::<String>(config, "MD041", "front_matter_title_pattern");
286
287        let level_usize = level as usize;
288        let use_front_matter = !front_matter_title.is_empty();
289
290        Box::new(MD041FirstLineHeading::with_pattern(
291            level_usize,
292            use_front_matter,
293            front_matter_title_pattern,
294        ))
295    }
296
297    fn default_config_section(&self) -> Option<(String, toml::Value)> {
298        Some((
299            "MD041".to_string(),
300            toml::toml! {
301                level = 1
302                // Pattern for matching title in front matter (regex)
303                // front_matter_title_pattern = "^(title|header):"
304            }
305            .into(),
306        ))
307    }
308}
309
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Lints `content` with `rule` under the standard flavor and returns the warnings.
    fn lint(rule: &MD041FirstLineHeading, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    #[test]
    fn test_first_line_is_heading_correct_level() {
        let rule = MD041FirstLineHeading::default();

        // A level 1 heading on the first line satisfies the rule.
        let warnings = lint(&rule, "# My Document\n\nSome content here.");
        assert!(
            warnings.is_empty(),
            "Expected no warnings when first line is a level 1 heading"
        );
    }

    #[test]
    fn test_first_line_is_heading_wrong_level() {
        let rule = MD041FirstLineHeading::default();

        // A level 2 heading violates the level 1 requirement.
        let warnings = lint(&rule, "## My Document\n\nSome content here.");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_first_line_not_heading() {
        let rule = MD041FirstLineHeading::default();

        // Plain text on the first line is flagged even if a heading follows.
        let warnings = lint(&rule, "This is not a heading\n\n# This is a heading");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_empty_lines_before_heading() {
        let rule = MD041FirstLineHeading::default();

        // Leading blank lines are skipped, so the heading still counts as first.
        let warnings = lint(&rule, "\n\n# My Document\n\nSome content.");
        assert!(
            warnings.is_empty(),
            "Expected no warnings when empty lines precede a valid heading"
        );

        // Leading blank lines followed by non-heading content are flagged.
        let warnings = lint(&rule, "\n\nNot a heading\n\nSome content.");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 3); // First non-empty line
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_front_matter_with_title() {
        let rule = MD041FirstLineHeading::new(1, true);

        // A title field in the front matter satisfies the rule.
        let warnings = lint(
            &rule,
            "---\ntitle: My Document\nauthor: John Doe\n---\n\nSome content here.",
        );
        assert!(
            warnings.is_empty(),
            "Expected no warnings when front matter has title field"
        );
    }

    #[test]
    fn test_front_matter_without_title() {
        let rule = MD041FirstLineHeading::new(1, true);

        // Front matter lacking a title field does not satisfy the rule.
        let warnings = lint(
            &rule,
            "---\nauthor: John Doe\ndate: 2024-01-01\n---\n\nSome content here.",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 6); // First content line after front matter
    }

    #[test]
    fn test_front_matter_disabled() {
        let rule = MD041FirstLineHeading::new(1, false);

        // With front_matter_title disabled, a title field is ignored.
        let warnings = lint(&rule, "---\ntitle: My Document\n---\n\nSome content here.");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 5); // First content line after front matter
    }

    #[test]
    fn test_html_comments_before_heading() {
        let rule = MD041FirstLineHeading::default();

        // An HTML comment ahead of the heading is treated as the first line.
        let warnings = lint(&rule, "<!-- This is a comment -->\n# My Document\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1); // HTML comment is the first line
    }

    #[test]
    fn test_different_heading_levels() {
        // Rule configured to require level 2.
        let rule = MD041FirstLineHeading::new(2, false);

        let warnings = lint(&rule, "## Second Level Heading\n\nContent.");
        assert!(warnings.is_empty(), "Expected no warnings for correct level 2 heading");

        // Wrong level
        let warnings = lint(&rule, "# First Level Heading\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 2 heading"));
    }

    #[test]
    fn test_setext_headings() {
        let rule = MD041FirstLineHeading::default();

        // Setext level 1 heading (underlined with `=`) passes.
        let warnings = lint(&rule, "My Document\n===========\n\nContent.");
        assert!(warnings.is_empty(), "Expected no warnings for setext level 1 heading");

        // Setext level 2 heading (underlined with `-`) fails the level 1 requirement.
        let warnings = lint(&rule, "My Document\n-----------\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 1 heading"));
    }

    #[test]
    fn test_empty_document() {
        let rule = MD041FirstLineHeading::default();

        // An empty document produces no warnings.
        let warnings = lint(&rule, "");
        assert!(warnings.is_empty(), "Expected no warnings for empty document");
    }

    #[test]
    fn test_whitespace_only_document() {
        let rule = MD041FirstLineHeading::default();

        // A whitespace-only document produces no warnings.
        let warnings = lint(&rule, "   \n\n   \t\n");
        assert!(warnings.is_empty(), "Expected no warnings for whitespace-only document");
    }

    #[test]
    fn test_front_matter_then_whitespace() {
        let rule = MD041FirstLineHeading::default();

        // Front matter followed only by whitespace produces no warnings.
        let warnings = lint(&rule, "---\ntitle: Test\n---\n\n   \n\n");
        assert!(
            warnings.is_empty(),
            "Expected no warnings when no content after front matter"
        );
    }

    #[test]
    fn test_multiple_front_matter_types() {
        let rule = MD041FirstLineHeading::new(1, true);

        // TOML front matter with title (should fail - rule only checks for "title:" pattern)
        let warnings = lint(&rule, "+++\ntitle = \"My Document\"\n+++\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 1 heading"));

        // JSON front matter with title (should fail - doesn't have "title:" pattern, has "\"title\":")
        let warnings = lint(&rule, "{\n\"title\": \"My Document\"\n}\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("level 1 heading"));

        // YAML front matter with title field (standard case)
        let warnings = lint(&rule, "---\ntitle: My Document\n---\n\nContent.");
        assert!(
            warnings.is_empty(),
            "Expected no warnings for YAML front matter with title"
        );

        // Mixed format edge case - YAML-style field inside TOML delimiters
        let warnings = lint(&rule, "+++\ntitle: My Document\n+++\n\nContent.");
        assert!(warnings.is_empty(), "Expected no warnings when title: pattern is found");
    }

    #[test]
    fn test_malformed_front_matter() {
        let rule = MD041FirstLineHeading::new(1, true);

        // Malformed front matter delimiters with a title field.
        let warnings = lint(&rule, "- --\ntitle: My Document\n- --\n\nContent.");
        assert!(
            warnings.is_empty(),
            "Expected no warnings for malformed front matter with title"
        );
    }

    #[test]
    fn test_front_matter_with_heading() {
        let rule = MD041FirstLineHeading::default();

        // Front matter without a title, followed by a correct heading.
        let warnings = lint(&rule, "---\nauthor: John Doe\n---\n\n# My Document\n\nContent.");
        assert!(
            warnings.is_empty(),
            "Expected no warnings when first line after front matter is correct heading"
        );
    }

    #[test]
    fn test_fix_suggestion() {
        let rule = MD041FirstLineHeading::default();

        // A violation carries a fix suggestion that inserts a title.
        let warnings = lint(&rule, "Not a heading\n\nContent.");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].fix.is_some());

        let suggested = warnings[0].fix.as_ref().unwrap();
        assert!(suggested.replacement.contains("# Title"));
    }

    #[test]
    fn test_complex_document_structure() {
        let rule = MD041FirstLineHeading::default();

        // Front matter, then a comment, blank lines and finally headings.
        let warnings = lint(
            &rule,
            "---\nauthor: John\n---\n\n<!-- Comment -->\n\n\n# Valid Heading\n\n## Subheading\n\nContent here.",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 5); // The comment line
    }

    #[test]
    fn test_heading_with_special_characters() {
        let rule = MD041FirstLineHeading::default();

        // Inline formatting inside the heading does not matter.
        let warnings = lint(&rule, "# Welcome to **My** _Document_ with `code`\n\nContent.");
        assert!(
            warnings.is_empty(),
            "Expected no warnings for heading with inline formatting"
        );
    }

    #[test]
    fn test_level_configuration() {
        // Exercise every heading level from 1 through 6.
        for level in 1..=6 {
            let rule = MD041FirstLineHeading::new(level, false);

            // Correct level
            let matching = format!("{} Heading at Level {}\n\nContent.", "#".repeat(level), level);
            let warnings = lint(&rule, &matching);
            assert!(
                warnings.is_empty(),
                "Expected no warnings for correct level {level} heading"
            );

            // Wrong level
            let wrong_level = if level == 1 { 2 } else { 1 };
            let mismatched = format!("{} Wrong Level Heading\n\nContent.", "#".repeat(wrong_level));
            let warnings = lint(&rule, &mismatched);
            assert_eq!(warnings.len(), 1);
            assert!(warnings[0].message.contains(&format!("level {level} heading")));
        }
    }
}