Skip to main content

rumdl_lib/rules/md041_first_line_heading/
mod.rs

1mod md041_config;
2
3pub use md041_config::MD041Config;
4
5use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
6use crate::rules::front_matter_utils::FrontMatterUtils;
7use crate::utils::mkdocs_attr_list::is_mkdocs_anchor_line;
8use crate::utils::range_utils::calculate_line_range;
9use crate::utils::regex_cache::HTML_HEADING_PATTERN;
10use regex::Regex;
11
12/// Rule MD041: First line in file should be a top-level heading
13///
14/// See [docs/md041.md](../../docs/md041.md) for full documentation, configuration, and examples.
15
#[derive(Clone)]
pub struct MD041FirstLineHeading {
    /// Heading level the first content line must have (the default is 1).
    pub level: usize,
    /// When `true`, a title found in the front matter exempts the file from this rule.
    /// When `false`, front matter is never inspected for a title.
    pub front_matter_title: bool,
    /// Optional regex tested against each front-matter line to detect a title.
    /// When `None`, the rule falls back to looking for a `title:` field.
    pub front_matter_title_pattern: Option<Regex>,
}
22
23impl Default for MD041FirstLineHeading {
24    fn default() -> Self {
25        Self {
26            level: 1,
27            front_matter_title: true,
28            front_matter_title_pattern: None,
29        }
30    }
31}
32
33impl MD041FirstLineHeading {
34    pub fn new(level: usize, front_matter_title: bool) -> Self {
35        Self {
36            level,
37            front_matter_title,
38            front_matter_title_pattern: None,
39        }
40    }
41
42    pub fn with_pattern(level: usize, front_matter_title: bool, pattern: Option<String>) -> Self {
43        let front_matter_title_pattern = pattern.and_then(|p| match Regex::new(&p) {
44            Ok(regex) => Some(regex),
45            Err(e) => {
46                log::warn!("Invalid front_matter_title_pattern regex: {e}");
47                None
48            }
49        });
50
51        Self {
52            level,
53            front_matter_title,
54            front_matter_title_pattern,
55        }
56    }
57
58    fn has_front_matter_title(&self, content: &str) -> bool {
59        if !self.front_matter_title {
60            return false;
61        }
62
63        // If we have a custom pattern, use it to search front matter content
64        if let Some(ref pattern) = self.front_matter_title_pattern {
65            let front_matter_lines = FrontMatterUtils::extract_front_matter(content);
66            for line in front_matter_lines {
67                if pattern.is_match(line) {
68                    return true;
69                }
70            }
71            return false;
72        }
73
74        // Default behavior: check for "title:" field
75        FrontMatterUtils::has_front_matter_field(content, "title:")
76    }
77
78    /// Check if a line is a non-content token that should be skipped
79    fn is_non_content_line(line: &str) -> bool {
80        let trimmed = line.trim();
81
82        // Skip reference definitions
83        if trimmed.starts_with('[') && trimmed.contains("]: ") {
84            return true;
85        }
86
87        // Skip abbreviation definitions
88        if trimmed.starts_with('*') && trimmed.contains("]: ") {
89            return true;
90        }
91
92        // Skip badge/shield images - common pattern at top of READMEs
93        // Matches: ![badge](url) or [![badge](url)](url)
94        if Self::is_badge_image_line(trimmed) {
95            return true;
96        }
97
98        false
99    }
100
101    /// Check if a line consists only of badge/shield images
102    /// Common patterns:
103    /// - `![badge](url)`
104    /// - `[![badge](url)](url)` (linked badge)
105    /// - Multiple badges on one line
106    fn is_badge_image_line(line: &str) -> bool {
107        if line.is_empty() {
108            return false;
109        }
110
111        // Must start with image syntax
112        if !line.starts_with('!') && !line.starts_with('[') {
113            return false;
114        }
115
116        // Check if line contains only image/link patterns and whitespace
117        let mut remaining = line;
118        while !remaining.is_empty() {
119            remaining = remaining.trim_start();
120            if remaining.is_empty() {
121                break;
122            }
123
124            // Linked image: [![alt](img-url)](link-url)
125            if remaining.starts_with("[![") {
126                if let Some(end) = Self::find_linked_image_end(remaining) {
127                    remaining = &remaining[end..];
128                    continue;
129                }
130                return false;
131            }
132
133            // Simple image: ![alt](url)
134            if remaining.starts_with("![") {
135                if let Some(end) = Self::find_image_end(remaining) {
136                    remaining = &remaining[end..];
137                    continue;
138                }
139                return false;
140            }
141
142            // Not an image pattern
143            return false;
144        }
145
146        true
147    }
148
149    /// Find the end of an image pattern ![alt](url)
150    fn find_image_end(s: &str) -> Option<usize> {
151        if !s.starts_with("![") {
152            return None;
153        }
154        // Find ]( after ![
155        let alt_end = s[2..].find("](")?;
156        let paren_start = 2 + alt_end + 2; // Position after ](
157        // Find closing )
158        let paren_end = s[paren_start..].find(')')?;
159        Some(paren_start + paren_end + 1)
160    }
161
162    /// Find the end of a linked image pattern [![alt](img-url)](link-url)
163    fn find_linked_image_end(s: &str) -> Option<usize> {
164        if !s.starts_with("[![") {
165            return None;
166        }
167        // Find the inner image first
168        let inner_end = Self::find_image_end(&s[1..])?;
169        let after_inner = 1 + inner_end;
170        // Should be followed by ](url)
171        if !s[after_inner..].starts_with("](") {
172            return None;
173        }
174        let link_start = after_inner + 2;
175        let link_end = s[link_start..].find(')')?;
176        Some(link_start + link_end + 1)
177    }
178
179    /// Check if a line is an HTML heading using the centralized HTML parser
180    fn is_html_heading(ctx: &crate::lint_context::LintContext, first_line_idx: usize, level: usize) -> bool {
181        // Check for single-line HTML heading using regex (fast path)
182        let first_line_content = ctx.lines[first_line_idx].content(ctx.content);
183        if let Ok(Some(captures)) = HTML_HEADING_PATTERN.captures(first_line_content.trim())
184            && let Some(h_level) = captures.get(1)
185            && h_level.as_str().parse::<usize>().unwrap_or(0) == level
186        {
187            return true;
188        }
189
190        // Use centralized HTML parser for multi-line headings
191        let html_tags = ctx.html_tags();
192        let target_tag = format!("h{level}");
193
194        // Find opening tag on first line
195        let opening_index = html_tags.iter().position(|tag| {
196            tag.line == first_line_idx + 1 // HtmlTag uses 1-indexed lines
197                && tag.tag_name == target_tag
198                && !tag.is_closing
199        });
200
201        let Some(open_idx) = opening_index else {
202            return false;
203        };
204
205        // Walk HTML tags to find the corresponding closing tag, allowing arbitrary nesting depth.
206        // This avoids brittle line-count heuristics and handles long headings with nested content.
207        let mut depth = 1usize;
208        for tag in html_tags.iter().skip(open_idx + 1) {
209            // Ignore tags that appear before the first heading line (possible when multiple tags share a line)
210            if tag.line <= first_line_idx + 1 {
211                continue;
212            }
213
214            if tag.tag_name == target_tag {
215                if tag.is_closing {
216                    depth -= 1;
217                    if depth == 0 {
218                        return true;
219                    }
220                } else if !tag.is_self_closing {
221                    depth += 1;
222                }
223            }
224        }
225
226        false
227    }
228}
229
230impl Rule for MD041FirstLineHeading {
    /// Returns the rule identifier, `"MD041"`.
    fn name(&self) -> &'static str {
        "MD041"
    }
234
    /// Returns a one-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "First line in file should be a top level heading"
    }
238
239    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
240        let mut warnings = Vec::new();
241
242        // Check if we should skip this file
243        if self.should_skip(ctx) {
244            return Ok(warnings);
245        }
246
247        // Find the first non-blank line after front matter using cached info
248        let mut first_content_line_num = None;
249        let mut skip_lines = 0;
250
251        // Check for front matter
252        if ctx.lines.first().map(|l| l.content(ctx.content).trim()) == Some("---") {
253            // Skip front matter
254            for (idx, line_info) in ctx.lines.iter().enumerate().skip(1) {
255                if line_info.content(ctx.content).trim() == "---" {
256                    skip_lines = idx + 1;
257                    break;
258                }
259            }
260        }
261
262        // Check if we're in MkDocs flavor
263        let is_mkdocs = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
264
265        for (line_num, line_info) in ctx.lines.iter().enumerate().skip(skip_lines) {
266            let line_content = line_info.content(ctx.content);
267            let trimmed = line_content.trim();
268            // Skip ESM blocks in MDX files (import/export statements)
269            if line_info.in_esm_block {
270                continue;
271            }
272            // Skip HTML comments - they are non-visible and should not affect MD041
273            if line_info.in_html_comment {
274                continue;
275            }
276            // Skip MkDocs anchor lines (empty link with attr_list) when in MkDocs flavor
277            if is_mkdocs && is_mkdocs_anchor_line(line_content) {
278                continue;
279            }
280            if !trimmed.is_empty() && !Self::is_non_content_line(line_content) {
281                first_content_line_num = Some(line_num);
282                break;
283            }
284        }
285
286        if first_content_line_num.is_none() {
287            // No non-blank lines after front matter
288            return Ok(warnings);
289        }
290
291        let first_line_idx = first_content_line_num.unwrap();
292
293        // Check if the first non-blank line is a heading of the required level
294        let first_line_info = &ctx.lines[first_line_idx];
295        let is_correct_heading = if let Some(heading) = &first_line_info.heading {
296            heading.level as usize == self.level
297        } else {
298            // Check for HTML heading (both single-line and multi-line)
299            Self::is_html_heading(ctx, first_line_idx, self.level)
300        };
301
302        if !is_correct_heading {
303            // Calculate precise character range for the entire first line
304            let first_line = first_line_idx + 1; // Convert to 1-indexed
305            let first_line_content = first_line_info.content(ctx.content);
306            let (start_line, start_col, end_line, end_col) = calculate_line_range(first_line, first_line_content);
307
308            warnings.push(LintWarning {
309                rule_name: Some(self.name().to_string()),
310                line: start_line,
311                column: start_col,
312                end_line,
313                end_column: end_col,
314                message: format!("First line in file should be a level {} heading", self.level),
315                severity: Severity::Warning,
316                fix: None, // MD041 no longer provides auto-fix suggestions
317            });
318        }
319        Ok(warnings)
320    }
321
    /// Returns the document content unchanged; MD041 is detection-only.
    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
        // MD041 should not auto-fix - adding content/titles is a decision that should be made by the document author
        // This rule now only detects and warns about missing titles, but does not automatically add them
        Ok(ctx.content.to_string())
    }
327
328    /// Check if this rule should be skipped
329    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
330        // Skip files that are purely preprocessor directives (e.g., mdBook includes).
331        // These files are composition/routing metadata, not standalone content.
332        // Example: A file containing only "{{#include ../../README.md}}" is a
333        // pointer to content, not content itself, and shouldn't need a heading.
334        let only_directives = !ctx.content.is_empty()
335            && ctx.content.lines().filter(|l| !l.trim().is_empty()).all(|l| {
336                let t = l.trim();
337                // mdBook directives: {{#include}}, {{#playground}}, {{#rustdoc_include}}, etc.
338                (t.starts_with("{{#") && t.ends_with("}}"))
339                        // HTML comments often accompany directives
340                        || (t.starts_with("<!--") && t.ends_with("-->"))
341            });
342
343        ctx.content.is_empty()
344            || (self.front_matter_title && self.has_front_matter_title(ctx.content))
345            || only_directives
346    }
347
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
351
352    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
353    where
354        Self: Sized,
355    {
356        // Load config using serde with kebab-case support
357        let md041_config = crate::rule_config_serde::load_rule_config::<MD041Config>(config);
358
359        let use_front_matter = !md041_config.front_matter_title.is_empty();
360
361        Box::new(MD041FirstLineHeading::with_pattern(
362            md041_config.level.as_usize(),
363            use_front_matter,
364            md041_config.front_matter_title_pattern,
365        ))
366    }
367
    /// Returns the default configuration section for this rule: the section
    /// name `"MD041"` paired with a TOML table of the default option values.
    fn default_config_section(&self) -> Option<(String, toml::Value)> {
        Some((
            "MD041".to_string(),
            // Keys use kebab-case, matching the serde-based config loading in `from_config`.
            toml::toml! {
                level = 1
                front-matter-title = "title"
                front-matter-title-pattern = ""
            }
            .into(),
        ))
    }
379}
380
381#[cfg(test)]
382mod tests {
383    use super::*;
384    use crate::lint_context::LintContext;
385
386    #[test]
387    fn test_first_line_is_heading_correct_level() {
388        let rule = MD041FirstLineHeading::default();
389
390        // First line is a level 1 heading (should pass)
391        let content = "# My Document\n\nSome content here.";
392        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
393        let result = rule.check(&ctx).unwrap();
394        assert!(
395            result.is_empty(),
396            "Expected no warnings when first line is a level 1 heading"
397        );
398    }
399
400    #[test]
401    fn test_first_line_is_heading_wrong_level() {
402        let rule = MD041FirstLineHeading::default();
403
404        // First line is a level 2 heading (should fail with level 1 requirement)
405        let content = "## My Document\n\nSome content here.";
406        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
407        let result = rule.check(&ctx).unwrap();
408        assert_eq!(result.len(), 1);
409        assert_eq!(result[0].line, 1);
410        assert!(result[0].message.contains("level 1 heading"));
411    }
412
413    #[test]
414    fn test_first_line_not_heading() {
415        let rule = MD041FirstLineHeading::default();
416
417        // First line is plain text (should fail)
418        let content = "This is not a heading\n\n# This is a heading";
419        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
420        let result = rule.check(&ctx).unwrap();
421        assert_eq!(result.len(), 1);
422        assert_eq!(result[0].line, 1);
423        assert!(result[0].message.contains("level 1 heading"));
424    }
425
426    #[test]
427    fn test_empty_lines_before_heading() {
428        let rule = MD041FirstLineHeading::default();
429
430        // Empty lines before first heading (should pass - rule skips empty lines)
431        let content = "\n\n# My Document\n\nSome content.";
432        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
433        let result = rule.check(&ctx).unwrap();
434        assert!(
435            result.is_empty(),
436            "Expected no warnings when empty lines precede a valid heading"
437        );
438
439        // Empty lines before non-heading content (should fail)
440        let content = "\n\nNot a heading\n\nSome content.";
441        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
442        let result = rule.check(&ctx).unwrap();
443        assert_eq!(result.len(), 1);
444        assert_eq!(result[0].line, 3); // First non-empty line
445        assert!(result[0].message.contains("level 1 heading"));
446    }
447
448    #[test]
449    fn test_front_matter_with_title() {
450        let rule = MD041FirstLineHeading::new(1, true);
451
452        // Front matter with title field (should pass)
453        let content = "---\ntitle: My Document\nauthor: John Doe\n---\n\nSome content here.";
454        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
455        let result = rule.check(&ctx).unwrap();
456        assert!(
457            result.is_empty(),
458            "Expected no warnings when front matter has title field"
459        );
460    }
461
462    #[test]
463    fn test_front_matter_without_title() {
464        let rule = MD041FirstLineHeading::new(1, true);
465
466        // Front matter without title field (should fail)
467        let content = "---\nauthor: John Doe\ndate: 2024-01-01\n---\n\nSome content here.";
468        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
469        let result = rule.check(&ctx).unwrap();
470        assert_eq!(result.len(), 1);
471        assert_eq!(result[0].line, 6); // First content line after front matter
472    }
473
474    #[test]
475    fn test_front_matter_disabled() {
476        let rule = MD041FirstLineHeading::new(1, false);
477
478        // Front matter with title field but front_matter_title is false (should fail)
479        let content = "---\ntitle: My Document\n---\n\nSome content here.";
480        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
481        let result = rule.check(&ctx).unwrap();
482        assert_eq!(result.len(), 1);
483        assert_eq!(result[0].line, 5); // First content line after front matter
484    }
485
486    #[test]
487    fn test_html_comments_before_heading() {
488        let rule = MD041FirstLineHeading::default();
489
490        // HTML comment before heading (should pass - comments are skipped, issue #155)
491        let content = "<!-- This is a comment -->\n# My Document\n\nContent.";
492        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
493        let result = rule.check(&ctx).unwrap();
494        assert!(
495            result.is_empty(),
496            "HTML comments should be skipped when checking for first heading"
497        );
498    }
499
500    #[test]
501    fn test_multiline_html_comment_before_heading() {
502        let rule = MD041FirstLineHeading::default();
503
504        // Multi-line HTML comment before heading (should pass - issue #155)
505        let content = "<!--\nThis is a multi-line\nHTML comment\n-->\n# My Document\n\nContent.";
506        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
507        let result = rule.check(&ctx).unwrap();
508        assert!(
509            result.is_empty(),
510            "Multi-line HTML comments should be skipped when checking for first heading"
511        );
512    }
513
514    #[test]
515    fn test_html_comment_with_blank_lines_before_heading() {
516        let rule = MD041FirstLineHeading::default();
517
518        // HTML comment with blank lines before heading (should pass - issue #155)
519        let content = "<!-- This is a comment -->\n\n# My Document\n\nContent.";
520        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
521        let result = rule.check(&ctx).unwrap();
522        assert!(
523            result.is_empty(),
524            "HTML comments with blank lines should be skipped when checking for first heading"
525        );
526    }
527
528    #[test]
529    fn test_html_comment_before_html_heading() {
530        let rule = MD041FirstLineHeading::default();
531
532        // HTML comment before HTML heading (should pass - issue #155)
533        let content = "<!-- This is a comment -->\n<h1>My Document</h1>\n\nContent.";
534        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
535        let result = rule.check(&ctx).unwrap();
536        assert!(
537            result.is_empty(),
538            "HTML comments should be skipped before HTML headings"
539        );
540    }
541
542    #[test]
543    fn test_document_with_only_html_comments() {
544        let rule = MD041FirstLineHeading::default();
545
546        // Document with only HTML comments (should pass - no warnings for comment-only files)
547        let content = "<!-- This is a comment -->\n<!-- Another comment -->";
548        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
549        let result = rule.check(&ctx).unwrap();
550        assert!(
551            result.is_empty(),
552            "Documents with only HTML comments should not trigger MD041"
553        );
554    }
555
556    #[test]
557    fn test_html_comment_followed_by_non_heading() {
558        let rule = MD041FirstLineHeading::default();
559
560        // HTML comment followed by non-heading content (should still fail - issue #155)
561        let content = "<!-- This is a comment -->\nThis is not a heading\n\nSome content.";
562        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
563        let result = rule.check(&ctx).unwrap();
564        assert_eq!(
565            result.len(),
566            1,
567            "HTML comment followed by non-heading should still trigger MD041"
568        );
569        assert_eq!(
570            result[0].line, 2,
571            "Warning should be on the first non-comment, non-heading line"
572        );
573    }
574
575    #[test]
576    fn test_multiple_html_comments_before_heading() {
577        let rule = MD041FirstLineHeading::default();
578
579        // Multiple HTML comments before heading (should pass - issue #155)
580        let content = "<!-- First comment -->\n<!-- Second comment -->\n# My Document\n\nContent.";
581        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
582        let result = rule.check(&ctx).unwrap();
583        assert!(
584            result.is_empty(),
585            "Multiple HTML comments should all be skipped before heading"
586        );
587    }
588
589    #[test]
590    fn test_html_comment_with_wrong_level_heading() {
591        let rule = MD041FirstLineHeading::default();
592
593        // HTML comment followed by wrong-level heading (should fail - issue #155)
594        let content = "<!-- This is a comment -->\n## Wrong Level Heading\n\nContent.";
595        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
596        let result = rule.check(&ctx).unwrap();
597        assert_eq!(
598            result.len(),
599            1,
600            "HTML comment followed by wrong-level heading should still trigger MD041"
601        );
602        assert!(
603            result[0].message.contains("level 1 heading"),
604            "Should require level 1 heading"
605        );
606    }
607
608    #[test]
609    fn test_html_comment_mixed_with_reference_definitions() {
610        let rule = MD041FirstLineHeading::default();
611
612        // HTML comment mixed with reference definitions before heading (should pass - issue #155)
613        let content = "<!-- Comment -->\n[ref]: https://example.com\n# My Document\n\nContent.";
614        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
615        let result = rule.check(&ctx).unwrap();
616        assert!(
617            result.is_empty(),
618            "HTML comments and reference definitions should both be skipped before heading"
619        );
620    }
621
622    #[test]
623    fn test_html_comment_after_front_matter() {
624        let rule = MD041FirstLineHeading::default();
625
626        // HTML comment after front matter, before heading (should pass - issue #155)
627        let content = "---\nauthor: John\n---\n<!-- Comment -->\n# My Document\n\nContent.";
628        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
629        let result = rule.check(&ctx).unwrap();
630        assert!(
631            result.is_empty(),
632            "HTML comments after front matter should be skipped before heading"
633        );
634    }
635
636    #[test]
637    fn test_html_comment_not_at_start_should_not_affect_rule() {
638        let rule = MD041FirstLineHeading::default();
639
640        // HTML comment in middle of document should not affect MD041 check
641        let content = "# Valid Heading\n\nSome content.\n\n<!-- Comment in middle -->\n\nMore content.";
642        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
643        let result = rule.check(&ctx).unwrap();
644        assert!(
645            result.is_empty(),
646            "HTML comments in middle of document should not affect MD041 (only first content matters)"
647        );
648    }
649
650    #[test]
651    fn test_multiline_html_comment_followed_by_non_heading() {
652        let rule = MD041FirstLineHeading::default();
653
654        // Multi-line HTML comment followed by non-heading (should still fail - issue #155)
655        let content = "<!--\nMulti-line\ncomment\n-->\nThis is not a heading\n\nContent.";
656        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
657        let result = rule.check(&ctx).unwrap();
658        assert_eq!(
659            result.len(),
660            1,
661            "Multi-line HTML comment followed by non-heading should still trigger MD041"
662        );
663        assert_eq!(
664            result[0].line, 5,
665            "Warning should be on the first non-comment, non-heading line"
666        );
667    }
668
669    #[test]
670    fn test_different_heading_levels() {
671        // Test with level 2 requirement
672        let rule = MD041FirstLineHeading::new(2, false);
673
674        let content = "## Second Level Heading\n\nContent.";
675        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
676        let result = rule.check(&ctx).unwrap();
677        assert!(result.is_empty(), "Expected no warnings for correct level 2 heading");
678
679        // Wrong level
680        let content = "# First Level Heading\n\nContent.";
681        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
682        let result = rule.check(&ctx).unwrap();
683        assert_eq!(result.len(), 1);
684        assert!(result[0].message.contains("level 2 heading"));
685    }
686
687    #[test]
688    fn test_setext_headings() {
689        let rule = MD041FirstLineHeading::default();
690
691        // Setext style level 1 heading (should pass)
692        let content = "My Document\n===========\n\nContent.";
693        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
694        let result = rule.check(&ctx).unwrap();
695        assert!(result.is_empty(), "Expected no warnings for setext level 1 heading");
696
697        // Setext style level 2 heading (should fail with level 1 requirement)
698        let content = "My Document\n-----------\n\nContent.";
699        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
700        let result = rule.check(&ctx).unwrap();
701        assert_eq!(result.len(), 1);
702        assert!(result[0].message.contains("level 1 heading"));
703    }
704
705    #[test]
706    fn test_empty_document() {
707        let rule = MD041FirstLineHeading::default();
708
709        // Empty document (should pass - no warnings)
710        let content = "";
711        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
712        let result = rule.check(&ctx).unwrap();
713        assert!(result.is_empty(), "Expected no warnings for empty document");
714    }
715
716    #[test]
717    fn test_whitespace_only_document() {
718        let rule = MD041FirstLineHeading::default();
719
720        // Document with only whitespace (should pass - no warnings)
721        let content = "   \n\n   \t\n";
722        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
723        let result = rule.check(&ctx).unwrap();
724        assert!(result.is_empty(), "Expected no warnings for whitespace-only document");
725    }
726
727    #[test]
728    fn test_front_matter_then_whitespace() {
729        let rule = MD041FirstLineHeading::default();
730
731        // Front matter followed by only whitespace (should pass - no warnings)
732        let content = "---\ntitle: Test\n---\n\n   \n\n";
733        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
734        let result = rule.check(&ctx).unwrap();
735        assert!(
736            result.is_empty(),
737            "Expected no warnings when no content after front matter"
738        );
739    }
740
741    #[test]
742    fn test_multiple_front_matter_types() {
743        let rule = MD041FirstLineHeading::new(1, true);
744
745        // TOML front matter with title (should fail - rule only checks for "title:" pattern)
746        let content = "+++\ntitle = \"My Document\"\n+++\n\nContent.";
747        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
748        let result = rule.check(&ctx).unwrap();
749        assert_eq!(result.len(), 1);
750        assert!(result[0].message.contains("level 1 heading"));
751
752        // JSON front matter with title (should fail - doesn't have "title:" pattern, has "\"title\":")
753        let content = "{\n\"title\": \"My Document\"\n}\n\nContent.";
754        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
755        let result = rule.check(&ctx).unwrap();
756        assert_eq!(result.len(), 1);
757        assert!(result[0].message.contains("level 1 heading"));
758
759        // YAML front matter with title field (standard case)
760        let content = "---\ntitle: My Document\n---\n\nContent.";
761        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
762        let result = rule.check(&ctx).unwrap();
763        assert!(
764            result.is_empty(),
765            "Expected no warnings for YAML front matter with title"
766        );
767
768        // Test mixed format edge case - YAML-style in TOML
769        let content = "+++\ntitle: My Document\n+++\n\nContent.";
770        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
771        let result = rule.check(&ctx).unwrap();
772        assert!(result.is_empty(), "Expected no warnings when title: pattern is found");
773    }
774
#[test]
fn test_malformed_front_matter() {
    // The fences are written `- --` instead of `---`; a `title:` line sits
    // between them and the test expects the document to pass.
    let markdown = "- --\ntitle: My Document\n- --\n\nContent.";
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);

    let warnings = MD041FirstLineHeading::new(1, true).check(&context).unwrap();
    assert!(
        warnings.is_empty(),
        "Expected no warnings for malformed front matter with title"
    );
}
788
#[test]
fn test_front_matter_with_heading() {
    use crate::config::MarkdownFlavor;

    // The front matter carries no title, but the first content line after it
    // is a level-1 heading, so the default rule should accept the document.
    let markdown = "---\nauthor: John Doe\n---\n\n# My Document\n\nContent.";
    let linter = MD041FirstLineHeading::default();
    let context = LintContext::new(markdown, MarkdownFlavor::Standard, None);
    let warnings = linter.check(&context).unwrap();

    assert!(
        warnings.is_empty(),
        "Expected no warnings when first line after front matter is correct heading"
    );
}
802
#[test]
fn test_no_fix_suggestion() {
    // A document that opens with a paragraph yields exactly one warning, and
    // that warning must not carry a fix.
    let markdown = "Not a heading\n\nContent.";
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = MD041FirstLineHeading::default().check(&context).unwrap();

    assert_eq!(warnings.len(), 1);
    let only_warning = &warnings[0];
    assert!(only_warning.fix.is_none(), "MD041 should not provide fix suggestions");
}
814
#[test]
fn test_complex_document_structure() {
    // Front matter, then an HTML comment and blank lines, then the heading.
    // The comment is expected to be skipped (issue #155).
    let markdown = concat!(
        "---\nauthor: John\n---\n\n",
        "<!-- Comment -->\n\n\n",
        "# Valid Heading\n\n",
        "## Subheading\n\n",
        "Content here.",
    );

    let linter = MD041FirstLineHeading::default();
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = linter.check(&context).unwrap();
    assert!(
        warnings.is_empty(),
        "HTML comments should be skipped, so first heading after comment should be valid"
    );
}
829
#[test]
fn test_heading_with_special_characters() {
    use crate::config::MarkdownFlavor;

    // The level-1 heading contains bold, emphasis, and inline code markup.
    let markdown = "# Welcome to **My** _Document_ with `code`\n\nContent.";
    let context = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let warnings = MD041FirstLineHeading::default().check(&context).unwrap();
    assert!(
        warnings.is_empty(),
        "Expected no warnings for heading with inline formatting"
    );
}
843
#[test]
fn test_level_configuration() {
    // For every configurable level, an ATX heading of that level must pass and
    // a heading of a different level must be reported once.
    for level in 1..=6 {
        let linter = MD041FirstLineHeading::new(level, false);

        // Heading at the configured level.
        let hashes = "#".repeat(level);
        let matching = format!("{hashes} Heading at Level {level}\n\nContent.");
        let context = LintContext::new(&matching, crate::config::MarkdownFlavor::Standard, None);
        let warnings = linter.check(&context).unwrap();
        assert!(
            warnings.is_empty(),
            "Expected no warnings for correct level {level} heading"
        );

        // Heading at some other level: 2 when level 1 is required, else 1.
        let other_level = match level {
            1 => 2,
            _ => 1,
        };
        let other_hashes = "#".repeat(other_level);
        let mismatched = format!("{other_hashes} Wrong Level Heading\n\nContent.");
        let context = LintContext::new(&mismatched, crate::config::MarkdownFlavor::Standard, None);
        let warnings = linter.check(&context).unwrap();
        assert_eq!(warnings.len(), 1);
        let expected_fragment = format!("level {level} heading");
        assert!(warnings[0].message.contains(&expected_fragment));
    }
}
868
#[test]
fn test_issue_152_multiline_html_heading() {
    // Issue #152: an <h1> whose opening tag, text, and closing tag sit on
    // separate lines should count as the first heading.
    let markdown = "<h1>\nSome text\n</h1>";
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = MD041FirstLineHeading::default().check(&context).unwrap();

    assert!(
        warnings.is_empty(),
        "Issue #152: Multi-line HTML h1 should be recognized as valid heading"
    );
}
882
#[test]
fn test_multiline_html_heading_with_attributes() {
    use crate::config::MarkdownFlavor;

    // Same multi-line layout, but the opening tag carries class and id attributes.
    let markdown = "<h1 class=\"title\" id=\"main\">\nHeading Text\n</h1>\n\nContent.";
    let linter = MD041FirstLineHeading::default();
    let context = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let warnings = linter.check(&context).unwrap();
    assert!(
        warnings.is_empty(),
        "Multi-line HTML heading with attributes should be recognized"
    );
}
896
#[test]
fn test_multiline_html_heading_wrong_level() {
    // A multi-line <h2> does not satisfy the default level-1 requirement.
    let markdown = "<h2>\nSome text\n</h2>";
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = MD041FirstLineHeading::default().check(&context).unwrap();

    assert_eq!(warnings.len(), 1);
    let reported = &warnings[0].message;
    assert!(reported.contains("level 1 heading"));
}
908
#[test]
fn test_multiline_html_heading_with_content_after() {
    // A paragraph following the closing </h1> must not affect the result.
    let markdown = "<h1>\nMy Document\n</h1>\n\nThis is the document content.";
    let linter = MD041FirstLineHeading::default();

    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = linter.check(&context).unwrap();
    assert!(
        warnings.is_empty(),
        "Multi-line HTML heading followed by content should be valid"
    );
}
922
#[test]
fn test_multiline_html_heading_incomplete() {
    use crate::config::MarkdownFlavor;

    // The <h1> is opened but never closed, so the test expects one warning.
    let markdown = "<h1>\nSome text\n\nMore content without closing tag";
    let context = LintContext::new(markdown, MarkdownFlavor::Standard, None);
    let warnings = MD041FirstLineHeading::default().check(&context).unwrap();

    assert_eq!(warnings.len(), 1);
    let reported = &warnings[0].message;
    assert!(reported.contains("level 1 heading"));
}
934
#[test]
fn test_singleline_html_heading_still_works() {
    // An <h1> written entirely on one line must still be accepted.
    let markdown = "<h1>My Document</h1>\n\nContent.";
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);

    let linter = MD041FirstLineHeading::default();
    let warnings = linter.check(&context).unwrap();
    assert!(
        warnings.is_empty(),
        "Single-line HTML headings should still be recognized"
    );
}
948
#[test]
fn test_multiline_html_heading_with_nested_tags() {
    use crate::config::MarkdownFlavor;

    // The heading body contains an inline <strong> element.
    let markdown = "<h1>\n<strong>Bold</strong> Heading\n</h1>";
    let context = LintContext::new(markdown, MarkdownFlavor::Standard, None);
    let warnings = MD041FirstLineHeading::default().check(&context).unwrap();

    assert!(
        warnings.is_empty(),
        "Multi-line HTML heading with nested tags should be recognized"
    );
}
962
#[test]
fn test_multiline_html_heading_various_levels() {
    // For each configurable level, a multi-line <hN> of that level must pass
    // and a multi-line heading of a different level must be reported once.
    for level in 1..=6 {
        let linter = MD041FirstLineHeading::new(level, false);

        // Heading tag matching the configured level.
        let tag = format!("h{level}");
        let matching = format!("<{tag}>\nHeading Text\n</{tag}>\n\nContent.");
        let context = LintContext::new(&matching, crate::config::MarkdownFlavor::Standard, None);
        let warnings = linter.check(&context).unwrap();
        assert!(
            warnings.is_empty(),
            "Multi-line HTML heading at level {level} should be recognized"
        );

        // Heading tag at another level: h2 when level 1 is required, else h1.
        let other_level = match level {
            1 => 2,
            _ => 1,
        };
        let other_tag = format!("h{other_level}");
        let mismatched = format!("<{other_tag}>\nHeading Text\n</{other_tag}>\n\nContent.");
        let context = LintContext::new(&mismatched, crate::config::MarkdownFlavor::Standard, None);
        let warnings = linter.check(&context).unwrap();
        assert_eq!(warnings.len(), 1);
        let expected_fragment = format!("level {level} heading");
        assert!(warnings[0].message.contains(&expected_fragment));
    }
}
987
#[test]
fn test_issue_152_nested_heading_spans_many_lines() {
    // An <h1> wrapping a <div> whose child tags have attributes spread over
    // several lines; written one source line per document line.
    let markdown = concat!(
        "<h1>\n",
        "  <div>\n",
        "    <img\n",
        "      href=\"https://example.com/image.png\"\n",
        "      alt=\"Example Image\"\n",
        "    />\n",
        "    <a\n",
        "      href=\"https://example.com\"\n",
        "    >Example Project</a>\n",
        "    <span>Documentation</span>\n",
        "  </div>\n",
        "</h1>",
    );

    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = MD041FirstLineHeading::default().check(&context).unwrap();
    assert!(warnings.is_empty(), "Nested multi-line HTML heading should be recognized");
}
997
#[test]
fn test_issue_152_picture_tag_heading() {
    // An <h1> wrapping a <picture> with light/dark <source> variants and a
    // fallback <img>; written one source line per document line.
    let markdown = concat!(
        "<h1>\n",
        "  <picture>\n",
        "    <source\n",
        "      srcset=\"https://example.com/light.png\"\n",
        "      media=\"(prefers-color-scheme: light)\"\n",
        "    />\n",
        "    <source\n",
        "      srcset=\"https://example.com/dark.png\"\n",
        "      media=\"(prefers-color-scheme: dark)\"\n",
        "    />\n",
        "    <img src=\"https://example.com/default.png\" />\n",
        "  </picture>\n",
        "</h1>",
    );

    let linter = MD041FirstLineHeading::default();
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = linter.check(&context).unwrap();
    assert!(
        warnings.is_empty(),
        "Picture tag inside multi-line HTML heading should be recognized"
    );
}
1010
#[test]
fn test_badge_images_before_heading() {
    let linter = MD041FirstLineHeading::default();

    // Each document opens with a badge-only line followed by a level-1
    // heading; the badge line is expected to be skipped in every case.
    let cases = [
        // A single badge image.
        (
            "![badge](https://img.shields.io/badge/test-passing-green)\n\n# My Project",
            "Badge image should be skipped",
        ),
        // Several badge images on one line.
        (
            "![badge1](url1) ![badge2](url2)\n\n# My Project",
            "Multiple badges should be skipped",
        ),
        // A badge image wrapped in a link.
        (
            "[![badge](https://img.shields.io/badge/test-pass-green)](https://example.com)\n\n# My Project",
            "Linked badge should be skipped",
        ),
    ];

    for (markdown, why) in cases {
        let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
        let warnings = linter.check(&context).unwrap();
        assert!(warnings.is_empty(), "{why}");
    }
}
1033
#[test]
fn test_multiple_badge_lines_before_heading() {
    // Two consecutive lines of linked badges precede the heading.
    let markdown = concat!(
        "[![Crates.io](https://img.shields.io/crates/v/example)](https://crates.io)\n",
        "[![docs.rs](https://img.shields.io/docsrs/example)](https://docs.rs)\n",
        "\n",
        "# My Project",
    );

    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = MD041FirstLineHeading::default().check(&context).unwrap();
    assert!(warnings.is_empty(), "Multiple badge lines should be skipped");
}
1044
#[test]
fn test_badges_without_heading_still_warns() {
    use crate::config::MarkdownFlavor;

    // A badge line followed by a paragraph instead of a heading must still
    // produce one warning.
    let markdown = "![badge](url)\n\nThis is not a heading.";
    let context = LintContext::new(markdown, MarkdownFlavor::Standard, None);
    let warnings = MD041FirstLineHeading::default().check(&context).unwrap();

    assert_eq!(warnings.len(), 1, "Should warn when badges followed by non-heading");
}
1055
#[test]
fn test_mixed_content_not_badge_line() {
    // An image followed by plain text on the same line is not a badge-only
    // line, so one warning is expected even though a heading follows.
    let markdown = "![badge](url) Some text here\n\n# Heading";
    let linter = MD041FirstLineHeading::default();
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);

    let warnings = linter.check(&context).unwrap();
    assert_eq!(warnings.len(), 1, "Mixed content line should not be skipped");
}
1066
#[test]
fn test_is_badge_image_line_unit() {
    // Direct checks of the badge-line predicate, via a short local alias.
    let is_badge = MD041FirstLineHeading::is_badge_image_line;

    // Lines made up solely of images or linked images.
    assert!(is_badge("![badge](url)"));
    assert!(is_badge("[![badge](img)](link)"));
    assert!(is_badge("![a](b) ![c](d)"));
    assert!(is_badge("[![a](b)](c) [![d](e)](f)"));

    // Empty lines, plain text, image-plus-text, and headings are rejected.
    assert!(!is_badge(""));
    assert!(!is_badge("Some text"));
    assert!(!is_badge("![badge](url) text"));
    assert!(!is_badge("# Heading"));
}
1081
1082    // Integration tests for MkDocs anchor line detection (issue #365)
1083    // Unit tests for is_mkdocs_anchor_line are in utils/mkdocs_attr_list.rs
1084
#[test]
fn test_mkdocs_anchor_before_heading_in_mkdocs_flavor() {
    use crate::config::MarkdownFlavor;

    // Under the MkDocs flavor, an anchor line directly above the heading is
    // expected to be skipped.
    let markdown = "[](){ #example }\n# Title";
    let context = LintContext::new(markdown, MarkdownFlavor::MkDocs, None);
    let warnings = MD041FirstLineHeading::default().check(&context).unwrap();

    assert!(
        warnings.is_empty(),
        "MkDocs anchor line should be skipped in MkDocs flavor"
    );
}
1098
#[test]
fn test_mkdocs_anchor_before_heading_in_standard_flavor() {
    use crate::config::MarkdownFlavor;

    // The same document under the Standard flavor: the anchor line is not
    // skipped, so one warning is expected.
    let markdown = "[](){ #example }\n# Title";
    let context = LintContext::new(markdown, MarkdownFlavor::Standard, None);
    let warnings = MD041FirstLineHeading::default().check(&context).unwrap();

    assert_eq!(
        warnings.len(),
        1,
        "MkDocs anchor line should NOT be skipped in Standard flavor"
    );
}
1113
#[test]
fn test_multiple_mkdocs_anchors_before_heading() {
    // Two stacked anchor lines precede the heading; under the MkDocs flavor
    // both are expected to be skipped.
    let markdown = "[](){ #first }\n[](){ #second }\n# Title";
    let linter = MD041FirstLineHeading::default();

    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::MkDocs, None);
    let warnings = linter.check(&context).unwrap();
    assert!(
        warnings.is_empty(),
        "Multiple MkDocs anchor lines should all be skipped in MkDocs flavor"
    );
}
1127
#[test]
fn test_mkdocs_anchor_with_front_matter() {
    use crate::config::MarkdownFlavor;

    // Title-less front matter, then an anchor line, then the heading.
    let markdown = "---\nauthor: John\n---\n[](){ #anchor }\n# Title";
    let context = LintContext::new(markdown, MarkdownFlavor::MkDocs, None);

    let warnings = MD041FirstLineHeading::default().check(&context).unwrap();
    assert!(
        warnings.is_empty(),
        "MkDocs anchor line after front matter should be skipped in MkDocs flavor"
    );
}
1141
#[test]
fn test_mkdocs_anchor_kramdown_style() {
    // The attribute list uses the Kramdown form with a leading colon: `{: #anchor }`.
    let markdown = "[](){: #anchor }\n# Title";
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::MkDocs, None);

    let linter = MD041FirstLineHeading::default();
    let warnings = linter.check(&context).unwrap();
    assert!(
        warnings.is_empty(),
        "Kramdown-style MkDocs anchor should be skipped in MkDocs flavor"
    );
}
1155
#[test]
fn test_mkdocs_anchor_without_heading_still_warns() {
    use crate::config::MarkdownFlavor;

    // An anchor line followed by a paragraph rather than a heading must still
    // produce one warning, even under the MkDocs flavor.
    let markdown = "[](){ #anchor }\nThis is not a heading.";
    let context = LintContext::new(markdown, MarkdownFlavor::MkDocs, None);
    let warnings = MD041FirstLineHeading::default().check(&context).unwrap();

    assert_eq!(
        warnings.len(),
        1,
        "MkDocs anchor followed by non-heading should still trigger MD041"
    );
}
1170
#[test]
fn test_mkdocs_anchor_with_html_comment() {
    // An HTML comment and an anchor line both precede the heading; under the
    // MkDocs flavor both are expected to be skipped.
    let markdown = "<!-- Comment -->\n[](){ #anchor }\n# Title";
    let linter = MD041FirstLineHeading::default();
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::MkDocs, None);

    let warnings = linter.check(&context).unwrap();
    assert!(
        warnings.is_empty(),
        "MkDocs anchor with HTML comment should both be skipped in MkDocs flavor"
    );
}
1184}