mdbook_lint_core/rules/standard/
md051.rs

1//! MD051 - Link fragments should be valid
2//!
3//! This rule is triggered when a link fragment does not match any of the fragments
4//! that are automatically generated for headings in a document.
5//!
6//! ## Correct
7//!
8//! ```markdown
9//! # Heading Name
10//!
//! [Link](#heading-name)
12//! ```
13//!
14//! ## Incorrect
15//!
16//! ```markdown
17//! # Heading Name
18//!
//! [Link](#invalid-fragment)
20//! ```
21
22use crate::error::Result;
23use crate::{
24    Document, Violation,
25    rule::{Rule, RuleCategory, RuleMetadata},
26    violation::Severity,
27};
28use comrak::nodes::{AstNode, NodeValue};
29
30use std::collections::{HashMap, HashSet};
31
/// MD051 - Link fragments should be valid
///
/// Holds the configuration of the rule. Instances start from [`MD051::new`]
/// and are adjusted through the builder-style setters on this type.
pub struct MD051 {
    /// When `true`, link fragments and known fragments are lowercased before
    /// they are compared in the AST-based check.
    ignore_case: bool,
    /// Optional pattern; link fragments matching it are skipped by the
    /// AST-based check instead of being validated.
    ignored_pattern: Option<String>,
}
37
38impl Default for MD051 {
39    fn default() -> Self {
40        Self::new()
41    }
42}
43
44impl MD051 {
45    /// Create a new MD051 rule instance
46    pub fn new() -> Self {
47        Self {
48            ignore_case: false,
49            ignored_pattern: None,
50        }
51    }
52
53    /// Set whether to ignore case when comparing fragments
54    #[allow(dead_code)]
55    pub fn ignore_case(mut self, ignore_case: bool) -> Self {
56        self.ignore_case = ignore_case;
57        self
58    }
59
60    #[allow(dead_code)]
61    pub fn ignored_pattern(mut self, pattern: Option<String>) -> Self {
62        self.ignored_pattern = pattern;
63        self
64    }
65
66    /// Get position information from a node
67    fn get_position<'a>(&self, node: &'a AstNode<'a>) -> (usize, usize) {
68        let data = node.data.borrow();
69        let pos = data.sourcepos;
70        (pos.start.line, pos.start.column)
71    }
72
73    /// Generate GitHub-style heading fragment from text
74    fn generate_heading_fragment(&self, text: &str) -> String {
75        // GitHub heading algorithm:
76        // 1. Convert to lowercase
77        // 2. Remove punctuation (keep alphanumeric, spaces, hyphens)
78        // 3. Convert spaces to dashes
79        // 4. Remove leading/trailing dashes
80        let mut fragment = text.to_lowercase();
81
82        // Remove punctuation, keep alphanumeric, spaces, hyphens, underscores
83        fragment = fragment
84            .chars()
85            .filter(|c| c.is_alphanumeric() || c.is_whitespace() || *c == '-' || *c == '_')
86            .collect();
87
88        // Convert spaces to dashes
89        fragment = fragment.replace(' ', "-");
90
91        // Remove multiple consecutive dashes
92        fragment = self.consolidate_dashes(&fragment);
93
94        // Remove leading/trailing dashes
95        fragment = fragment.trim_matches('-').to_string();
96
97        fragment
98    }
99
100    /// Extract text content from a heading node
101    fn extract_heading_text<'a>(node: &'a AstNode<'a>) -> String {
102        let mut text = String::new();
103        for child in node.children() {
104            match &child.data.borrow().value {
105                NodeValue::Text(t) => text.push_str(t),
106                NodeValue::Code(code) => text.push_str(&code.literal),
107                NodeValue::Emph | NodeValue::Strong => {
108                    text.push_str(&Self::extract_heading_text(child));
109                }
110                _ => {}
111            }
112        }
113        text
114    }
115
116    /// Collect all valid fragments from the document
117    fn collect_valid_fragments<'a>(&self, ast: &'a AstNode<'a>) -> HashSet<String> {
118        let mut fragments = HashSet::new();
119        let mut heading_counts: HashMap<String, usize> = HashMap::new();
120
121        // Add special fragments
122        fragments.insert("top".to_string());
123
124        self.traverse_for_fragments(ast, &mut fragments, &mut heading_counts);
125
126        fragments
127    }
128
129    /// Traverse AST to find fragments
130    fn traverse_for_fragments<'a>(
131        &self,
132        node: &'a AstNode<'a>,
133        fragments: &mut HashSet<String>,
134        heading_counts: &mut HashMap<String, usize>,
135    ) {
136        match &node.data.borrow().value {
137            NodeValue::Heading(_) => {
138                let heading_text = Self::extract_heading_text(node);
139                let mut fragment = self.generate_heading_fragment(&heading_text);
140
141                // Handle duplicate fragments by appending numbers
142                if let Some(count) = heading_counts.get(&fragment) {
143                    let new_count = count + 1;
144                    heading_counts.insert(fragment.clone(), new_count);
145                    fragment = format!("{fragment}-{new_count}");
146                } else {
147                    heading_counts.insert(fragment.clone(), 1);
148                }
149
150                fragments.insert(fragment);
151
152                // Check for custom anchor syntax {#custom-name}
153                if let Some(anchor_id) = self.extract_custom_anchor(&heading_text) {
154                    fragments.insert(anchor_id);
155                }
156            }
157            NodeValue::HtmlBlock(html) => {
158                // Extract id attributes from HTML elements
159                let ids = self.extract_html_ids(&html.literal);
160                for id in ids {
161                    fragments.insert(id);
162                }
163
164                // Extract name attributes from <a> tags
165                let names = self.extract_html_names(&html.literal);
166                for name in names {
167                    fragments.insert(name);
168                }
169            }
170            NodeValue::HtmlInline(html) => {
171                // Extract id attributes from HTML elements
172                let ids = self.extract_html_ids(html);
173                for id in ids {
174                    fragments.insert(id);
175                }
176
177                // Extract name attributes from <a> tags
178                let names = self.extract_html_names(html);
179                for name in names {
180                    fragments.insert(name);
181                }
182            }
183            _ => {}
184        }
185
186        for child in node.children() {
187            self.traverse_for_fragments(child, fragments, heading_counts);
188        }
189    }
190
191    /// Replace multiple consecutive dashes with single dash
192    fn consolidate_dashes(&self, text: &str) -> String {
193        let mut result = String::new();
194        let mut prev_was_dash = false;
195
196        for ch in text.chars() {
197            if ch == '-' {
198                if !prev_was_dash {
199                    result.push(ch);
200                }
201                prev_was_dash = true;
202            } else {
203                result.push(ch);
204                prev_was_dash = false;
205            }
206        }
207
208        result
209    }
210
211    /// Extract custom anchor ID from text like {#custom-name}
212    fn extract_custom_anchor(&self, text: &str) -> Option<String> {
213        if let Some(start) = text.find("{#") {
214            let remaining = &text[start + 2..];
215            if let Some(end) = remaining.find('}') {
216                let anchor_id = &remaining[..end];
217                // Validate anchor ID (alphanumeric, dash, underscore only)
218                if anchor_id
219                    .chars()
220                    .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
221                    && !anchor_id.is_empty()
222                {
223                    return Some(anchor_id.to_string());
224                }
225            }
226        }
227        None
228    }
229
230    /// Extract HTML id attributes manually
231    fn extract_html_ids(&self, html: &str) -> Vec<String> {
232        let mut ids = Vec::new();
233        let html_lower = html.to_lowercase();
234        let mut pos = 0;
235
236        while let Some(id_pos) = html_lower[pos..].find("id") {
237            let absolute_pos = pos + id_pos;
238
239            // Skip whitespace
240            let remaining = &html[absolute_pos + 2..];
241            let mut chars = remaining.chars();
242            let mut offset = 0;
243
244            // Skip whitespace
245            for ch in chars.by_ref() {
246                if ch.is_whitespace() {
247                    offset += ch.len_utf8();
248                } else if ch == '=' {
249                    offset += ch.len_utf8();
250                    break;
251                } else {
252                    break;
253                }
254            }
255
256            // Skip more whitespace after =
257            for ch in chars {
258                if ch.is_whitespace() {
259                    offset += ch.len_utf8();
260                } else if ch == '"' || ch == '\'' {
261                    let quote = ch;
262                    offset += ch.len_utf8();
263
264                    // Extract the value between quotes
265                    let value_start = absolute_pos + 2 + offset;
266                    let value_remaining = &html[value_start..];
267
268                    if let Some(end_quote) = value_remaining.find(quote) {
269                        let id_value = &value_remaining[..end_quote];
270                        if !id_value.is_empty() {
271                            ids.push(id_value.to_string());
272                        }
273                        pos = value_start + end_quote + 1;
274                    }
275                    break;
276                } else {
277                    break;
278                }
279            }
280        }
281
282        ids
283    }
284
285    /// Extract HTML name attributes from <a> tags manually
286    fn extract_html_names(&self, html: &str) -> Vec<String> {
287        let mut names = Vec::new();
288        let html_lower = html.to_lowercase();
289        let mut pos = 0;
290
291        // Look for <a tags
292        while let Some(a_pos) = html_lower[pos..].find("<a") {
293            let absolute_pos = pos + a_pos;
294
295            // Find the end of the tag
296            if let Some(tag_end) = html[absolute_pos..].find('>') {
297                let tag_content = &html[absolute_pos..absolute_pos + tag_end];
298                let tag_lower = tag_content.to_lowercase();
299
300                // Look for name attribute within this tag
301                if let Some(name_pos) = tag_lower.find("name") {
302                    let name_start = absolute_pos + name_pos + 4;
303                    let remaining = &html[name_start..absolute_pos + tag_end];
304                    let mut chars = remaining.chars();
305                    let mut offset = 0;
306
307                    // Skip whitespace
308                    for ch in chars.by_ref() {
309                        if ch.is_whitespace() {
310                            offset += ch.len_utf8();
311                        } else if ch == '=' {
312                            offset += ch.len_utf8();
313                            break;
314                        } else {
315                            break;
316                        }
317                    }
318
319                    // Skip more whitespace after =
320                    for ch in chars {
321                        if ch.is_whitespace() {
322                            offset += ch.len_utf8();
323                        } else if ch == '"' || ch == '\'' {
324                            let quote = ch;
325                            offset += ch.len_utf8();
326
327                            // Extract the value between quotes
328                            let value_start = name_start + offset;
329                            let value_remaining = &html[value_start..absolute_pos + tag_end];
330
331                            if let Some(end_quote) = value_remaining.find(quote) {
332                                let name_value = &value_remaining[..end_quote];
333                                if !name_value.is_empty() {
334                                    names.push(name_value.to_string());
335                                }
336                            }
337                            break;
338                        } else {
339                            break;
340                        }
341                    }
342                }
343
344                pos = absolute_pos + tag_end + 1;
345            } else {
346                break;
347            }
348        }
349
350        names
351    }
352
353    /// Check if fragment is a GitHub line reference (L123, L123C45, L123-L456, etc.)
354    fn is_github_line_reference(&self, fragment: &str) -> bool {
355        if !fragment.starts_with('L') {
356            return false;
357        }
358
359        let remaining = &fragment[1..];
360        let mut chars = remaining.chars().peekable();
361
362        // Must start with digits
363        if !self.consume_digits(&mut chars) {
364            return false;
365        }
366
367        // Optional C followed by digits
368        if chars.peek() == Some(&'C') {
369            chars.next();
370            if !self.consume_digits(&mut chars) {
371                return false;
372            }
373        }
374
375        // Optional range: -L followed by digits and optional C digits
376        if chars.peek() == Some(&'-') {
377            chars.next();
378            if chars.next() != Some('L') {
379                return false;
380            }
381            if !self.consume_digits(&mut chars) {
382                return false;
383            }
384            // Optional C followed by digits for end of range
385            if chars.peek() == Some(&'C') {
386                chars.next();
387                if !self.consume_digits(&mut chars) {
388                    return false;
389                }
390            }
391        }
392
393        // Must be at end of string
394        chars.peek().is_none()
395    }
396
397    /// Consume consecutive digits from char iterator, return true if any were consumed
398    fn consume_digits(&self, chars: &mut std::iter::Peekable<std::str::Chars>) -> bool {
399        let mut consumed_any = false;
400        while let Some(&ch) = chars.peek() {
401            if ch.is_ascii_digit() {
402                chars.next();
403                consumed_any = true;
404            } else {
405                break;
406            }
407        }
408        consumed_any
409    }
410
411    /// Check for invalid link fragments
412    fn check_link_fragments<'a>(
413        &self,
414        ast: &'a AstNode<'a>,
415        valid_fragments: &HashSet<String>,
416    ) -> Vec<Violation> {
417        let mut violations = Vec::new();
418
419        self.traverse_for_links(ast, valid_fragments, &mut violations);
420
421        violations
422    }
423
424    /// Traverse AST to find link fragments
425    fn traverse_for_links<'a>(
426        &self,
427        node: &'a AstNode<'a>,
428        valid_fragments: &HashSet<String>,
429        violations: &mut Vec<Violation>,
430    ) {
431        if let NodeValue::Link(link) = &node.data.borrow().value
432            && let Some(fragment) = link.url.strip_prefix('#')
433        {
434            // Handle empty fragments - they should cause violations
435            if fragment.is_empty() {
436                let pos = self.get_position(node);
437                violations.push(self.create_violation(
438                    "Link fragment is empty".to_string(),
439                    pos.0,
440                    pos.1,
441                    Severity::Error,
442                ));
443                return;
444            }
445
446            // Skip if matches ignored pattern
447            if let Some(ref pattern) = self.ignored_pattern
448                && fragment.contains(pattern)
449            {
450                return;
451            }
452
453            // GitHub line reference patterns are always valid
454            if self.is_github_line_reference(fragment) {
455                return;
456            }
457
458            let fragment_to_check = if self.ignore_case {
459                fragment.to_lowercase()
460            } else {
461                fragment.to_string()
462            };
463
464            let valid_fragments_check: HashSet<String> = if self.ignore_case {
465                valid_fragments.iter().map(|f| f.to_lowercase()).collect()
466            } else {
467                valid_fragments.clone()
468            };
469
470            if !valid_fragments_check.contains(&fragment_to_check) {
471                let pos = self.get_position(node);
472                violations.push(self.create_violation(
473                    format!("Link fragment '{fragment}' is not valid"),
474                    pos.0,
475                    pos.1,
476                    Severity::Error,
477                ));
478            }
479        }
480
481        for child in node.children() {
482            self.traverse_for_links(child, valid_fragments, violations);
483        }
484    }
485
486    /// Fallback method using manual parsing when no AST is available
487    fn check_fragments_fallback(&self, document: &Document) -> Vec<Violation> {
488        let mut violations = Vec::new();
489
490        for (line_num, line) in document.content.lines().enumerate() {
491            let line_number = line_num + 1;
492            let mut chars = line.char_indices().peekable();
493            let mut in_backticks = false;
494
495            while let Some((i, ch)) = chars.next() {
496                match ch {
497                    '`' => {
498                        in_backticks = !in_backticks;
499                    }
500                    '[' if !in_backticks => {
501                        // Try to parse link with fragment: [text](#fragment)
502                        if let Some((fragment, text_end)) = self.parse_fragment_link(&line[i..]) {
503                            // Handle empty fragments - they should cause violations
504                            if fragment.is_empty() {
505                                violations.push(self.create_violation(
506                                    "Link fragment is empty".to_string(),
507                                    line_number,
508                                    i + 1,
509                                    Severity::Error,
510                                ));
511                                // Skip past the parsed link
512                                for _ in 0..text_end - 1 {
513                                    chars.next();
514                                }
515                                continue;
516                            }
517
518                            // Skip special cases like "top"
519                            if fragment == "top" {
520                                // Skip past the parsed link
521                                for _ in 0..text_end - 1 {
522                                    chars.next();
523                                }
524                                continue;
525                            }
526
527                            // For the fallback, we'll do basic validation
528                            // Check for obvious case issues and suspicious patterns
529                            let mut is_suspicious = false;
530
531                            // Skip GitHub line references - they are always valid
532                            if self.is_github_line_reference(&fragment) {
533                                // Skip past the parsed link
534                                for _ in 0..text_end - 1 {
535                                    chars.next();
536                                }
537                                continue;
538                            }
539
540                            if fragment.contains("invalid") || fragment.contains("undefined") {
541                                is_suspicious = true;
542                            }
543
544                            // Check for basic case issues (contains uppercase when should be lowercase)
545                            // Only flag this if case sensitivity is enabled
546                            if !self.ignore_case && fragment != fragment.to_lowercase() {
547                                is_suspicious = true;
548                            }
549
550                            if is_suspicious {
551                                violations.push(self.create_violation(
552                                    format!("Link fragment '{fragment}' may not be valid"),
553                                    line_number,
554                                    i + 1,
555                                    Severity::Warning,
556                                ));
557                            }
558
559                            // Skip past the parsed link
560                            for _ in 0..text_end - 1 {
561                                chars.next();
562                            }
563                        }
564                    }
565                    _ => {}
566                }
567            }
568        }
569
570        violations
571    }
572
573    /// Parse a fragment link starting at the given position
574    /// Returns (fragment, total_length) if found
575    fn parse_fragment_link(&self, text: &str) -> Option<(String, usize)> {
576        if !text.starts_with('[') {
577            return None;
578        }
579
580        // Find the closing bracket
581        let mut bracket_count = 0;
582        let mut closing_bracket_pos = None;
583
584        for (i, ch) in text.char_indices() {
585            match ch {
586                '[' => bracket_count += 1,
587                ']' => {
588                    bracket_count -= 1;
589                    if bracket_count == 0 {
590                        closing_bracket_pos = Some(i);
591                        break;
592                    }
593                }
594                _ => {}
595            }
596        }
597
598        let closing_bracket_pos = closing_bracket_pos?;
599        let remaining = &text[closing_bracket_pos + 1..];
600
601        // Check if this is followed by (#fragment)
602        if remaining.starts_with("(#") {
603            let fragment_start = closing_bracket_pos + 3; // +1 for ], +1 for (, +1 for #
604            if let Some(closing_paren) = remaining.find(')') {
605                let fragment_end = closing_bracket_pos + 1 + closing_paren;
606                let fragment = &text[fragment_start..fragment_end];
607                let total_length = fragment_end + 1;
608                return Some((fragment.to_string(), total_length));
609            }
610        }
611
612        None
613    }
614}
615
616impl Rule for MD051 {
617    fn id(&self) -> &'static str {
618        "MD051"
619    }
620
621    fn name(&self) -> &'static str {
622        "link-fragments"
623    }
624
625    fn description(&self) -> &'static str {
626        "Link fragments should be valid"
627    }
628
629    fn metadata(&self) -> RuleMetadata {
630        RuleMetadata::stable(RuleCategory::Links)
631    }
632
633    fn check_with_ast<'a>(
634        &self,
635        document: &Document,
636        ast: Option<&'a AstNode<'a>>,
637    ) -> Result<Vec<Violation>> {
638        if let Some(ast) = ast {
639            let valid_fragments = self.collect_valid_fragments(ast);
640            let violations = self.check_link_fragments(ast, &valid_fragments);
641            Ok(violations)
642        } else {
643            // Simplified regex-based fallback when no AST is available
644            Ok(self.check_fragments_fallback(document))
645        }
646    }
647}
648
#[cfg(test)]
mod tests {
    // Unit tests for MD051. Each test feeds a small Markdown document to the
    // rule through the shared test helpers and asserts on the violations.
    //
    // NOTE(review): the helpers live in `crate::test_helpers` and are not
    // visible here; confirm whether they run the AST-based check or the
    // no-AST fallback, since the two paths apply different validation.
    use super::*;
    use crate::test_helpers::{assert_no_violations, assert_single_violation};

    /// Links to headings, an HTML `id`, an `<a name>` and `#top` are all accepted.
    #[test]
    fn test_valid_fragments() {
        let content = r#"# Heading Name

[Link](#heading-name)

## Another Heading

[Another link](#another-heading)

<div id="custom-id"></div>
[Custom](#custom-id)

<a name="bookmark"></a>
[Bookmark](#bookmark)

[Top link](#top)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// A fragment with no matching target yields one violation on the link's line.
    #[test]
    fn test_invalid_fragments() {
        let content = r#"# Heading Name

[Invalid link](#invalid-fragment)
"#;

        let violation = assert_single_violation(MD051::new(), content);
        assert_eq!(violation.line, 3);
        assert!(violation.message.contains("invalid-fragment"));
    }

    /// The second of two identical headings is reachable as `<fragment>-1`.
    #[test]
    fn test_duplicate_headings() {
        let content = r#"# Test

[Link 1](#test)

# Test

[Link 2](#test-1)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// GitHub line references (`#L20`, `#L19C5-L21C11`) are never reported.
    #[test]
    fn test_github_line_references() {
        let content = r#"# Code

[Line 20](#L20)
[Range](#L19C5-L21C11)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// A case mismatch is reported by default and accepted with `ignore_case(true)`.
    #[test]
    fn test_case_sensitivity() {
        let content = r#"# Heading Name

[Link](#Heading-Name)
"#;

        let violation = assert_single_violation(MD051::new(), content);
        assert_eq!(violation.line, 3);

        assert_no_violations(MD051::new().ignore_case(true), content);
    }

    /// A `{#custom-anchor}` on a heading can be linked to.
    #[test]
    fn test_custom_anchor() {
        let content = r#"# Heading Name {#custom-anchor}

[Link](#custom-anchor)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// An empty fragment (`#`) yields one violation on the link's line.
    #[test]
    fn test_empty_fragment() {
        let content = r#"# Heading

[Empty fragment](#)
"#;

        let violation = assert_single_violation(MD051::new(), content);
        assert_eq!(violation.line, 3);
    }

    /// `id` attributes on different HTML elements are valid link targets.
    #[test]
    fn test_html_id_attributes() {
        let content = r#"# Heading

<div id="custom-id">Content</div>
<span id="another-id">Text</span>

[Link to div](#custom-id)
[Link to span](#another-id)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// Links to `name` attribute values (on `<a>` and on `<div>`) produce no violations.
    #[test]
    fn test_html_name_attributes() {
        let content = r#"# Heading

<a name="anchor-name"></a>
<div name="form-element">Content</div>

[Link to anchor](#anchor-name)
[Link to element](#form-element)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// `id` and `name` attributes nested inside a multi-line HTML block are found.
    #[test]
    fn test_html_block_extraction() {
        let content = r#"# Heading

<div class="content">
  <p id="paragraph-id">Text</p>
  <a name="link-name" href="/test">Link</a>
</div>

[Link to paragraph](#paragraph-id)
[Link to anchor](#link-name)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// `id` and `name` attributes on inline HTML within a paragraph are found.
    #[test]
    fn test_html_inline_extraction() {
        let content = r#"# Heading

This is text with <span id="inline-id">inline HTML</span> and <a name="inline-name">anchor</a>.

[Link to inline](#inline-id)
[Link to anchor](#inline-name)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// Links to headings containing punctuation, underscores, digits and repeated spaces pass.
    #[test]
    fn test_complex_fragment_generation() {
        let content = r#"# Complex Heading with (Parentheses) & Symbols!

[Link](#complex-heading-with-parentheses--symbols)

## Another_Complex-Title 123

[Another link](#another_complex-title-123)

### Multiple   Spaces   Between   Words

[Space link](#multiple-spaces-between-words)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// Links using single dashes pass for headings with dash runs and leading/trailing dashes.
    #[test]
    fn test_dash_consolidation() {
        let content = r#"# Title---With----Multiple-----Dashes

[Link](#title-with-multiple-dashes)

## --Leading-And-Trailing--

[Another link](#leading-and-trailing)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// Links to headings with non-ASCII letters, an emoji, inline code and bold text pass.
    #[test]
    fn test_unicode_and_special_chars() {
        let content = r#"# Heading with émojis 🚀 and ñ

[Unicode link](#heading-with-émojis--and-ñ)

## Code `inline` and **bold**

[Code link](#code-inline-and-bold)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// A custom anchor containing a space is not registered, so linking to it is reported.
    #[test]
    fn test_custom_anchor_validation() {
        let content = r#"# Valid Custom {#valid-anchor}

[Link](#valid-anchor)

# Invalid Custom {#invalid anchor}

[Bad link](#invalid-anchor)
"#;

        // Should have one violation for the invalid custom anchor reference
        let violation = assert_single_violation(MD051::new(), content);
        assert_eq!(violation.line, 7);
        assert!(violation.message.contains("invalid-anchor"));
    }

    /// Empty, invalid-character and nested custom anchors do not themselves cause violations.
    #[test]
    fn test_custom_anchor_edge_cases() {
        let content = r#"# Empty Custom {#}

# Valid Custom {#test123}

[Link](#test123)

# Invalid Chars {#test@123}

# Nested {#outer {#inner} }
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// Single lines, line ranges and line/column ranges are all accepted.
    #[test]
    fn test_github_line_references_detailed() {
        let content = r#"# Code Examples

[Line reference](#L42)
[Line range](#L10-L20)
[Complex range](#L15C3-L25C10)
[Another format](#L1C1-L1C5)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// With headings, HTML anchors, a custom anchor and a line reference, only the bad link is reported.
    #[test]
    fn test_multiple_document_types() {
        let content = r#"# Main Heading

Regular text here.

<div id="html-id">HTML content</div>

<a name="html-name">Anchor</a>

## Sub Heading {#custom-sub}

More content.

[Link to main](#main-heading)
[Link to sub](#custom-sub)
[Link to HTML ID](#html-id)
[Link to HTML name](#html-name)
[GitHub reference](#L100)
[Invalid reference](#Invalid-Reference)
"#;

        let violation = assert_single_violation(MD051::new(), content);
        assert_eq!(violation.line, 18);
        assert!(violation.message.contains("Invalid-Reference"));
    }

    /// Three identical headings are reachable as `test`, `test-1` and `test-2`.
    #[test]
    fn test_duplicate_heading_numbering() {
        let content = r#"# Test

[First link](#test)

# Test

[Second link](#test-1)

# Test

[Third link](#test-2)

# Different

[Different link](#different)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// Single-quoted and unquoted `id` values are accepted next to a comment and a broken quote.
    #[test]
    fn test_html_parsing_edge_cases() {
        let content = r#"# Heading

<!-- Comment with id="not-real" -->
<div id='single-quotes'>Content</div>
<span id="no-closing-quote>Broken</span>
<p id=unquoted-id>Unquoted</p>

[Single quotes](#single-quotes)
[Unquoted](#unquoted-id)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// The `ignore_case` option switches a case mismatch from reported to accepted.
    #[test]
    fn test_configuration_options() {
        let content = r#"# Test Heading

[Case mismatch](#Test-Heading)
"#;

        // Default case sensitive - should fail
        let violation = assert_single_violation(MD051::new(), content);
        assert_eq!(violation.line, 3);

        // Case insensitive - should pass
        assert_no_violations(MD051::new().ignore_case(true), content);
    }

    /// With the ignored pattern `external-*`, only the link not covered by it is reported.
    #[test]
    fn test_ignored_pattern() {
        let content = r#"# Heading

[External link](#external-pattern)
[Normal link](#invalid-fragment)
"#;

        // With ignored pattern, first link should pass, second should fail
        let rule = MD051::new().ignored_pattern(Some("external-*".to_string()));
        let violation = assert_single_violation(rule, content);
        assert_eq!(violation.line, 4);
        assert!(violation.message.contains("invalid-fragment"));
    }

    /// An empty document produces no violations.
    #[test]
    fn test_empty_document() {
        let content = "";
        assert_no_violations(MD051::new(), content);
    }

    /// In a document without headings, a fragment link is reported.
    #[test]
    fn test_no_headings_no_fragments() {
        let content = r#"Just some text without headings.

[Invalid link](#Invalid-Fragment)
"#;

        let violation = assert_single_violation(MD051::new(), content);
        assert_eq!(violation.line, 3);
        assert!(violation.message.contains("Invalid-Fragment"));
    }

    /// The special `#top` fragment is accepted.
    #[test]
    fn test_top_fragment() {
        let content = r#"# Heading

[Link to top](#top)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// Malformed `id` attributes (empty, valueless, unclosed quote) do not break a valid heading link.
    #[test]
    fn test_malformed_html() {
        let content = r#"# Heading

<div id=>Empty value</div>
<span id>No value</span>
<p id="unclosed>Bad quote</p>

[Should still work](#heading)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// Links to an `id` and a `name` on deeply nested HTML elements produce no violations.
    #[test]
    fn test_nested_html_elements() {
        let content = r#"# Heading

<div class="outer">
  <div id="nested-id">
    <span name="deep-name">Content</span>
  </div>
</div>

[Link to nested](#nested-id)
[Link to deep](#deep-name)
"#;

        assert_no_violations(MD051::new(), content);
    }

    /// Links to headings mixing inline code, bold and italic text pass.
    #[test]
    fn test_heading_with_code_and_emphasis() {
        let content = r#"# Title with `code` and **bold** and *italic*

[Link](#title-with-code-and-bold-and-italic)

## Another `complex` **formatting** example

[Another link](#another-complex-formatting-example)
"#;

        assert_no_violations(MD051::new(), content);
    }
}