1use once_cell::sync::Lazy;
2use regex::Regex;
3use serde::Deserialize;
4use std::collections::HashSet;
5use std::rc::Rc;
6
7use tree_sitter::Node;
8
9use crate::{
10 linter::{range_from_tree_sitter, RuleViolation},
11 rules::{Context, Rule, RuleLinter, RuleType},
12};
13
14#[derive(Debug, PartialEq, Clone, Deserialize, Default)]
16pub struct MD051LinkFragmentsTable {
17 #[serde(default)]
18 pub ignore_case: bool,
19 #[serde(default)]
20 pub ignored_pattern: String,
21}
22
23#[derive(Debug, Clone)]
24struct LinkFragment {
25 fragment: String,
26 range: tree_sitter::Range,
27}
28
29static LINE_FRAGMENT_PATTERN: Lazy<Regex> =
33 Lazy::new(|| Regex::new(r"^L\d+(?:C\d+)?-L\d+(?:C\d+)?$|^L\d+$").unwrap());
34
35static ID_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r#"id\s*=\s*["']([^"']+)["']"#).unwrap());
36
37static NAME_PATTERN: Lazy<Regex> =
38 Lazy::new(|| Regex::new(r#"name\s*=\s*["']([^"']+)["']"#).unwrap());
39
40pub(crate) struct MD051Linter {
41 context: Rc<Context>,
42 valid_fragments: HashSet<String>,
43 valid_fragments_lowercase: HashSet<String>, link_fragments: Vec<LinkFragment>,
45}
46
47impl MD051Linter {
48 pub fn new(context: Rc<Context>) -> Self {
49 Self {
50 context,
51 valid_fragments: HashSet::new(),
52 valid_fragments_lowercase: HashSet::new(),
53 link_fragments: Vec::new(),
54 }
55 }
56
57 fn extract_heading_text(&self, node: &Node) -> Option<String> {
58 let inline_node = if node.kind() == "atx_heading" {
59 node.children(&mut node.walk())
60 .find(|c| c.kind() == "inline")
61 } else if node.kind() == "setext_heading" {
62 node.children(&mut node.walk())
63 .find(|c| c.kind() == "paragraph")
64 .and_then(|p| p.children(&mut p.walk()).find(|gc| gc.kind() == "inline"))
65 } else {
66 None
67 };
68
69 inline_node.map(|n| {
70 let document_content = self.context.document_content.borrow();
71 document_content[n.start_byte()..n.end_byte()]
72 .trim()
73 .to_string()
74 })
75 }
76
77 fn generate_github_fragment(&self, heading_text: &str) -> String {
78 let lower = heading_text.trim().to_lowercase().replace(' ', "-");
84 let mut fragment = String::with_capacity(lower.len());
85
86 for c in lower.chars() {
87 if c.is_alphanumeric() || c == '-' || c == '_' {
88 fragment.push(c);
89 }
90 }
91
92 fragment.trim_matches('-').to_string()
94 }
95
96 fn extract_custom_anchor(&self, heading_text: &str) -> Option<String> {
97 if let Some(start) = heading_text.rfind("{#") {
99 if let Some(end) = heading_text[start..].find('}') {
100 let anchor = &heading_text[start + 2..start + end];
101 return Some(anchor.to_string());
102 }
103 }
104 None
105 }
106
107 fn extract_link_fragments(&self, node: &Node) -> Vec<LinkFragment> {
108 let mut link_fragments = Vec::new();
111
112 let mut i = 0;
114 while i < node.child_count() {
115 if let Some(child) = node.child(i) {
116 if child.kind() == "[" {
117 if let Some((fragment_info, end_index)) = self.parse_link_at_position(node, i) {
119 link_fragments.push(fragment_info);
120 i = end_index;
121 }
122 }
123 i += 1;
124 }
125 }
126
127 link_fragments
128 }
129
130 fn parse_link_at_position(
131 &self,
132 parent: &Node,
133 start_idx: usize,
134 ) -> Option<(LinkFragment, usize)> {
135 let document_content = self.context.document_content.borrow();
137
138 let mut bracket_close_idx = None;
140 let mut paren_open_idx = None;
141 let mut paren_close_idx = None;
142
143 for i in start_idx + 1..parent.child_count() {
145 if let Some(child) = parent.child(i) {
146 if child.kind() == "]" {
147 bracket_close_idx = Some(i);
148 break;
149 }
150 }
151 }
152
153 if let Some(bracket_close) = bracket_close_idx {
154 for i in bracket_close + 1..parent.child_count() {
156 if let Some(child) = parent.child(i) {
157 if child.kind() == "(" {
158 paren_open_idx = Some(i);
159 break;
160 }
161 }
162 }
163 }
164
165 if let Some(paren_open) = paren_open_idx {
166 for i in paren_open + 1..parent.child_count() {
168 if let Some(child) = parent.child(i) {
169 if child.kind() == ")" {
170 paren_close_idx = Some(i);
171 break;
172 }
173 }
174 }
175 }
176
177 if let (Some(paren_open), Some(paren_close)) = (paren_open_idx, paren_close_idx) {
179 if let (Some(paren_open_node), Some(paren_close_node)) =
181 (parent.child(paren_open), parent.child(paren_close))
182 {
183 let start_byte = paren_open_node.end_byte(); let end_byte = paren_close_node.start_byte(); let url_parts = &document_content[start_byte..end_byte];
186 if url_parts.starts_with('#') {
188 if let Some(hash_pos) = url_parts.rfind('#') {
189 let fragment = &url_parts[hash_pos + 1..];
190 if !fragment.is_empty() && !fragment.contains(' ') {
192 if let (Some(start_node), Some(end_node)) =
194 (parent.child(start_idx), parent.child(paren_close))
195 {
196 let link_range = tree_sitter::Range {
197 start_byte: start_node.start_byte(),
198 end_byte: end_node.end_byte(),
199 start_point: start_node.range().start_point,
200 end_point: end_node.range().end_point,
201 };
202
203 return Some((
204 LinkFragment {
205 fragment: fragment.to_string(),
206 range: link_range,
207 },
208 paren_close,
209 ));
210 }
211 }
212 }
213 }
214 }
215 }
216
217 None
218 }
219
220 fn is_github_special_fragment(&self, fragment: &str) -> bool {
221 if fragment == "top" {
225 return true;
226 }
227
228 if LINE_FRAGMENT_PATTERN.is_match(fragment) {
229 return true;
230 }
231
232 false
233 }
234
235 fn extract_html_id_or_name(&self, node: &Node) -> Vec<String> {
236 let mut ids = Vec::new();
238 let start_byte = node.start_byte();
239 let end_byte = node.end_byte();
240 let document_content = self.context.document_content.borrow();
241 let html_content = &document_content[start_byte..end_byte];
242
243 for cap in ID_PATTERN.captures_iter(html_content) {
244 if let Some(id) = cap.get(1) {
245 ids.push(id.as_str().to_string());
246 }
247 }
248
249 for cap in NAME_PATTERN.captures_iter(html_content) {
250 if let Some(name) = cap.get(1) {
251 ids.push(name.as_str().to_string());
252 }
253 }
254
255 ids
256 }
257}
258
259impl RuleLinter for MD051Linter {
260 fn feed(&mut self, node: &Node) {
261 match node.kind() {
262 "atx_heading" | "setext_heading" => {
263 if let Some(heading_text) = self.extract_heading_text(node) {
264 if let Some(custom_anchor) = self.extract_custom_anchor(&heading_text) {
266 self.valid_fragments.insert(custom_anchor.clone());
267 self.valid_fragments_lowercase
268 .insert(custom_anchor.to_lowercase());
269 let clean_text = heading_text
271 .replace(&format!("{{#{custom_anchor}}}"), "")
272 .trim()
273 .to_string();
274 if !clean_text.is_empty() {
275 let fragment = self.generate_github_fragment(&clean_text);
276 if !fragment.is_empty() {
277 self.valid_fragments.insert(fragment.clone());
278 self.valid_fragments_lowercase
279 .insert(fragment.to_lowercase());
280 }
281 }
282 } else {
283 let fragment = self.generate_github_fragment(&heading_text);
285 if !fragment.is_empty() {
286 let mut unique_fragment = fragment.clone();
288 let mut counter = 1;
289 while self.valid_fragments.contains(&unique_fragment) {
290 unique_fragment = format!("{fragment}-{counter}");
291 counter += 1;
292 }
293 self.valid_fragments.insert(unique_fragment.clone());
294 self.valid_fragments_lowercase
295 .insert(unique_fragment.to_lowercase());
296 }
297 }
298 }
299 }
300 "inline" | "html_block" => {
301 let ids = self.extract_html_id_or_name(node);
303 for id in ids {
304 self.valid_fragments.insert(id.clone());
305 self.valid_fragments_lowercase.insert(id.to_lowercase());
306 }
307
308 let link_fragments = self.extract_link_fragments(node);
310 for link_fragment in link_fragments {
311 self.link_fragments.push(link_fragment);
312 }
313 }
314 _ => {
315 }
317 }
318 }
319
320 fn finalize(&mut self) -> Vec<RuleViolation> {
321 let mut violations = Vec::new();
322 let config = &self.context.config.linters.settings.link_fragments;
323
324 let ignored_regex = if !config.ignored_pattern.is_empty() {
326 Regex::new(&config.ignored_pattern).ok()
327 } else {
328 None
329 };
330
331 for link_fragment in &self.link_fragments {
332 let fragment = &link_fragment.fragment;
333 let mut is_valid = false;
334
335 if self.is_github_special_fragment(fragment) {
337 is_valid = true;
338 }
339
340 if !is_valid {
342 if let Some(ref regex) = ignored_regex {
343 if regex.is_match(fragment) {
344 is_valid = true;
345 }
346 }
347 }
348
349 if !is_valid {
351 if config.ignore_case {
352 let fragment_lower = fragment.to_lowercase();
353 is_valid = self.valid_fragments_lowercase.contains(&fragment_lower);
354 } else {
355 is_valid = self.valid_fragments.contains(fragment);
356 }
357 }
358
359 if !is_valid {
360 violations.push(RuleViolation::new(
361 &MD051,
362 format!("Link fragment '{fragment}' does not match any heading or anchor in the document"),
363 self.context.file_path.clone(),
364 range_from_tree_sitter(&link_fragment.range),
365 ));
366 }
367 }
368
369 violations
370 }
371}
372
373pub const MD051: Rule = Rule {
374 id: "MD051",
375 alias: "link-fragments",
376 tags: &["links"],
377 description: "Link fragments should be valid",
378 rule_type: RuleType::Document,
379 required_nodes: &["link", "atx_heading", "setext_heading"],
380 new_linter: |context| Box::new(MD051Linter::new(context)),
381};
382
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::{LintersSettingsTable, MD051LinkFragmentsTable, RuleSeverity};
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Configuration with only `link-fragments` enabled and default settings.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![("link-fragments", RuleSeverity::Error)])
    }

    /// Configuration with only `link-fragments` enabled and the given
    /// MD051 settings.
    fn test_config_with_settings(
        ignore_case: bool,
        ignored_pattern: String,
    ) -> crate::config::QuickmarkConfig {
        let settings = LintersSettingsTable {
            link_fragments: MD051LinkFragmentsTable {
                ignore_case,
                ignored_pattern,
            },
            ..Default::default()
        };
        crate::test_utils::test_helpers::test_config_with_settings(
            vec![("link-fragments", RuleSeverity::Error)],
            settings,
        )
    }

    /// Lints `input` as `test.md` under `config` and returns the message of
    /// every reported violation, in order. All inputs in this module are
    /// string literals.
    fn lint(input: &'static str, config: crate::config::QuickmarkConfig) -> Vec<String> {
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        let messages: Vec<String> = violations.iter().map(|v| v.message().to_string()).collect();
        messages
    }

    #[test]
    fn test_basic_valid_fragment() {
        let input = "# Test Heading

[Valid Link](#test-heading)
";

        let messages = lint(input, test_config());

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_basic_invalid_fragment() {
        let input = "# Test Heading

[Invalid Link](#nonexistent-heading)
";

        let messages = lint(input, test_config());

        assert_eq!(1, messages.len());
    }

    #[test]
    fn test_case_sensitive_default() {
        let input = "# Test Heading

[Invalid Link](#Test-Heading)
";

        let messages = lint(input, test_config());

        assert_eq!(1, messages.len());
    }

    #[test]
    fn test_ignore_case_option() {
        let input = "# Test Heading

[Valid Link](#Test-Heading)
";

        let messages = lint(input, test_config_with_settings(true, String::new()));

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_punctuation_removal() {
        let input = "# Test: Heading! With? Punctuation.

[Valid Link](#test-heading-with-punctuation)
";

        let messages = lint(input, test_config());

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_duplicate_headings() {
        let input = "# Test Heading

## Test Heading

[Link 1](#test-heading)
[Link 2](#test-heading-1)
";

        let messages = lint(input, test_config());

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_custom_anchor() {
        let input = "# Test Heading {#custom-anchor}

[Valid Link](#custom-anchor)
";

        let messages = lint(input, test_config());

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_html_id_attribute() {
        let input = "# Test Heading\n\n<div id=\"my-custom-id\">Content</div>\n\n[Valid Link](#my-custom-id)\n";

        let messages = lint(input, test_config());

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_html_name_attribute() {
        let input =
            "# Test Heading\n\n<a name=\"my-anchor\">Anchor</a>\n\n[Valid Link](#my-anchor)\n";

        let messages = lint(input, test_config());

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_ignored_pattern() {
        let input = "# Test Heading

[Link to external](#external-fragment)
";

        let config = test_config_with_settings(false, "external-.*".to_string());
        let messages = lint(input, config);

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_github_special_fragments() {
        let input = "# Test Heading

[Link to top](#top)
[Link to line](#L20)
[Link to range](#L19C5-L21C11)
[Invalid range](#L10-L20)
[Actually invalid](#L)
";

        let messages = lint(input, test_config());

        // Only the bare `#L` is rejected; the other line references are accepted.
        assert_eq!(1, messages.len());
        assert!(messages[0].contains("Link fragment 'L'"));
    }

    #[test]
    fn test_multiple_violations() {
        let input = "# Valid Heading

[Valid Link](#valid-heading)
[Invalid Link 1](#invalid-one)
[Invalid Link 2](#invalid-two)
";

        let messages = lint(input, test_config());

        assert_eq!(2, messages.len());
    }

    #[test]
    fn test_setext_headings() {
        let input = "Test Heading
============

Another Heading
---------------

[Valid Link 1](#test-heading)
[Valid Link 2](#another-heading)
";

        let messages = lint(input, test_config());

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_edge_cases_for_consistency() {
        let input = "# Test Heading

[Valid link](#test-heading)
[Fragment with spaces](#test heading)
[Empty fragment](#)
[Invalid single L](#L)
[Valid L with number](#L123)
";

        let messages = lint(input, test_config());

        // Fragments with spaces and empty fragments are skipped, not reported.
        assert_eq!(1, messages.len());
        assert!(messages[0].contains("Link fragment 'L'"));
    }

    #[test]
    fn test_comprehensive() {
        let input = r#"# Test MD051 Comprehensive

This file tests various MD051 features and configuration options.

## Basic Headings

### Test Heading One

### Test Heading Two

## Case Sensitivity Tests

[Valid lowercase link](#test-heading-one)
[Invalid uppercase link](#test-heading-one)
[Mixed case invalid](#test-heading-two)

## Custom Anchors

### Heading with Custom Anchor {#custom-test-anchor}

[Valid custom anchor link](#custom-test-anchor)
[Invalid custom anchor link](#wrong-custom-anchor)

## Punctuation in Headings

### Heading: With? Special! Characters

[Valid punctuation link](#heading-with-special-characters)
[Invalid punctuation link](#heading-with-special-characters!)

## HTML Elements

<div id="test-html-id">HTML content</div>
<a name="test-html-name">Named anchor</a>

[Valid HTML id link](#test-html-id)
[Valid HTML name link](#test-html-name)
[Invalid HTML link](#wrong-html-id)

## GitHub Special Cases

[Valid top link](#top)
[Valid line link](#L123)
[Valid range link](#L10C1-L20C5)
[Invalid line format](#L)
[Invalid range format](#L10-L20)

## Setext Headings

First Setext Heading
====================

Second Setext Heading
---------------------

[Valid setext h1 link](#first-setext-heading)
[Valid setext h2 link](#second-setext-heading)
[Invalid setext link](#wrong-setext-heading)

## Duplicate Headings

### Duplicate Name

### Duplicate Name

[Link to first duplicate](#duplicate-name)
[Link to second duplicate](#duplicate-name-1)
[Invalid duplicate link](#duplicate-name-2)

## Multiple Links in Same Paragraph

This paragraph has [valid link](#test-heading-one) and [invalid link](#nonexistent) and [another valid](#custom-test-anchor).

## Edge Cases

[Empty fragment link](#)
[Fragment with spaces](#test heading one)
[Fragment with underscores](#test_heading_one)
[Fragment with numbers](#test-heading-123)

### Should not trigger

[Fragment with external link](https://developer.hashicorp.com/vault/api-docs/auth/jwt#default_role)
[Fragment with relative link](../../project/issues/managing_issues.md#add-an-issue-to-an-iteration-starter)
"#;

        let violation_messages = lint(input, test_config());

        assert_eq!(9, violation_messages.len(), "Expected exactly 9 violations");

        // Links that leave the document must never be reported.
        assert!(
            !violation_messages
                .iter()
                .any(|msg| msg.contains("default_role")),
            "Should not report violations for external links"
        );
        assert!(
            !violation_messages
                .iter()
                .any(|msg| msg.contains("add-an-issue-to-an-iteration-starter")),
            "Should not report violations for relative links with external fragments"
        );

        // Each unresolved fragment must show up in some message.
        let expected_fragments = [
            "wrong-custom-anchor",
            "heading-with-special-characters!",
            "wrong-html-id",
            "'L'",
            "wrong-setext-heading",
            "duplicate-name-2",
            "nonexistent",
            "test_heading_one",
            "test-heading-123",
        ];
        for expected in expected_fragments {
            assert!(violation_messages.iter().any(|msg| msg.contains(expected)));
        }
    }

    #[test]
    fn test_colons() {
        let input = "
## `header:with:colons_in_it`

[should be ok](#headerwithcolons_in_it)
";

        let messages = lint(input, test_config());

        assert_eq!(0, messages.len());
    }
}