1use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::anchor_styles::AnchorStyle;
3use crate::workspace_index::{CrossFileLinkIndex, FileIndex, HeadingIndex};
4use pulldown_cmark::LinkType;
5use regex::Regex;
6use std::collections::{HashMap, HashSet};
7use std::path::{Component, Path, PathBuf};
8use std::sync::LazyLock;
/// Matches an HTML `id=…` or `name=…` attribute whose value is wrapped in
/// single or double quotes; capture group 1 is the (non-empty) attribute value.
static HTML_ANCHOR_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\b(?:id|name)\s*=\s*["']([^"']+)["']"#).unwrap());
13
/// Matches a brace-delimited attribute block such as `{ #my-id .class }`;
/// capture group 1 is the identifier after `#` (a letter followed by letters,
/// digits, `_` or `-`). Anything else up to the closing `}` is ignored.
static ATTR_ANCHOR_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\{\s*#([a-zA-Z][a-zA-Z0-9_-]*)[^}]*\}"#).unwrap());
19
/// Lexically normalizes `path` without consulting the file system.
///
/// * `.` components are dropped.
/// * `..` removes the preceding normal component when there is one.
/// * `..` directly below the root is discarded (the root has no parent).
/// * `..` with nothing to cancel — the result is still empty or already ends
///   in `..` — is kept, so `../guide.md` stays `../guide.md` instead of
///   collapsing to `guide.md` and pointing at a different file.
fn normalize_path(path: &Path) -> PathBuf {
    let mut result = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                // Classify the current tail before mutating `result`.
                let tail = result.components().next_back();
                let tail_is_name = matches!(tail, Some(Component::Normal(_)));
                let tail_is_root = matches!(tail, Some(Component::RootDir));
                if tail_is_name {
                    result.pop();
                } else if !tail_is_root {
                    // Nothing to cancel: keep the `..` so the path still
                    // refers to the parent directory.
                    result.push(Component::ParentDir.as_os_str());
                }
            }
            c => result.push(c.as_os_str()),
        }
    }
    result
}
34
/// Rule MD051: link fragments should reference valid headings.
#[derive(Clone)]
pub struct MD051LinkFragments {
    /// Style used to turn heading text into an anchor fragment
    /// (via `AnchorStyle::generate_fragment`).
    anchor_style: AnchorStyle,
}
46
impl Default for MD051LinkFragments {
    /// Same as [`MD051LinkFragments::new`].
    fn default() -> Self {
        Self::new()
    }
}
52
53impl MD051LinkFragments {
54 pub fn new() -> Self {
55 Self {
56 anchor_style: AnchorStyle::GitHub,
57 }
58 }
59
/// Creates the rule with an explicit anchor generation `style`.
pub fn with_anchor_style(style: AnchorStyle) -> Self {
    Self { anchor_style: style }
}
64
65 fn extract_headings_from_context(
69 &self,
70 ctx: &crate::lint_context::LintContext,
71 ) -> (HashSet<String>, HashSet<String>) {
72 let mut markdown_headings = HashSet::with_capacity(32);
73 let mut html_anchors = HashSet::with_capacity(16);
74 let mut fragment_counts = std::collections::HashMap::new();
75
76 for line_info in &ctx.lines {
77 if line_info.in_front_matter {
78 continue;
79 }
80
81 if line_info.in_code_block {
83 continue;
84 }
85
86 let content = line_info.content(ctx.content);
87 let bytes = content.as_bytes();
88
89 if bytes.contains(&b'<') && (content.contains("id=") || content.contains("name=")) {
91 let mut pos = 0;
94 while pos < content.len() {
95 if let Some(start) = content[pos..].find('<') {
96 let tag_start = pos + start;
97 if let Some(end) = content[tag_start..].find('>') {
98 let tag_end = tag_start + end + 1;
99 let tag = &content[tag_start..tag_end];
100
101 if let Some(caps) = HTML_ANCHOR_PATTERN.find(tag) {
103 let matched_text = caps.as_str();
104 if let Some(caps) = HTML_ANCHOR_PATTERN.captures(matched_text)
105 && let Some(id_match) = caps.get(1)
106 {
107 let id = id_match.as_str();
108 if !id.is_empty() {
109 html_anchors.insert(id.to_string());
110 }
111 }
112 }
113 pos = tag_end;
114 } else {
115 break;
116 }
117 } else {
118 break;
119 }
120 }
121 }
122
123 if line_info.heading.is_none() && content.contains('{') && content.contains('#') {
126 for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
127 if let Some(id_match) = caps.get(1) {
128 markdown_headings.insert(id_match.as_str().to_lowercase());
130 }
131 }
132 }
133
134 if let Some(heading) = &line_info.heading {
136 if let Some(custom_id) = &heading.custom_id {
138 markdown_headings.insert(custom_id.to_lowercase());
139 }
140
141 let fragment = self.anchor_style.generate_fragment(&heading.text);
145
146 if !fragment.is_empty() {
147 let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
149 let suffix = *count;
150 *count += 1;
151 format!("{fragment}-{suffix}")
152 } else {
153 fragment_counts.insert(fragment.clone(), 1);
154 fragment
155 };
156 markdown_headings.insert(final_fragment);
157 }
158 }
159 }
160
161 (markdown_headings, html_anchors)
162 }
163
/// Reports whether `url` starts with one of the prefixes treated as
/// external: `http://`, `https://`, `ftp://`, `mailto:`, `tel:` or the
/// protocol-relative `//`. The comparison is case-sensitive.
#[inline]
fn is_external_url_fast(url: &str) -> bool {
    const EXTERNAL_PREFIXES: [&str; 6] = ["http://", "https://", "ftp://", "mailto:", "tel:", "//"];
    EXTERNAL_PREFIXES.iter().any(|&prefix| url.starts_with(prefix))
}
175
/// Lists the candidate paths to probe for `path`.
///
/// A path that already has an extension is returned on its own. Otherwise
/// the result is `path` itself followed by one variant per entry of
/// `extensions`, in order.
///
/// Entries are normally written with a leading dot (`".md"`). A dotless
/// entry (`"md"`) is accepted as well, and an empty entry (or a lone `"."`)
/// is skipped; slicing off the first byte unconditionally would mangle the
/// former and panic on the latter.
#[inline]
fn resolve_path_with_extensions(path: &Path, extensions: &[&str]) -> Vec<PathBuf> {
    if path.extension().is_some() {
        return vec![path.to_path_buf()];
    }

    let mut paths = Vec::with_capacity(extensions.len() + 1);
    paths.push(path.to_path_buf());
    paths.extend(
        extensions
            .iter()
            .map(|&ext| ext.strip_prefix('.').unwrap_or(ext))
            .filter(|ext| !ext.is_empty())
            .map(|ext| path.with_extension(ext)),
    );
    paths
}
201
/// Reports whether `path_part` looks like a file path written without an
/// extension.
///
/// That is the case when every character is alphanumeric or one of `/`,
/// `\`, `-`, `_`, and at least one character is alphanumeric. Consequently
/// the empty string and anything containing `.`, `?`, `&` or `=` is rejected.
#[inline]
fn is_extensionless_path(path_part: &str) -> bool {
    let is_path_char = |c: char| c.is_alphanumeric() || matches!(c, '/' | '\\' | '-' | '_');
    path_part.chars().all(is_path_char) && path_part.chars().any(char::is_alphanumeric)
}
241
242 #[inline]
244 fn is_cross_file_link(url: &str) -> bool {
245 if let Some(fragment_pos) = url.find('#') {
246 let path_part = &url[..fragment_pos];
247
248 if path_part.is_empty() {
250 return false;
251 }
252
253 if let Some(tag_start) = path_part.find("{%")
259 && path_part[tag_start + 2..].contains("%}")
260 {
261 return true;
262 }
263 if let Some(var_start) = path_part.find("{{")
264 && path_part[var_start + 2..].contains("}}")
265 {
266 return true;
267 }
268
269 if path_part.starts_with('/') {
272 return true;
273 }
274
275 let has_extension = path_part.contains('.')
281 && (
282 {
284 let clean_path = path_part.split('?').next().unwrap_or(path_part);
285 if let Some(after_dot) = clean_path.strip_prefix('.') {
287 let dots_count = clean_path.matches('.').count();
288 if dots_count == 1 {
289 !after_dot.is_empty() && after_dot.len() <= 10 &&
292 after_dot.chars().all(|c| c.is_ascii_alphanumeric())
293 } else {
294 clean_path.split('.').next_back().is_some_and(|ext| {
296 !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
297 })
298 }
299 } else {
300 clean_path.split('.').next_back().is_some_and(|ext| {
302 !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
303 })
304 }
305 } ||
306 path_part.contains('/') || path_part.contains('\\') ||
308 path_part.starts_with("./") || path_part.starts_with("../")
310 );
311
312 let is_extensionless = Self::is_extensionless_path(path_part);
315
316 has_extension || is_extensionless
317 } else {
318 false
319 }
320 }
321}
322
323impl Rule for MD051LinkFragments {
/// Rule identifier; also used as the `rule_name` of emitted warnings.
fn name(&self) -> &'static str {
    "MD051"
}
327
/// One-line human-readable summary of the rule.
fn description(&self) -> &'static str {
    "Link fragments should reference valid headings"
}
331
332 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
333 if !ctx.likely_has_links_or_images() {
335 return true;
336 }
337 !ctx.has_char('#')
339 }
340
341 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
342 let mut warnings = Vec::new();
343
344 if ctx.content.is_empty() || ctx.links.is_empty() || self.should_skip(ctx) {
345 return Ok(warnings);
346 }
347
348 let (markdown_headings, html_anchors) = self.extract_headings_from_context(ctx);
349
350 for link in &ctx.links {
351 if link.is_reference {
352 continue;
353 }
354
355 if matches!(link.link_type, LinkType::WikiLink { .. }) {
357 continue;
358 }
359
360 if ctx.is_in_jinja_range(link.byte_offset) {
362 continue;
363 }
364
365 let url = &link.url;
366
367 if !url.contains('#') || Self::is_external_url_fast(url) {
369 continue;
370 }
371
372 if url.contains("{{#") && url.contains("}}") {
375 continue;
376 }
377
378 if url.starts_with('@') {
382 continue;
383 }
384
385 if Self::is_cross_file_link(url) {
387 continue;
388 }
389
390 let Some(fragment_pos) = url.find('#') else {
391 continue;
392 };
393
394 let fragment = &url[fragment_pos + 1..];
395
396 if (url.contains("{{") && fragment.contains('|')) || fragment.ends_with("}}") || fragment.ends_with("%}") {
398 continue;
399 }
400
401 if fragment.is_empty() {
402 continue;
403 }
404
405 let found = if html_anchors.contains(fragment) {
408 true
409 } else {
410 let fragment_lower = fragment.to_lowercase();
411 markdown_headings.contains(&fragment_lower)
412 };
413
414 if !found {
415 warnings.push(LintWarning {
416 rule_name: Some(self.name().to_string()),
417 message: format!("Link anchor '#{fragment}' does not exist in document headings"),
418 line: link.line,
419 column: link.start_col + 1,
420 end_line: link.line,
421 end_column: link.end_col + 1,
422 severity: Severity::Error,
423 fix: None,
424 });
425 }
426 }
427
428 Ok(warnings)
429 }
430
/// Returns the document content unchanged; warnings from this rule carry
/// `fix: None`, so there is nothing to rewrite.
fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
    Ok(ctx.content.to_string())
}
436
/// Exposes the rule as `&dyn Any`.
fn as_any(&self) -> &dyn std::any::Any {
    self
}
440
441 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
442 where
443 Self: Sized,
444 {
445 let anchor_style = if let Some(rule_config) = config.rules.get("MD051") {
447 if let Some(style_str) = rule_config.values.get("anchor-style").and_then(|v| v.as_str()) {
448 match style_str.to_lowercase().as_str() {
449 "kramdown" => AnchorStyle::Kramdown,
450 "kramdown-gfm" => AnchorStyle::KramdownGfm,
451 "jekyll" => AnchorStyle::KramdownGfm, _ => AnchorStyle::GitHub,
453 }
454 } else {
455 AnchorStyle::GitHub
456 }
457 } else {
458 AnchorStyle::GitHub
459 };
460
461 Box::new(MD051LinkFragments::with_anchor_style(anchor_style))
462 }
463
/// This rule belongs to the `Link` category.
fn category(&self) -> RuleCategory {
    RuleCategory::Link
}
467
/// Declares workspace-wide cross-file scope (see `contribute_to_index` and
/// `cross_file_check`).
fn cross_file_scope(&self) -> CrossFileScope {
    CrossFileScope::Workspace
}
471
472 fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, file_index: &mut FileIndex) {
473 let mut fragment_counts = HashMap::new();
474
475 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
477 if line_info.in_front_matter {
478 continue;
479 }
480
481 if line_info.in_code_block {
483 continue;
484 }
485
486 let content = line_info.content(ctx.content);
487
488 if content.contains('<') && (content.contains("id=") || content.contains("name=")) {
490 let mut pos = 0;
491 while pos < content.len() {
492 if let Some(start) = content[pos..].find('<') {
493 let tag_start = pos + start;
494 if let Some(end) = content[tag_start..].find('>') {
495 let tag_end = tag_start + end + 1;
496 let tag = &content[tag_start..tag_end];
497
498 if let Some(caps) = HTML_ANCHOR_PATTERN.captures(tag)
499 && let Some(id_match) = caps.get(1)
500 {
501 file_index.add_html_anchor(id_match.as_str().to_string());
502 }
503 pos = tag_end;
504 } else {
505 break;
506 }
507 } else {
508 break;
509 }
510 }
511 }
512
513 if line_info.heading.is_none() && content.contains("{") && content.contains("#") {
516 for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
517 if let Some(id_match) = caps.get(1) {
518 file_index.add_attribute_anchor(id_match.as_str().to_string());
519 }
520 }
521 }
522
523 if let Some(heading) = &line_info.heading {
525 let fragment = self.anchor_style.generate_fragment(&heading.text);
526
527 if !fragment.is_empty() {
528 let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
530 let suffix = *count;
531 *count += 1;
532 format!("{fragment}-{suffix}")
533 } else {
534 fragment_counts.insert(fragment.clone(), 1);
535 fragment
536 };
537
538 file_index.add_heading(HeadingIndex {
539 text: heading.text.clone(),
540 auto_anchor: final_fragment,
541 custom_anchor: heading.custom_id.clone(),
542 line: line_idx + 1, });
544 }
545 }
546 }
547
548 for link in &ctx.links {
550 if link.is_reference {
551 continue;
552 }
553
554 let url = &link.url;
555
556 if Self::is_external_url_fast(url) {
558 continue;
559 }
560
561 if Self::is_cross_file_link(url)
563 && let Some(fragment_pos) = url.find('#')
564 {
565 let path_part = &url[..fragment_pos];
566 let fragment = &url[fragment_pos + 1..];
567
568 if fragment.is_empty() || fragment.contains("{{") || fragment.contains("{%") {
570 continue;
571 }
572
573 file_index.add_cross_file_link(CrossFileLinkIndex {
574 target_path: path_part.to_string(),
575 fragment: fragment.to_string(),
576 line: link.line,
577 column: link.start_col + 1,
578 });
579 }
580 }
581 }
582
583 fn cross_file_check(
584 &self,
585 file_path: &Path,
586 file_index: &FileIndex,
587 workspace_index: &crate::workspace_index::WorkspaceIndex,
588 ) -> LintResult {
589 let mut warnings = Vec::new();
590
591 const MARKDOWN_EXTENSIONS: &[&str] = &[
593 ".md",
594 ".markdown",
595 ".mdx",
596 ".mkd",
597 ".mkdn",
598 ".mdown",
599 ".mdwn",
600 ".qmd",
601 ".rmd",
602 ];
603
604 for cross_link in &file_index.cross_file_links {
606 if cross_link.fragment.is_empty() {
608 continue;
609 }
610
611 let base_target_path = if let Some(parent) = file_path.parent() {
613 parent.join(&cross_link.target_path)
614 } else {
615 Path::new(&cross_link.target_path).to_path_buf()
616 };
617
618 let base_target_path = normalize_path(&base_target_path);
620
621 let target_paths_to_try = Self::resolve_path_with_extensions(&base_target_path, MARKDOWN_EXTENSIONS);
624
625 let mut target_file_index = None;
627
628 for target_path in &target_paths_to_try {
629 if let Some(index) = workspace_index.get_file(target_path) {
630 target_file_index = Some(index);
631 break;
632 }
633 }
634
635 if let Some(target_file_index) = target_file_index {
636 if !target_file_index.has_anchor(&cross_link.fragment) {
638 warnings.push(LintWarning {
639 rule_name: Some(self.name().to_string()),
640 line: cross_link.line,
641 column: cross_link.column,
642 end_line: cross_link.line,
643 end_column: cross_link.column + cross_link.target_path.len() + 1 + cross_link.fragment.len(),
644 message: format!(
645 "Link fragment '{}' not found in '{}'",
646 cross_link.fragment, cross_link.target_path
647 ),
648 severity: Severity::Error,
649 fix: None,
650 });
651 }
652 }
653 }
655
656 Ok(warnings)
657 }
658
659 fn default_config_section(&self) -> Option<(String, toml::Value)> {
660 let value: toml::Value = toml::from_str(
661 r#"
662# Anchor generation style to match your target platform
663# Options: "github" (default), "kramdown-gfm", "kramdown"
664# Note: "jekyll" is accepted as an alias for "kramdown-gfm" (backward compatibility)
665anchor-style = "github"
666"#,
667 )
668 .ok()?;
669 Some(("MD051".to_string(), value))
670 }
671}
672
673#[cfg(test)]
674mod tests {
675 use super::*;
676 use crate::lint_context::LintContext;
677
/// Quarto citation-style cross-references must be ignored, while ordinary
/// fragment links are still validated under the Quarto flavor.
#[test]
fn test_quarto_cross_references() {
    let rule = MD051LinkFragments::new();

    // Cross-references of the form `[@prefix-name]` should produce no warnings.
    let content = r#"# Test Document

## Figures

See [@fig-plot] for the visualization.

More details in [@tbl-results] and [@sec-methods].

The equation [@eq-regression] shows the relationship.

Reference to [@lst-code] for implementation."#;
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
    let result = rule.check(&ctx).unwrap();
    assert!(
        result.is_empty(),
        "Quarto cross-references (@fig-, @tbl-, @sec-, @eq-) should not trigger MD051 warnings. Got {} warnings",
        result.len()
    );

    // A fragment that matches an existing heading is accepted.
    let content_with_anchor = r#"# Test

See [link](#test) for details."#;
    let ctx_anchor = LintContext::new(content_with_anchor, crate::config::MarkdownFlavor::Quarto, None);
    let result_anchor = rule.check(&ctx_anchor).unwrap();
    assert!(result_anchor.is_empty(), "Valid anchor should not trigger warning");

    // A fragment with no matching heading is still reported.
    let content_invalid = r#"# Test

See [link](#nonexistent) for details."#;
    let ctx_invalid = LintContext::new(content_invalid, crate::config::MarkdownFlavor::Quarto, None);
    let result_invalid = rule.check(&ctx_invalid).unwrap();
    assert_eq!(result_invalid.len(), 1, "Invalid anchor should still trigger warning");
}
718
/// The rule must declare workspace-wide cross-file scope.
#[test]
fn test_cross_file_scope() {
    let rule = MD051LinkFragments::new();
    assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
}
725
/// `contribute_to_index` records each heading with its text, generated
/// anchor and optional custom anchor.
#[test]
fn test_contribute_to_index_extracts_headings() {
    let rule = MD051LinkFragments::new();
    let content = "# First Heading\n\n# Second { #custom }\n\n## Third";
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    let mut file_index = FileIndex::new();
    rule.contribute_to_index(&ctx, &mut file_index);

    assert_eq!(file_index.headings.len(), 3);
    assert_eq!(file_index.headings[0].text, "First Heading");
    assert_eq!(file_index.headings[0].auto_anchor, "first-heading");
    assert!(file_index.headings[0].custom_anchor.is_none());

    // The `{ #custom }` suffix is expected as the custom anchor, not as heading text.
    assert_eq!(file_index.headings[1].text, "Second");
    assert_eq!(file_index.headings[1].custom_anchor, Some("custom".to_string()));

    assert_eq!(file_index.headings[2].text, "Third");
}
745
/// `contribute_to_index` records cross-file links split into target path
/// and fragment, in document order.
#[test]
fn test_contribute_to_index_extracts_cross_file_links() {
    let rule = MD051LinkFragments::new();
    let content = "See [docs](other.md#installation) and [more](../guide.md#getting-started)";
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    let mut file_index = FileIndex::new();
    rule.contribute_to_index(&ctx, &mut file_index);

    assert_eq!(file_index.cross_file_links.len(), 2);
    assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
    assert_eq!(file_index.cross_file_links[0].fragment, "installation");
    assert_eq!(file_index.cross_file_links[1].target_path, "../guide.md");
    assert_eq!(file_index.cross_file_links[1].fragment, "getting-started");
}
761
/// A cross-file link whose fragment matches a heading anchor in the target
/// file produces no warning.
#[test]
fn test_cross_file_check_valid_fragment() {
    use crate::workspace_index::WorkspaceIndex;

    let rule = MD051LinkFragments::new();

    // Workspace containing the link target and its heading.
    let mut workspace_index = WorkspaceIndex::new();
    let mut target_file_index = FileIndex::new();
    target_file_index.add_heading(HeadingIndex {
        text: "Installation Guide".to_string(),
        auto_anchor: "installation-guide".to_string(),
        custom_anchor: None,
        line: 1,
    });
    workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);

    // Index of the linking file, with one link to that heading.
    let mut current_file_index = FileIndex::new();
    current_file_index.add_cross_file_link(CrossFileLinkIndex {
        target_path: "install.md".to_string(),
        fragment: "installation-guide".to_string(),
        line: 3,
        column: 5,
    });

    let warnings = rule
        .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
        .unwrap();

    assert!(warnings.is_empty());
}
795
/// A cross-file link whose fragment is absent from the target file yields
/// exactly one warning naming both the fragment and the target path.
#[test]
fn test_cross_file_check_invalid_fragment() {
    use crate::workspace_index::WorkspaceIndex;

    let rule = MD051LinkFragments::new();

    // Workspace containing the link target and its only heading.
    let mut workspace_index = WorkspaceIndex::new();
    let mut target_file_index = FileIndex::new();
    target_file_index.add_heading(HeadingIndex {
        text: "Installation Guide".to_string(),
        auto_anchor: "installation-guide".to_string(),
        custom_anchor: None,
        line: 1,
    });
    workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);

    // Index of the linking file, with a link to a fragment that does not exist.
    let mut current_file_index = FileIndex::new();
    current_file_index.add_cross_file_link(CrossFileLinkIndex {
        target_path: "install.md".to_string(),
        fragment: "nonexistent".to_string(),
        line: 3,
        column: 5,
    });

    let warnings = rule
        .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
        .unwrap();

    assert_eq!(warnings.len(), 1);
    assert!(warnings[0].message.contains("nonexistent"));
    assert!(warnings[0].message.contains("install.md"));
}
831
/// A cross-file link may target a heading's custom anchor instead of its
/// generated one.
#[test]
fn test_cross_file_check_custom_anchor_match() {
    use crate::workspace_index::WorkspaceIndex;

    let rule = MD051LinkFragments::new();

    // Target heading carries the custom anchor `install`.
    let mut workspace_index = WorkspaceIndex::new();
    let mut target_file_index = FileIndex::new();
    target_file_index.add_heading(HeadingIndex {
        text: "Installation Guide".to_string(),
        auto_anchor: "installation-guide".to_string(),
        custom_anchor: Some("install".to_string()),
        line: 1,
    });
    workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);

    // The link uses the custom anchor.
    let mut current_file_index = FileIndex::new();
    current_file_index.add_cross_file_link(CrossFileLinkIndex {
        target_path: "install.md".to_string(),
        fragment: "install".to_string(),
        line: 3,
        column: 5,
    });

    let warnings = rule
        .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
        .unwrap();

    assert!(warnings.is_empty());
}
865
/// Links to files that are not part of the workspace index are not reported.
#[test]
fn test_cross_file_check_target_not_in_workspace() {
    use crate::workspace_index::WorkspaceIndex;

    let rule = MD051LinkFragments::new();

    // Empty workspace: the link target cannot be resolved.
    let workspace_index = WorkspaceIndex::new();

    let mut current_file_index = FileIndex::new();
    current_file_index.add_cross_file_link(CrossFileLinkIndex {
        target_path: "external.md".to_string(),
        fragment: "heading".to_string(),
        line: 3,
        column: 5,
    });

    let warnings = rule
        .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
        .unwrap();

    assert!(warnings.is_empty());
}
891}