1use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::anchor_styles::AnchorStyle;
3use crate::workspace_index::{CrossFileLinkIndex, FileIndex, HeadingIndex};
4use pulldown_cmark::LinkType;
5use regex::Regex;
6use std::collections::{HashMap, HashSet};
7use std::path::{Component, Path, PathBuf};
8use std::sync::LazyLock;
/// Matches an HTML `id` or `name` attribute with a single- or double-quoted,
/// non-empty value (for example `id="intro"`). Capture group 1 is the value.
static HTML_ANCHOR_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\b(?:id|name)\s*=\s*["']([^"']+)["']"#).unwrap());
13
/// Matches a brace-delimited attribute block whose first item is `#identifier`,
/// such as `{#my-id}` or `{ #my-id .class }`. Capture group 1 is the identifier,
/// which must start with an ASCII letter.
static ATTR_ANCHOR_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\{\s*#([a-zA-Z][a-zA-Z0-9_-]*)[^}]*\}"#).unwrap());
19
/// Lexically normalizes `path` without touching the filesystem.
///
/// * `.` components are removed.
/// * `..` removes the preceding normal component when there is one.
/// * `..` directly after a root (or prefix) is ignored, since the root is its
///   own parent.
/// * `..` with nothing left to remove is kept, so a relative path that climbs
///   above its starting directory (`../guide.md`) is not silently rewritten
///   to a different location (`guide.md`).
fn normalize_path(path: &Path) -> PathBuf {
    let mut result = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                // Inspect the current tail first; the borrow ends before we mutate.
                let tail = result.components().next_back();
                let ends_in_name = matches!(tail, Some(Component::Normal(_)));
                let at_anchor = matches!(tail, Some(Component::RootDir | Component::Prefix(_)));

                if ends_in_name {
                    result.pop();
                } else if !at_anchor {
                    // Empty so far, or already a run of `..`: keep climbing.
                    result.push("..");
                }
            }
            other => result.push(other.as_os_str()),
        }
    }
    result
}
34
/// Rule MD051: link fragments (`#anchor`) should reference an anchor that exists.
#[derive(Clone)]
pub struct MD051LinkFragments {
    /// Style used to turn heading text into an anchor fragment.
    anchor_style: AnchorStyle,
}
46
47impl Default for MD051LinkFragments {
48 fn default() -> Self {
49 Self::new()
50 }
51}
52
53impl MD051LinkFragments {
54 pub fn new() -> Self {
55 Self {
56 anchor_style: AnchorStyle::GitHub,
57 }
58 }
59
60 pub fn with_anchor_style(style: AnchorStyle) -> Self {
62 Self { anchor_style: style }
63 }
64
65 fn extract_headings_from_context(
69 &self,
70 ctx: &crate::lint_context::LintContext,
71 ) -> (HashSet<String>, HashSet<String>) {
72 let mut markdown_headings = HashSet::with_capacity(32);
73 let mut html_anchors = HashSet::with_capacity(16);
74 let mut fragment_counts = std::collections::HashMap::new();
75
76 for line_info in &ctx.lines {
77 if line_info.in_front_matter {
78 continue;
79 }
80
81 if line_info.in_code_block {
83 continue;
84 }
85
86 let content = line_info.content(ctx.content);
87 let bytes = content.as_bytes();
88
89 if bytes.contains(&b'<') && (content.contains("id=") || content.contains("name=")) {
91 let mut pos = 0;
94 while pos < content.len() {
95 if let Some(start) = content[pos..].find('<') {
96 let tag_start = pos + start;
97 if let Some(end) = content[tag_start..].find('>') {
98 let tag_end = tag_start + end + 1;
99 let tag = &content[tag_start..tag_end];
100
101 if let Some(caps) = HTML_ANCHOR_PATTERN.find(tag) {
103 let matched_text = caps.as_str();
104 if let Some(caps) = HTML_ANCHOR_PATTERN.captures(matched_text)
105 && let Some(id_match) = caps.get(1)
106 {
107 let id = id_match.as_str();
108 if !id.is_empty() {
109 html_anchors.insert(id.to_string());
110 }
111 }
112 }
113 pos = tag_end;
114 } else {
115 break;
116 }
117 } else {
118 break;
119 }
120 }
121 }
122
123 if line_info.heading.is_none() && content.contains('{') && content.contains('#') {
126 for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
127 if let Some(id_match) = caps.get(1) {
128 markdown_headings.insert(id_match.as_str().to_lowercase());
130 }
131 }
132 }
133
134 if let Some(heading) = &line_info.heading {
136 if let Some(custom_id) = &heading.custom_id {
138 markdown_headings.insert(custom_id.to_lowercase());
139 }
140
141 let fragment = self.anchor_style.generate_fragment(&heading.text);
144
145 if !fragment.is_empty() {
146 let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
148 let suffix = *count;
149 *count += 1;
150 format!("{fragment}-{suffix}")
151 } else {
152 fragment_counts.insert(fragment.clone(), 1);
153 fragment
154 };
155 markdown_headings.insert(final_fragment);
156 }
157 }
158 }
159
160 (markdown_headings, html_anchors)
161 }
162
163 #[inline]
165 fn is_external_url_fast(url: &str) -> bool {
166 url.starts_with("http://")
168 || url.starts_with("https://")
169 || url.starts_with("ftp://")
170 || url.starts_with("mailto:")
171 || url.starts_with("tel:")
172 || url.starts_with("//")
173 }
174
175 #[inline]
177 fn is_cross_file_link(url: &str) -> bool {
178 if let Some(fragment_pos) = url.find('#') {
179 let path_part = &url[..fragment_pos];
180
181 if path_part.is_empty() {
183 return false;
184 }
185
186 if let Some(tag_start) = path_part.find("{%")
192 && path_part[tag_start + 2..].contains("%}")
193 {
194 return true;
195 }
196 if let Some(var_start) = path_part.find("{{")
197 && path_part[var_start + 2..].contains("}}")
198 {
199 return true;
200 }
201
202 if path_part.starts_with('/') {
205 return true;
206 }
207
208 path_part.contains('.')
213 && (
214 {
216 let clean_path = path_part.split('?').next().unwrap_or(path_part);
217 if let Some(after_dot) = clean_path.strip_prefix('.') {
219 let dots_count = clean_path.matches('.').count();
220 if dots_count == 1 {
221 !after_dot.is_empty() && after_dot.len() <= 10 &&
224 after_dot.chars().all(|c| c.is_ascii_alphanumeric())
225 } else {
226 clean_path.split('.').next_back().is_some_and(|ext| {
228 !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
229 })
230 }
231 } else {
232 clean_path.split('.').next_back().is_some_and(|ext| {
234 !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
235 })
236 }
237 } ||
238 path_part.contains('/') || path_part.contains('\\') ||
240 path_part.starts_with("./") || path_part.starts_with("../")
242 )
243 } else {
244 false
245 }
246 }
247}
248
249impl Rule for MD051LinkFragments {
250 fn name(&self) -> &'static str {
251 "MD051"
252 }
253
254 fn description(&self) -> &'static str {
255 "Link fragments should reference valid headings"
256 }
257
258 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
259 if !ctx.likely_has_links_or_images() {
261 return true;
262 }
263 !ctx.has_char('#')
265 }
266
267 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
268 let mut warnings = Vec::new();
269
270 if ctx.content.is_empty() || ctx.links.is_empty() || self.should_skip(ctx) {
271 return Ok(warnings);
272 }
273
274 let (markdown_headings, html_anchors) = self.extract_headings_from_context(ctx);
275
276 for link in &ctx.links {
277 if link.is_reference {
278 continue;
279 }
280
281 if matches!(link.link_type, LinkType::WikiLink { .. }) {
283 continue;
284 }
285
286 if ctx.is_in_jinja_range(link.byte_offset) {
288 continue;
289 }
290
291 let url = &link.url;
292
293 if !url.contains('#') || Self::is_external_url_fast(url) {
295 continue;
296 }
297
298 if url.contains("{{#") && url.contains("}}") {
301 continue;
302 }
303
304 if url.starts_with('@') {
308 continue;
309 }
310
311 if Self::is_cross_file_link(url) {
313 continue;
314 }
315
316 let Some(fragment_pos) = url.find('#') else {
317 continue;
318 };
319
320 let fragment = &url[fragment_pos + 1..];
321
322 if (url.contains("{{") && fragment.contains('|')) || fragment.ends_with("}}") || fragment.ends_with("%}") {
324 continue;
325 }
326
327 if fragment.is_empty() {
328 continue;
329 }
330
331 let found = if html_anchors.contains(fragment) {
334 true
335 } else {
336 let fragment_lower = fragment.to_lowercase();
337 markdown_headings.contains(&fragment_lower)
338 };
339
340 if !found {
341 warnings.push(LintWarning {
342 rule_name: Some(self.name().to_string()),
343 message: format!("Link anchor '#{fragment}' does not exist in document headings"),
344 line: link.line,
345 column: link.start_col + 1,
346 end_line: link.line,
347 end_column: link.end_col + 1,
348 severity: Severity::Warning,
349 fix: None,
350 });
351 }
352 }
353
354 Ok(warnings)
355 }
356
357 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
358 Ok(ctx.content.to_string())
361 }
362
363 fn as_any(&self) -> &dyn std::any::Any {
364 self
365 }
366
367 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
368 where
369 Self: Sized,
370 {
371 let anchor_style = if let Some(rule_config) = config.rules.get("MD051") {
373 if let Some(style_str) = rule_config.values.get("anchor-style").and_then(|v| v.as_str()) {
374 match style_str.to_lowercase().as_str() {
375 "kramdown" => AnchorStyle::Kramdown,
376 "kramdown-gfm" => AnchorStyle::KramdownGfm,
377 "jekyll" => AnchorStyle::KramdownGfm, _ => AnchorStyle::GitHub,
379 }
380 } else {
381 AnchorStyle::GitHub
382 }
383 } else {
384 AnchorStyle::GitHub
385 };
386
387 Box::new(MD051LinkFragments::with_anchor_style(anchor_style))
388 }
389
390 fn category(&self) -> RuleCategory {
391 RuleCategory::Link
392 }
393
394 fn cross_file_scope(&self) -> CrossFileScope {
395 CrossFileScope::Workspace
396 }
397
398 fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, file_index: &mut FileIndex) {
399 let mut fragment_counts = HashMap::new();
400
401 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
403 if line_info.in_front_matter {
404 continue;
405 }
406
407 if line_info.in_code_block {
409 continue;
410 }
411
412 let content = line_info.content(ctx.content);
413
414 if content.contains('<') && (content.contains("id=") || content.contains("name=")) {
416 let mut pos = 0;
417 while pos < content.len() {
418 if let Some(start) = content[pos..].find('<') {
419 let tag_start = pos + start;
420 if let Some(end) = content[tag_start..].find('>') {
421 let tag_end = tag_start + end + 1;
422 let tag = &content[tag_start..tag_end];
423
424 if let Some(caps) = HTML_ANCHOR_PATTERN.captures(tag)
425 && let Some(id_match) = caps.get(1)
426 {
427 file_index.add_html_anchor(id_match.as_str().to_string());
428 }
429 pos = tag_end;
430 } else {
431 break;
432 }
433 } else {
434 break;
435 }
436 }
437 }
438
439 if line_info.heading.is_none() && content.contains("{") && content.contains("#") {
442 for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
443 if let Some(id_match) = caps.get(1) {
444 file_index.add_attribute_anchor(id_match.as_str().to_string());
445 }
446 }
447 }
448
449 if let Some(heading) = &line_info.heading {
451 let fragment = self.anchor_style.generate_fragment(&heading.text);
452
453 if !fragment.is_empty() {
454 let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
456 let suffix = *count;
457 *count += 1;
458 format!("{fragment}-{suffix}")
459 } else {
460 fragment_counts.insert(fragment.clone(), 1);
461 fragment
462 };
463
464 file_index.add_heading(HeadingIndex {
465 text: heading.text.clone(),
466 auto_anchor: final_fragment,
467 custom_anchor: heading.custom_id.clone(),
468 line: line_idx + 1, });
470 }
471 }
472 }
473
474 for link in &ctx.links {
476 if link.is_reference {
477 continue;
478 }
479
480 let url = &link.url;
481
482 if Self::is_external_url_fast(url) {
484 continue;
485 }
486
487 if Self::is_cross_file_link(url)
489 && let Some(fragment_pos) = url.find('#')
490 {
491 let path_part = &url[..fragment_pos];
492 let fragment = &url[fragment_pos + 1..];
493
494 if fragment.is_empty() || fragment.contains("{{") || fragment.contains("{%") {
496 continue;
497 }
498
499 file_index.add_cross_file_link(CrossFileLinkIndex {
500 target_path: path_part.to_string(),
501 fragment: fragment.to_string(),
502 line: link.line,
503 column: link.start_col + 1,
504 });
505 }
506 }
507 }
508
509 fn cross_file_check(
510 &self,
511 file_path: &Path,
512 file_index: &FileIndex,
513 workspace_index: &crate::workspace_index::WorkspaceIndex,
514 ) -> LintResult {
515 let mut warnings = Vec::new();
516
517 for cross_link in &file_index.cross_file_links {
519 if cross_link.fragment.is_empty() {
521 continue;
522 }
523
524 let target_path = if let Some(parent) = file_path.parent() {
526 parent.join(&cross_link.target_path)
527 } else {
528 Path::new(&cross_link.target_path).to_path_buf()
529 };
530
531 let target_path = normalize_path(&target_path);
533
534 if let Some(target_file_index) = workspace_index.get_file(&target_path) {
536 if !target_file_index.has_anchor(&cross_link.fragment) {
538 warnings.push(LintWarning {
539 rule_name: Some(self.name().to_string()),
540 line: cross_link.line,
541 column: cross_link.column,
542 end_line: cross_link.line,
543 end_column: cross_link.column + cross_link.target_path.len() + 1 + cross_link.fragment.len(),
544 message: format!(
545 "Link fragment '{}' not found in '{}'",
546 cross_link.fragment, cross_link.target_path
547 ),
548 severity: Severity::Warning,
549 fix: None,
550 });
551 }
552 }
553 }
555
556 Ok(warnings)
557 }
558
559 fn default_config_section(&self) -> Option<(String, toml::Value)> {
560 let value: toml::Value = toml::from_str(
561 r#"
562# Anchor generation style to match your target platform
563# Options: "github" (default), "kramdown-gfm", "kramdown"
564# Note: "jekyll" is accepted as an alias for "kramdown-gfm" (backward compatibility)
565anchor-style = "github"
566"#,
567 )
568 .ok()?;
569 Some(("MD051".to_string(), value))
570 }
571}
572
573#[cfg(test)]
574mod tests {
575 use super::*;
576 use crate::lint_context::LintContext;
577
578 #[test]
579 fn test_quarto_cross_references() {
580 let rule = MD051LinkFragments::new();
581
582 let content = r#"# Test Document
584
585## Figures
586
587See [@fig-plot] for the visualization.
588
589More details in [@tbl-results] and [@sec-methods].
590
591The equation [@eq-regression] shows the relationship.
592
593Reference to [@lst-code] for implementation."#;
594 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
595 let result = rule.check(&ctx).unwrap();
596 assert!(
597 result.is_empty(),
598 "Quarto cross-references (@fig-, @tbl-, @sec-, @eq-) should not trigger MD051 warnings. Got {} warnings",
599 result.len()
600 );
601
602 let content_with_anchor = r#"# Test
604
605See [link](#test) for details."#;
606 let ctx_anchor = LintContext::new(content_with_anchor, crate::config::MarkdownFlavor::Quarto, None);
607 let result_anchor = rule.check(&ctx_anchor).unwrap();
608 assert!(result_anchor.is_empty(), "Valid anchor should not trigger warning");
609
610 let content_invalid = r#"# Test
612
613See [link](#nonexistent) for details."#;
614 let ctx_invalid = LintContext::new(content_invalid, crate::config::MarkdownFlavor::Quarto, None);
615 let result_invalid = rule.check(&ctx_invalid).unwrap();
616 assert_eq!(result_invalid.len(), 1, "Invalid anchor should still trigger warning");
617 }
618
619 #[test]
621 fn test_cross_file_scope() {
622 let rule = MD051LinkFragments::new();
623 assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
624 }
625
626 #[test]
627 fn test_contribute_to_index_extracts_headings() {
628 let rule = MD051LinkFragments::new();
629 let content = "# First Heading\n\n# Second { #custom }\n\n## Third";
630 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
631
632 let mut file_index = FileIndex::new();
633 rule.contribute_to_index(&ctx, &mut file_index);
634
635 assert_eq!(file_index.headings.len(), 3);
636 assert_eq!(file_index.headings[0].text, "First Heading");
637 assert_eq!(file_index.headings[0].auto_anchor, "first-heading");
638 assert!(file_index.headings[0].custom_anchor.is_none());
639
640 assert_eq!(file_index.headings[1].text, "Second");
641 assert_eq!(file_index.headings[1].custom_anchor, Some("custom".to_string()));
642
643 assert_eq!(file_index.headings[2].text, "Third");
644 }
645
646 #[test]
647 fn test_contribute_to_index_extracts_cross_file_links() {
648 let rule = MD051LinkFragments::new();
649 let content = "See [docs](other.md#installation) and [more](../guide.md#getting-started)";
650 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
651
652 let mut file_index = FileIndex::new();
653 rule.contribute_to_index(&ctx, &mut file_index);
654
655 assert_eq!(file_index.cross_file_links.len(), 2);
656 assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
657 assert_eq!(file_index.cross_file_links[0].fragment, "installation");
658 assert_eq!(file_index.cross_file_links[1].target_path, "../guide.md");
659 assert_eq!(file_index.cross_file_links[1].fragment, "getting-started");
660 }
661
662 #[test]
663 fn test_cross_file_check_valid_fragment() {
664 use crate::workspace_index::WorkspaceIndex;
665
666 let rule = MD051LinkFragments::new();
667
668 let mut workspace_index = WorkspaceIndex::new();
670 let mut target_file_index = FileIndex::new();
671 target_file_index.add_heading(HeadingIndex {
672 text: "Installation Guide".to_string(),
673 auto_anchor: "installation-guide".to_string(),
674 custom_anchor: None,
675 line: 1,
676 });
677 workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
678
679 let mut current_file_index = FileIndex::new();
681 current_file_index.add_cross_file_link(CrossFileLinkIndex {
682 target_path: "install.md".to_string(),
683 fragment: "installation-guide".to_string(),
684 line: 3,
685 column: 5,
686 });
687
688 let warnings = rule
689 .cross_file_check(Path::new("docs/readme.md"), ¤t_file_index, &workspace_index)
690 .unwrap();
691
692 assert!(warnings.is_empty());
694 }
695
696 #[test]
697 fn test_cross_file_check_invalid_fragment() {
698 use crate::workspace_index::WorkspaceIndex;
699
700 let rule = MD051LinkFragments::new();
701
702 let mut workspace_index = WorkspaceIndex::new();
704 let mut target_file_index = FileIndex::new();
705 target_file_index.add_heading(HeadingIndex {
706 text: "Installation Guide".to_string(),
707 auto_anchor: "installation-guide".to_string(),
708 custom_anchor: None,
709 line: 1,
710 });
711 workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
712
713 let mut current_file_index = FileIndex::new();
715 current_file_index.add_cross_file_link(CrossFileLinkIndex {
716 target_path: "install.md".to_string(),
717 fragment: "nonexistent".to_string(),
718 line: 3,
719 column: 5,
720 });
721
722 let warnings = rule
723 .cross_file_check(Path::new("docs/readme.md"), ¤t_file_index, &workspace_index)
724 .unwrap();
725
726 assert_eq!(warnings.len(), 1);
728 assert!(warnings[0].message.contains("nonexistent"));
729 assert!(warnings[0].message.contains("install.md"));
730 }
731
732 #[test]
733 fn test_cross_file_check_custom_anchor_match() {
734 use crate::workspace_index::WorkspaceIndex;
735
736 let rule = MD051LinkFragments::new();
737
738 let mut workspace_index = WorkspaceIndex::new();
740 let mut target_file_index = FileIndex::new();
741 target_file_index.add_heading(HeadingIndex {
742 text: "Installation Guide".to_string(),
743 auto_anchor: "installation-guide".to_string(),
744 custom_anchor: Some("install".to_string()),
745 line: 1,
746 });
747 workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
748
749 let mut current_file_index = FileIndex::new();
751 current_file_index.add_cross_file_link(CrossFileLinkIndex {
752 target_path: "install.md".to_string(),
753 fragment: "install".to_string(),
754 line: 3,
755 column: 5,
756 });
757
758 let warnings = rule
759 .cross_file_check(Path::new("docs/readme.md"), ¤t_file_index, &workspace_index)
760 .unwrap();
761
762 assert!(warnings.is_empty());
764 }
765
766 #[test]
767 fn test_cross_file_check_target_not_in_workspace() {
768 use crate::workspace_index::WorkspaceIndex;
769
770 let rule = MD051LinkFragments::new();
771
772 let workspace_index = WorkspaceIndex::new();
774
775 let mut current_file_index = FileIndex::new();
777 current_file_index.add_cross_file_link(CrossFileLinkIndex {
778 target_path: "external.md".to_string(),
779 fragment: "heading".to_string(),
780 line: 3,
781 column: 5,
782 });
783
784 let warnings = rule
785 .cross_file_check(Path::new("docs/readme.md"), ¤t_file_index, &workspace_index)
786 .unwrap();
787
788 assert!(warnings.is_empty());
790 }
791}