1use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::anchor_styles::AnchorStyle;
3use crate::workspace_index::{CrossFileLinkIndex, FileIndex, HeadingIndex};
4use pulldown_cmark::LinkType;
5use regex::Regex;
6use std::collections::{HashMap, HashSet};
7use std::path::{Component, Path, PathBuf};
8use std::sync::LazyLock;
9static HTML_ANCHOR_PATTERN: LazyLock<Regex> =
12 LazyLock::new(|| Regex::new(r#"\b(?:id|name)\s*=\s*["']([^"']+)["']"#).unwrap());
13
/// Lexically normalizes `path` by removing `.` components and resolving `..`
/// components, without consulting the filesystem.
///
/// A `..` cancels the preceding normal component. When there is nothing left
/// to cancel (the path climbs above its starting point) the `..` is kept, so
/// `../guide.md` stays `../guide.md` instead of silently becoming `guide.md`
/// and resolving to the wrong file. A `..` directly after a root or prefix is
/// dropped, because a path cannot climb above its root.
fn normalize_path(path: &Path) -> PathBuf {
    let mut result = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match result.components().next_back() {
                // A real directory name precedes: `a/..` collapses to nothing.
                Some(Component::Normal(_)) => {
                    result.pop();
                }
                // Already at the root: `/..` is still `/`.
                Some(Component::RootDir | Component::Prefix(_)) => {}
                // Empty so far, or already climbing: keep the `..`.
                _ => result.push(Component::ParentDir.as_os_str()),
            },
            other => result.push(other.as_os_str()),
        }
    }
    result
}
28
29#[derive(Clone)]
36pub struct MD051LinkFragments {
37 anchor_style: AnchorStyle,
39}
40
41impl Default for MD051LinkFragments {
42 fn default() -> Self {
43 Self::new()
44 }
45}
46
47impl MD051LinkFragments {
48 pub fn new() -> Self {
49 Self {
50 anchor_style: AnchorStyle::GitHub,
51 }
52 }
53
54 pub fn with_anchor_style(style: AnchorStyle) -> Self {
56 Self { anchor_style: style }
57 }
58
59 fn extract_headings_from_context(
63 &self,
64 ctx: &crate::lint_context::LintContext,
65 ) -> (HashSet<String>, HashSet<String>) {
66 let mut markdown_headings = HashSet::with_capacity(32);
67 let mut html_anchors = HashSet::with_capacity(16);
68 let mut fragment_counts = std::collections::HashMap::new();
69
70 for line_info in &ctx.lines {
71 if line_info.in_front_matter {
72 continue;
73 }
74
75 if !line_info.in_code_block {
77 let content = line_info.content(ctx.content);
78 let bytes = content.as_bytes();
79
80 if bytes.contains(&b'<') && (content.contains("id=") || content.contains("name=")) {
82 let mut pos = 0;
85 while pos < content.len() {
86 if let Some(start) = content[pos..].find('<') {
87 let tag_start = pos + start;
88 if let Some(end) = content[tag_start..].find('>') {
89 let tag_end = tag_start + end + 1;
90 let tag = &content[tag_start..tag_end];
91
92 if let Some(caps) = HTML_ANCHOR_PATTERN.find(tag) {
94 let matched_text = caps.as_str();
95 if let Some(caps) = HTML_ANCHOR_PATTERN.captures(matched_text)
96 && let Some(id_match) = caps.get(1)
97 {
98 let id = id_match.as_str();
99 if !id.is_empty() {
100 html_anchors.insert(id.to_string());
101 }
102 }
103 }
104 pos = tag_end;
105 } else {
106 break;
107 }
108 } else {
109 break;
110 }
111 }
112 }
113 }
114
115 if let Some(heading) = &line_info.heading {
117 if let Some(custom_id) = &heading.custom_id {
119 markdown_headings.insert(custom_id.to_lowercase());
120 }
121
122 let fragment = self.anchor_style.generate_fragment(&heading.text);
125
126 if !fragment.is_empty() {
127 let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
129 let suffix = *count;
130 *count += 1;
131 format!("{fragment}-{suffix}")
132 } else {
133 fragment_counts.insert(fragment.clone(), 1);
134 fragment
135 };
136 markdown_headings.insert(final_fragment);
137 }
138 }
139 }
140
141 (markdown_headings, html_anchors)
142 }
143
/// Returns `true` when `url` starts with one of the recognized external
/// prefixes (`http://`, `https://`, `ftp://`, `mailto:`, `tel:` or the
/// protocol-relative `//`). The comparison is case-sensitive.
#[inline]
fn is_external_url_fast(url: &str) -> bool {
    const EXTERNAL_PREFIXES: [&str; 6] = ["http://", "https://", "ftp://", "mailto:", "tel:", "//"];
    EXTERNAL_PREFIXES.iter().any(|&prefix| url.starts_with(prefix))
}
155
/// Decides whether `url` is a fragment link into another file, as opposed to
/// an anchor inside the current document.
///
/// Only the part before the first `#` is inspected. The result is `true` when
/// that part:
/// - contains a `{% ... %}` or `{{ ... }}` template construct, or
/// - starts with `/`, or
/// - contains a `.` and either ends in a plausible file extension (query
///   string ignored) or contains a `/` or `\` path separator.
///
/// URLs without a `#`, or with nothing before it, yield `false`.
#[inline]
fn is_cross_file_link(url: &str) -> bool {
    /// A plausible extension: 1 to 10 ASCII alphanumeric characters.
    fn looks_like_extension(ext: &str) -> bool {
        !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
    }

    let Some((path_part, _fragment)) = url.split_once('#') else {
        return false;
    };
    if path_part.is_empty() {
        return false;
    }

    let has_template_tag = path_part
        .find("{%")
        .is_some_and(|at| path_part[at + 2..].contains("%}"));
    let has_template_var = path_part
        .find("{{")
        .is_some_and(|at| path_part[at + 2..].contains("}}"));
    if has_template_tag || has_template_var || path_part.starts_with('/') {
        return true;
    }

    // Everything below requires a dot somewhere in the path.
    if !path_part.contains('.') {
        return false;
    }

    let clean_path = path_part.split('?').next().unwrap_or(path_part);
    let has_extension = match clean_path.strip_prefix('.') {
        // Dot-file with a single dot (e.g. `.env`): the name itself is treated
        // as the extension, with a tighter length limit.
        Some(after_dot) if clean_path.matches('.').count() == 1 => {
            looks_like_extension(after_dot)
                && (after_dot.len() <= 4 || matches!(after_dot, "html" | "json" | "yaml" | "toml"))
        }
        // Otherwise judge the text after the last dot.
        _ => clean_path.split('.').next_back().is_some_and(looks_like_extension),
    };

    // A leading `./` or `../` always implies a `/`, so the separator test covers it.
    has_extension || path_part.contains('/') || path_part.contains('\\')
}
230}
231
232impl Rule for MD051LinkFragments {
233 fn name(&self) -> &'static str {
234 "MD051"
235 }
236
237 fn description(&self) -> &'static str {
238 "Link fragments should reference valid headings"
239 }
240
241 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
242 if !ctx.likely_has_links_or_images() {
244 return true;
245 }
246 !ctx.has_char('#')
248 }
249
250 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
251 let mut warnings = Vec::new();
252
253 if ctx.content.is_empty() || ctx.links.is_empty() || self.should_skip(ctx) {
254 return Ok(warnings);
255 }
256
257 let (markdown_headings, html_anchors) = self.extract_headings_from_context(ctx);
258
259 for link in &ctx.links {
260 if link.is_reference {
261 continue;
262 }
263
264 if matches!(link.link_type, LinkType::WikiLink { .. }) {
266 continue;
267 }
268
269 if ctx.is_in_jinja_range(link.byte_offset) {
271 continue;
272 }
273
274 let url = &link.url;
275
276 if !url.contains('#') || Self::is_external_url_fast(url) {
278 continue;
279 }
280
281 if url.contains("{{#") && url.contains("}}") {
284 continue;
285 }
286
287 if url.starts_with('@') {
291 continue;
292 }
293
294 if Self::is_cross_file_link(url) {
296 continue;
297 }
298
299 let Some(fragment_pos) = url.find('#') else {
300 continue;
301 };
302
303 let fragment = &url[fragment_pos + 1..];
304
305 if (url.contains("{{") && fragment.contains('|')) || fragment.ends_with("}}") || fragment.ends_with("%}") {
307 continue;
308 }
309
310 if fragment.is_empty() {
311 continue;
312 }
313
314 let found = if html_anchors.contains(fragment) {
317 true
318 } else {
319 let fragment_lower = fragment.to_lowercase();
320 markdown_headings.contains(&fragment_lower)
321 };
322
323 if !found {
324 warnings.push(LintWarning {
325 rule_name: Some(self.name().to_string()),
326 message: format!("Link anchor '#{fragment}' does not exist in document headings"),
327 line: link.line,
328 column: link.start_col + 1,
329 end_line: link.line,
330 end_column: link.end_col + 1,
331 severity: Severity::Warning,
332 fix: None,
333 });
334 }
335 }
336
337 Ok(warnings)
338 }
339
340 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
341 Ok(ctx.content.to_string())
344 }
345
346 fn as_any(&self) -> &dyn std::any::Any {
347 self
348 }
349
350 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
351 where
352 Self: Sized,
353 {
354 let anchor_style = if let Some(rule_config) = config.rules.get("MD051") {
356 if let Some(style_str) = rule_config.values.get("anchor-style").and_then(|v| v.as_str()) {
357 match style_str.to_lowercase().as_str() {
358 "kramdown" => AnchorStyle::Kramdown,
359 "kramdown-gfm" => AnchorStyle::KramdownGfm,
360 "jekyll" => AnchorStyle::KramdownGfm, _ => AnchorStyle::GitHub,
362 }
363 } else {
364 AnchorStyle::GitHub
365 }
366 } else {
367 AnchorStyle::GitHub
368 };
369
370 Box::new(MD051LinkFragments::with_anchor_style(anchor_style))
371 }
372
373 fn category(&self) -> RuleCategory {
374 RuleCategory::Link
375 }
376
377 fn cross_file_scope(&self) -> CrossFileScope {
378 CrossFileScope::Workspace
379 }
380
381 fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, file_index: &mut FileIndex) {
382 let mut fragment_counts = HashMap::new();
383
384 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
386 if line_info.in_front_matter {
387 continue;
388 }
389
390 if let Some(heading) = &line_info.heading {
391 let fragment = self.anchor_style.generate_fragment(&heading.text);
392
393 if !fragment.is_empty() {
394 let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
396 let suffix = *count;
397 *count += 1;
398 format!("{fragment}-{suffix}")
399 } else {
400 fragment_counts.insert(fragment.clone(), 1);
401 fragment
402 };
403
404 file_index.add_heading(HeadingIndex {
405 text: heading.text.clone(),
406 auto_anchor: final_fragment,
407 custom_anchor: heading.custom_id.clone(),
408 line: line_idx + 1, });
410 }
411 }
412 }
413
414 for link in &ctx.links {
416 if link.is_reference {
417 continue;
418 }
419
420 let url = &link.url;
421
422 if Self::is_external_url_fast(url) {
424 continue;
425 }
426
427 if Self::is_cross_file_link(url)
429 && let Some(fragment_pos) = url.find('#')
430 {
431 let path_part = &url[..fragment_pos];
432 let fragment = &url[fragment_pos + 1..];
433
434 if fragment.is_empty() || fragment.contains("{{") || fragment.contains("{%") {
436 continue;
437 }
438
439 file_index.add_cross_file_link(CrossFileLinkIndex {
440 target_path: path_part.to_string(),
441 fragment: fragment.to_string(),
442 line: link.line,
443 column: link.start_col + 1,
444 });
445 }
446 }
447 }
448
449 fn cross_file_check(
450 &self,
451 file_path: &Path,
452 file_index: &FileIndex,
453 workspace_index: &crate::workspace_index::WorkspaceIndex,
454 ) -> LintResult {
455 let mut warnings = Vec::new();
456
457 for cross_link in &file_index.cross_file_links {
459 if cross_link.fragment.is_empty() {
461 continue;
462 }
463
464 let target_path = if let Some(parent) = file_path.parent() {
466 parent.join(&cross_link.target_path)
467 } else {
468 Path::new(&cross_link.target_path).to_path_buf()
469 };
470
471 let target_path = normalize_path(&target_path);
473
474 if let Some(target_file_index) = workspace_index.get_file(&target_path) {
476 if !target_file_index.has_anchor(&cross_link.fragment) {
478 warnings.push(LintWarning {
479 rule_name: Some(self.name().to_string()),
480 line: cross_link.line,
481 column: cross_link.column,
482 end_line: cross_link.line,
483 end_column: cross_link.column + cross_link.target_path.len() + 1 + cross_link.fragment.len(),
484 message: format!(
485 "Link fragment '{}' not found in '{}'",
486 cross_link.fragment, cross_link.target_path
487 ),
488 severity: Severity::Warning,
489 fix: None,
490 });
491 }
492 }
493 }
495
496 Ok(warnings)
497 }
498
499 fn default_config_section(&self) -> Option<(String, toml::Value)> {
500 let value: toml::Value = toml::from_str(
501 r#"
502# Anchor generation style to match your target platform
503# Options: "github" (default), "kramdown-gfm", "kramdown"
504# Note: "jekyll" is accepted as an alias for "kramdown-gfm" (backward compatibility)
505anchor-style = "github"
506"#,
507 )
508 .ok()?;
509 Some(("MD051".to_string(), value))
510 }
511}
512
513#[cfg(test)]
514mod tests {
515 use super::*;
516 use crate::lint_context::LintContext;
517
/// Quarto `@`-style cross-references must be ignored, while ordinary
/// fragment links in a Quarto document are still validated.
#[test]
fn test_quarto_cross_references() {
    let rule = MD051LinkFragments::new();
    let flavor = || crate::config::MarkdownFlavor::Quarto;

    // Cross-references only: nothing to report.
    let quarto_doc = r#"# Test Document

## Figures

See [@fig-plot] for the visualization.

More details in [@tbl-results] and [@sec-methods].

The equation [@eq-regression] shows the relationship.

Reference to [@lst-code] for implementation."#;
    let quarto_ctx = LintContext::new(quarto_doc, flavor());
    let quarto_warnings = rule.check(&quarto_ctx).unwrap();
    assert!(
        quarto_warnings.is_empty(),
        "Quarto cross-references (@fig-, @tbl-, @sec-, @eq-) should not trigger MD051 warnings. Got {} warnings",
        quarto_warnings.len()
    );

    // A link to an existing heading stays silent.
    let valid_doc = r#"# Test

See [link](#test) for details."#;
    let valid_ctx = LintContext::new(valid_doc, flavor());
    let valid_warnings = rule.check(&valid_ctx).unwrap();
    assert!(valid_warnings.is_empty(), "Valid anchor should not trigger warning");

    // A link to a missing heading is still reported.
    let invalid_doc = r#"# Test

See [link](#nonexistent) for details."#;
    let invalid_ctx = LintContext::new(invalid_doc, flavor());
    let invalid_warnings = rule.check(&invalid_ctx).unwrap();
    assert_eq!(invalid_warnings.len(), 1, "Invalid anchor should still trigger warning");
}
558
/// The rule must advertise workspace-level cross-file scope.
#[test]
fn test_cross_file_scope() {
    let scope = MD051LinkFragments::new().cross_file_scope();
    assert_eq!(scope, CrossFileScope::Workspace);
}
565
/// Headings are indexed with their text, generated anchor and custom ID.
#[test]
fn test_contribute_to_index_extracts_headings() {
    let markdown = "# First Heading\n\n# Second { #custom }\n\n## Third";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);

    let mut index = FileIndex::new();
    MD051LinkFragments::new().contribute_to_index(&ctx, &mut index);

    let headings = &index.headings;
    assert_eq!(headings.len(), 3);

    let first = &headings[0];
    assert_eq!(first.text, "First Heading");
    assert_eq!(first.auto_anchor, "first-heading");
    assert!(first.custom_anchor.is_none());

    let second = &headings[1];
    assert_eq!(second.text, "Second");
    assert_eq!(second.custom_anchor, Some("custom".to_string()));

    assert_eq!(headings[2].text, "Third");
}
585
/// Links that point into other files are indexed as (target path, fragment).
#[test]
fn test_contribute_to_index_extracts_cross_file_links() {
    let markdown = "See [docs](other.md#installation) and [more](../guide.md#getting-started)";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);

    let mut index = FileIndex::new();
    MD051LinkFragments::new().contribute_to_index(&ctx, &mut index);

    let expected = [("other.md", "installation"), ("../guide.md", "getting-started")];
    assert_eq!(index.cross_file_links.len(), 2);
    for (link, (target_path, fragment)) in index.cross_file_links.iter().zip(expected) {
        assert_eq!(link.target_path, target_path);
        assert_eq!(link.fragment, fragment);
    }
}
601
/// A cross-file link whose fragment matches the target's generated anchor
/// produces no warning.
#[test]
fn test_cross_file_check_valid_fragment() {
    use crate::workspace_index::WorkspaceIndex;

    let rule = MD051LinkFragments::new();

    // Target file exposing the anchor `installation-guide`.
    let mut workspace_index = WorkspaceIndex::new();
    let mut target_file_index = FileIndex::new();
    target_file_index.add_heading(HeadingIndex {
        text: "Installation Guide".to_string(),
        auto_anchor: "installation-guide".to_string(),
        custom_anchor: None,
        line: 1,
    });
    workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);

    // Current file linking to that anchor.
    let mut current_file_index = FileIndex::new();
    current_file_index.add_cross_file_link(CrossFileLinkIndex {
        target_path: "install.md".to_string(),
        fragment: "installation-guide".to_string(),
        line: 3,
        column: 5,
    });

    let warnings = rule
        .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
        .unwrap();

    assert!(warnings.is_empty());
}
635
/// A cross-file link whose fragment is missing from an indexed target file
/// produces exactly one warning naming both the fragment and the file.
#[test]
fn test_cross_file_check_invalid_fragment() {
    use crate::workspace_index::WorkspaceIndex;

    let rule = MD051LinkFragments::new();

    // Target file exposing only `installation-guide`.
    let mut workspace_index = WorkspaceIndex::new();
    let mut target_file_index = FileIndex::new();
    target_file_index.add_heading(HeadingIndex {
        text: "Installation Guide".to_string(),
        auto_anchor: "installation-guide".to_string(),
        custom_anchor: None,
        line: 1,
    });
    workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);

    // Current file linking to an anchor that does not exist.
    let mut current_file_index = FileIndex::new();
    current_file_index.add_cross_file_link(CrossFileLinkIndex {
        target_path: "install.md".to_string(),
        fragment: "nonexistent".to_string(),
        line: 3,
        column: 5,
    });

    let warnings = rule
        .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
        .unwrap();

    assert_eq!(warnings.len(), 1);
    assert!(warnings[0].message.contains("nonexistent"));
    assert!(warnings[0].message.contains("install.md"));
}
671
/// A cross-file link may target a heading through its custom anchor.
#[test]
fn test_cross_file_check_custom_anchor_match() {
    use crate::workspace_index::WorkspaceIndex;

    let rule = MD051LinkFragments::new();

    // Target heading carries the custom anchor `install`.
    let mut workspace_index = WorkspaceIndex::new();
    let mut target_file_index = FileIndex::new();
    target_file_index.add_heading(HeadingIndex {
        text: "Installation Guide".to_string(),
        auto_anchor: "installation-guide".to_string(),
        custom_anchor: Some("install".to_string()),
        line: 1,
    });
    workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);

    // Current file links to the custom anchor rather than the generated one.
    let mut current_file_index = FileIndex::new();
    current_file_index.add_cross_file_link(CrossFileLinkIndex {
        target_path: "install.md".to_string(),
        fragment: "install".to_string(),
        line: 3,
        column: 5,
    });

    let warnings = rule
        .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
        .unwrap();

    assert!(warnings.is_empty());
}
705
/// Links to files that are not part of the workspace index are not reported.
#[test]
fn test_cross_file_check_target_not_in_workspace() {
    use crate::workspace_index::WorkspaceIndex;

    let rule = MD051LinkFragments::new();

    // Empty workspace: the link target cannot be looked up.
    let workspace_index = WorkspaceIndex::new();

    let mut current_file_index = FileIndex::new();
    current_file_index.add_cross_file_link(CrossFileLinkIndex {
        target_path: "external.md".to_string(),
        fragment: "heading".to_string(),
        line: 3,
        column: 5,
    });

    let warnings = rule
        .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
        .unwrap();

    assert!(warnings.is_empty());
}
731}