1use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::anchor_styles::AnchorStyle;
3use crate::workspace_index::{CrossFileLinkIndex, FileIndex, HeadingIndex};
4use pulldown_cmark::LinkType;
5use regex::Regex;
6use std::collections::{HashMap, HashSet};
7use std::path::{Component, Path, PathBuf};
8use std::sync::LazyLock;
9static HTML_ANCHOR_PATTERN: LazyLock<Regex> =
12 LazyLock::new(|| Regex::new(r#"\b(?:id|name)\s*=\s*["']([^"']+)["']"#).unwrap());
13
/// Lexically normalizes `path` without touching the filesystem.
///
/// - `.` components are removed.
/// - `..` removes the preceding normal component when there is one.
/// - `..` directly below a root or prefix is dropped (there is nothing above it).
/// - `..` that cannot be resolved (the path climbs above its starting point) is
///   kept, so `../guide.md` stays `../guide.md` instead of silently turning
///   into `guide.md` and matching an unrelated file.
fn normalize_path(path: &Path) -> PathBuf {
    let mut result = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                let tail = result.components().next_back();
                let can_pop = matches!(tail, Some(Component::Normal(_)));
                let at_root = matches!(tail, Some(Component::RootDir | Component::Prefix(_)));
                if can_pop {
                    result.pop();
                } else if !at_root {
                    // Nothing left to cancel out: keep the `..` so the result
                    // still points outside the starting directory.
                    result.push("..");
                }
            }
            other => result.push(other.as_os_str()),
        }
    }
    result
}
28
29#[derive(Clone)]
36pub struct MD051LinkFragments {
37 anchor_style: AnchorStyle,
39}
40
41impl Default for MD051LinkFragments {
42 fn default() -> Self {
43 Self::new()
44 }
45}
46
47impl MD051LinkFragments {
48 pub fn new() -> Self {
49 Self {
50 anchor_style: AnchorStyle::GitHub,
51 }
52 }
53
54 pub fn with_anchor_style(style: AnchorStyle) -> Self {
56 Self { anchor_style: style }
57 }
58
59 fn extract_headings_from_context(
63 &self,
64 ctx: &crate::lint_context::LintContext,
65 ) -> (HashSet<String>, HashSet<String>) {
66 let mut markdown_headings = HashSet::with_capacity(32);
67 let mut html_anchors = HashSet::with_capacity(16);
68 let mut fragment_counts = std::collections::HashMap::new();
69
70 for line_info in &ctx.lines {
71 if line_info.in_front_matter {
72 continue;
73 }
74
75 if !line_info.in_code_block {
77 let content = line_info.content(ctx.content);
78 let bytes = content.as_bytes();
79
80 if bytes.contains(&b'<') && (content.contains("id=") || content.contains("name=")) {
82 let mut pos = 0;
85 while pos < content.len() {
86 if let Some(start) = content[pos..].find('<') {
87 let tag_start = pos + start;
88 if let Some(end) = content[tag_start..].find('>') {
89 let tag_end = tag_start + end + 1;
90 let tag = &content[tag_start..tag_end];
91
92 if let Some(caps) = HTML_ANCHOR_PATTERN.find(tag) {
94 let matched_text = caps.as_str();
95 if let Some(caps) = HTML_ANCHOR_PATTERN.captures(matched_text)
96 && let Some(id_match) = caps.get(1)
97 {
98 let id = id_match.as_str();
99 if !id.is_empty() {
100 html_anchors.insert(id.to_string());
101 }
102 }
103 }
104 pos = tag_end;
105 } else {
106 break;
107 }
108 } else {
109 break;
110 }
111 }
112 }
113 }
114
115 if let Some(heading) = &line_info.heading {
117 if let Some(custom_id) = &heading.custom_id {
119 markdown_headings.insert(custom_id.to_lowercase());
120 }
121
122 let fragment = self.anchor_style.generate_fragment(&heading.text);
125
126 if !fragment.is_empty() {
127 let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
129 let suffix = *count;
130 *count += 1;
131 format!("{fragment}-{suffix}")
132 } else {
133 fragment_counts.insert(fragment.clone(), 1);
134 fragment
135 };
136 markdown_headings.insert(final_fragment);
137 }
138 }
139 }
140
141 (markdown_headings, html_anchors)
142 }
143
/// Returns `true` when `url` starts with one of the prefixes this rule treats
/// as external: `http://`, `https://`, `ftp://`, `mailto:`, `tel:` or `//`.
///
/// The comparison ignores ASCII case because URI schemes are case-insensitive
/// (`HTTPS://example.com` is the same target as `https://example.com`).
#[inline]
fn is_external_url_fast(url: &str) -> bool {
    const EXTERNAL_PREFIXES: [&str; 6] = ["http://", "https://", "ftp://", "mailto:", "tel:", "//"];

    EXTERNAL_PREFIXES.iter().any(|prefix| {
        // `get` yields `None` for URLs shorter than the prefix or when the cut
        // would fall inside a multi-byte character, so this never panics.
        url.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    })
}
155
/// Decides whether `url` (which must contain `#` to qualify) points at another file.
///
/// The part before the first `#` is inspected:
/// - empty path (`#frag`) -> not cross-file;
/// - contains a `{% ... %}` or `{{ ... }}` template construct -> cross-file;
/// - starts with `/` -> cross-file;
/// - otherwise it must contain a `.` and either look like it ends in a file
///   extension (1-10 ASCII alphanumerics after the last dot, ignoring any
///   `?query`) or contain a path separator (`/` or `\`).
#[inline]
fn is_cross_file_link(url: &str) -> bool {
    let Some(fragment_pos) = url.find('#') else {
        return false;
    };
    let path_part = &url[..fragment_pos];
    if path_part.is_empty() {
        return false;
    }

    // `opener` and `closer` are two-byte ASCII markers, so slicing two bytes
    // past the opener always lands on a character boundary.
    let has_template = |opener: &str, closer: &str| {
        path_part
            .find(opener)
            .is_some_and(|at| path_part[at + 2..].contains(closer))
    };
    if has_template("{%", "%}") || has_template("{{", "}}") || path_part.starts_with('/') {
        return true;
    }

    if !path_part.contains('.') {
        return false;
    }

    // Text after the last dot of the query-less path (the whole text if the
    // query-less path has no dot at all).
    let clean_path = path_part.split('?').next().unwrap_or(path_part);
    let extension = clean_path.rsplit('.').next().unwrap_or(clean_path);
    let has_extension = !extension.is_empty()
        && extension.len() <= 10
        && extension.chars().all(|c| c.is_ascii_alphanumeric());

    has_extension || path_part.contains('/') || path_part.contains('\\')
}
228}
229
230impl Rule for MD051LinkFragments {
231 fn name(&self) -> &'static str {
232 "MD051"
233 }
234
235 fn description(&self) -> &'static str {
236 "Link fragments should reference valid headings"
237 }
238
239 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
240 if !ctx.likely_has_links_or_images() {
242 return true;
243 }
244 !ctx.has_char('#')
246 }
247
248 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
249 let mut warnings = Vec::new();
250
251 if ctx.content.is_empty() || ctx.links.is_empty() || self.should_skip(ctx) {
252 return Ok(warnings);
253 }
254
255 let (markdown_headings, html_anchors) = self.extract_headings_from_context(ctx);
256
257 for link in &ctx.links {
258 if link.is_reference {
259 continue;
260 }
261
262 if matches!(link.link_type, LinkType::WikiLink { .. }) {
264 continue;
265 }
266
267 if ctx.is_in_jinja_range(link.byte_offset) {
269 continue;
270 }
271
272 let url = &link.url;
273
274 if !url.contains('#') || Self::is_external_url_fast(url) {
276 continue;
277 }
278
279 if url.contains("{{#") && url.contains("}}") {
282 continue;
283 }
284
285 if url.starts_with('@') {
289 continue;
290 }
291
292 if Self::is_cross_file_link(url) {
294 continue;
295 }
296
297 let Some(fragment_pos) = url.find('#') else {
298 continue;
299 };
300
301 let fragment = &url[fragment_pos + 1..];
302
303 if (url.contains("{{") && fragment.contains('|')) || fragment.ends_with("}}") || fragment.ends_with("%}") {
305 continue;
306 }
307
308 if fragment.is_empty() {
309 continue;
310 }
311
312 let found = if html_anchors.contains(fragment) {
315 true
316 } else {
317 let fragment_lower = fragment.to_lowercase();
318 markdown_headings.contains(&fragment_lower)
319 };
320
321 if !found {
322 warnings.push(LintWarning {
323 rule_name: Some(self.name().to_string()),
324 message: format!("Link anchor '#{fragment}' does not exist in document headings"),
325 line: link.line,
326 column: link.start_col + 1,
327 end_line: link.line,
328 end_column: link.end_col + 1,
329 severity: Severity::Warning,
330 fix: None,
331 });
332 }
333 }
334
335 Ok(warnings)
336 }
337
338 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
339 Ok(ctx.content.to_string())
342 }
343
344 fn as_any(&self) -> &dyn std::any::Any {
345 self
346 }
347
348 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
349 where
350 Self: Sized,
351 {
352 let anchor_style = if let Some(rule_config) = config.rules.get("MD051") {
354 if let Some(style_str) = rule_config.values.get("anchor-style").and_then(|v| v.as_str()) {
355 match style_str.to_lowercase().as_str() {
356 "kramdown" => AnchorStyle::Kramdown,
357 "kramdown-gfm" => AnchorStyle::KramdownGfm,
358 "jekyll" => AnchorStyle::KramdownGfm, _ => AnchorStyle::GitHub,
360 }
361 } else {
362 AnchorStyle::GitHub
363 }
364 } else {
365 AnchorStyle::GitHub
366 };
367
368 Box::new(MD051LinkFragments::with_anchor_style(anchor_style))
369 }
370
371 fn category(&self) -> RuleCategory {
372 RuleCategory::Link
373 }
374
375 fn cross_file_scope(&self) -> CrossFileScope {
376 CrossFileScope::Workspace
377 }
378
379 fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, file_index: &mut FileIndex) {
380 let mut fragment_counts = HashMap::new();
381
382 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
384 if line_info.in_front_matter {
385 continue;
386 }
387
388 if let Some(heading) = &line_info.heading {
389 let fragment = self.anchor_style.generate_fragment(&heading.text);
390
391 if !fragment.is_empty() {
392 let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
394 let suffix = *count;
395 *count += 1;
396 format!("{fragment}-{suffix}")
397 } else {
398 fragment_counts.insert(fragment.clone(), 1);
399 fragment
400 };
401
402 file_index.add_heading(HeadingIndex {
403 text: heading.text.clone(),
404 auto_anchor: final_fragment,
405 custom_anchor: heading.custom_id.clone(),
406 line: line_idx + 1, });
408 }
409 }
410 }
411
412 for link in &ctx.links {
414 if link.is_reference {
415 continue;
416 }
417
418 let url = &link.url;
419
420 if Self::is_external_url_fast(url) {
422 continue;
423 }
424
425 if Self::is_cross_file_link(url)
427 && let Some(fragment_pos) = url.find('#')
428 {
429 let path_part = &url[..fragment_pos];
430 let fragment = &url[fragment_pos + 1..];
431
432 if fragment.is_empty() || fragment.contains("{{") || fragment.contains("{%") {
434 continue;
435 }
436
437 file_index.add_cross_file_link(CrossFileLinkIndex {
438 target_path: path_part.to_string(),
439 fragment: fragment.to_string(),
440 line: link.line,
441 column: link.start_col + 1,
442 });
443 }
444 }
445 }
446
447 fn cross_file_check(
448 &self,
449 file_path: &Path,
450 file_index: &FileIndex,
451 workspace_index: &crate::workspace_index::WorkspaceIndex,
452 ) -> LintResult {
453 let mut warnings = Vec::new();
454
455 for cross_link in &file_index.cross_file_links {
457 if cross_link.fragment.is_empty() {
459 continue;
460 }
461
462 let target_path = if let Some(parent) = file_path.parent() {
464 parent.join(&cross_link.target_path)
465 } else {
466 Path::new(&cross_link.target_path).to_path_buf()
467 };
468
469 let target_path = normalize_path(&target_path);
471
472 if let Some(target_file_index) = workspace_index.get_file(&target_path) {
474 if !target_file_index.has_anchor(&cross_link.fragment) {
476 warnings.push(LintWarning {
477 rule_name: Some(self.name().to_string()),
478 line: cross_link.line,
479 column: cross_link.column,
480 end_line: cross_link.line,
481 end_column: cross_link.column + cross_link.target_path.len() + 1 + cross_link.fragment.len(),
482 message: format!(
483 "Link fragment '{}' not found in '{}'",
484 cross_link.fragment, cross_link.target_path
485 ),
486 severity: Severity::Warning,
487 fix: None,
488 });
489 }
490 }
491 }
493
494 Ok(warnings)
495 }
496
497 fn default_config_section(&self) -> Option<(String, toml::Value)> {
498 let value: toml::Value = toml::from_str(
499 r#"
500# Anchor generation style to match your target platform
501# Options: "github" (default), "kramdown-gfm", "kramdown"
502# Note: "jekyll" is accepted as an alias for "kramdown-gfm" (backward compatibility)
503anchor-style = "github"
504"#,
505 )
506 .ok()?;
507 Some(("MD051".to_string(), value))
508 }
509}
510
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;
    use crate::workspace_index::WorkspaceIndex;

    /// Workspace holding `docs/install.md` with one "Installation Guide" heading
    /// and, optionally, a custom anchor on that heading.
    fn workspace_with_install_doc(custom_anchor: Option<&str>) -> WorkspaceIndex {
        let mut target_file_index = FileIndex::new();
        target_file_index.add_heading(HeadingIndex {
            text: "Installation Guide".to_string(),
            auto_anchor: "installation-guide".to_string(),
            custom_anchor: custom_anchor.map(String::from),
            line: 1,
        });

        let mut workspace_index = WorkspaceIndex::new();
        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
        workspace_index
    }

    /// File index containing a single cross-file link at line 3, column 5.
    fn index_with_link(target_path: &str, fragment: &str) -> FileIndex {
        let mut current_file_index = FileIndex::new();
        current_file_index.add_cross_file_link(CrossFileLinkIndex {
            target_path: target_path.to_string(),
            fragment: fragment.to_string(),
            line: 3,
            column: 5,
        });
        current_file_index
    }

    #[test]
    fn test_quarto_cross_references() {
        let rule = MD051LinkFragments::new();

        // Quarto cross-references must not be treated as fragment links.
        let content = r#"# Test Document

## Figures

See [@fig-plot] for the visualization.

More details in [@tbl-results] and [@sec-methods].

The equation [@eq-regression] shows the relationship.

Reference to [@lst-code] for implementation."#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Quarto cross-references (@fig-, @tbl-, @sec-, @eq-) should not trigger MD051 warnings. Got {} warnings",
            result.len()
        );

        // Ordinary anchors keep working under the Quarto flavor.
        let content_with_anchor = r#"# Test

See [link](#test) for details."#;
        let ctx_anchor = LintContext::new(content_with_anchor, crate::config::MarkdownFlavor::Quarto, None);
        let result_anchor = rule.check(&ctx_anchor).unwrap();
        assert!(result_anchor.is_empty(), "Valid anchor should not trigger warning");

        // Broken anchors are still reported.
        let content_invalid = r#"# Test

See [link](#nonexistent) for details."#;
        let ctx_invalid = LintContext::new(content_invalid, crate::config::MarkdownFlavor::Quarto, None);
        let result_invalid = rule.check(&ctx_invalid).unwrap();
        assert_eq!(result_invalid.len(), 1, "Invalid anchor should still trigger warning");
    }

    #[test]
    fn test_cross_file_scope() {
        let rule = MD051LinkFragments::new();
        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
    }

    #[test]
    fn test_contribute_to_index_extracts_headings() {
        let rule = MD051LinkFragments::new();
        let content = "# First Heading\n\n# Second { #custom }\n\n## Third";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

        let mut file_index = FileIndex::new();
        rule.contribute_to_index(&ctx, &mut file_index);

        assert_eq!(file_index.headings.len(), 3);
        assert_eq!(file_index.headings[0].text, "First Heading");
        assert_eq!(file_index.headings[0].auto_anchor, "first-heading");
        assert!(file_index.headings[0].custom_anchor.is_none());

        assert_eq!(file_index.headings[1].text, "Second");
        assert_eq!(file_index.headings[1].custom_anchor, Some("custom".to_string()));

        assert_eq!(file_index.headings[2].text, "Third");
    }

    #[test]
    fn test_contribute_to_index_extracts_cross_file_links() {
        let rule = MD051LinkFragments::new();
        let content = "See [docs](other.md#installation) and [more](../guide.md#getting-started)";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

        let mut file_index = FileIndex::new();
        rule.contribute_to_index(&ctx, &mut file_index);

        assert_eq!(file_index.cross_file_links.len(), 2);
        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
        assert_eq!(file_index.cross_file_links[0].fragment, "installation");
        assert_eq!(file_index.cross_file_links[1].target_path, "../guide.md");
        assert_eq!(file_index.cross_file_links[1].fragment, "getting-started");
    }

    #[test]
    fn test_cross_file_check_valid_fragment() {
        let rule = MD051LinkFragments::new();
        let workspace_index = workspace_with_install_doc(None);
        let current_file_index = index_with_link("install.md", "installation-guide");

        let warnings = rule
            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
            .unwrap();

        assert!(warnings.is_empty());
    }

    #[test]
    fn test_cross_file_check_invalid_fragment() {
        let rule = MD051LinkFragments::new();
        let workspace_index = workspace_with_install_doc(None);
        let current_file_index = index_with_link("install.md", "nonexistent");

        let warnings = rule
            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
            .unwrap();

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("nonexistent"));
        assert!(warnings[0].message.contains("install.md"));
    }

    #[test]
    fn test_cross_file_check_custom_anchor_match() {
        let rule = MD051LinkFragments::new();
        let workspace_index = workspace_with_install_doc(Some("install"));
        let current_file_index = index_with_link("install.md", "install");

        let warnings = rule
            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
            .unwrap();

        assert!(warnings.is_empty());
    }

    #[test]
    fn test_cross_file_check_target_not_in_workspace() {
        let rule = MD051LinkFragments::new();

        // Empty workspace: the link target is unknown, so nothing can be verified.
        let workspace_index = WorkspaceIndex::new();
        let current_file_index = index_with_link("external.md", "heading");

        let warnings = rule
            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
            .unwrap();

        assert!(warnings.is_empty());
    }
}