rumdl_lib/rules/
md051_link_fragments.rs1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::anchor_styles::AnchorStyle;
3use pulldown_cmark::LinkType;
4use regex::Regex;
5use std::collections::HashSet;
6use std::sync::LazyLock;
7static HTML_ANCHOR_PATTERN: LazyLock<Regex> =
10 LazyLock::new(|| Regex::new(r#"\b(?:id|name)\s*=\s*["']([^"']+)["']"#).unwrap());
11
12#[derive(Clone)]
19pub struct MD051LinkFragments {
20 anchor_style: AnchorStyle,
22}
23
24impl Default for MD051LinkFragments {
25 fn default() -> Self {
26 Self::new()
27 }
28}
29
30impl MD051LinkFragments {
31 pub fn new() -> Self {
32 Self {
33 anchor_style: AnchorStyle::GitHub,
34 }
35 }
36
37 pub fn with_anchor_style(style: AnchorStyle) -> Self {
39 Self { anchor_style: style }
40 }
41
42 fn extract_headings_from_context(
46 &self,
47 ctx: &crate::lint_context::LintContext,
48 ) -> (HashSet<String>, HashSet<String>) {
49 let mut markdown_headings = HashSet::with_capacity(32);
50 let mut html_anchors = HashSet::with_capacity(16);
51 let mut fragment_counts = std::collections::HashMap::new();
52
53 for line_info in &ctx.lines {
54 if line_info.in_front_matter {
55 continue;
56 }
57
58 if !line_info.in_code_block {
60 let content = line_info.content(ctx.content);
61 let bytes = content.as_bytes();
62
63 if bytes.contains(&b'<') && (content.contains("id=") || content.contains("name=")) {
65 let mut pos = 0;
68 while pos < content.len() {
69 if let Some(start) = content[pos..].find('<') {
70 let tag_start = pos + start;
71 if let Some(end) = content[tag_start..].find('>') {
72 let tag_end = tag_start + end + 1;
73 let tag = &content[tag_start..tag_end];
74
75 if let Some(caps) = HTML_ANCHOR_PATTERN.find(tag) {
77 let matched_text = caps.as_str();
78 if let Some(caps) = HTML_ANCHOR_PATTERN.captures(matched_text)
79 && let Some(id_match) = caps.get(1)
80 {
81 let id = id_match.as_str();
82 if !id.is_empty() {
83 html_anchors.insert(id.to_string());
84 }
85 }
86 }
87 pos = tag_end;
88 } else {
89 break;
90 }
91 } else {
92 break;
93 }
94 }
95 }
96 }
97
98 if let Some(heading) = &line_info.heading {
100 if let Some(custom_id) = &heading.custom_id {
102 markdown_headings.insert(custom_id.to_lowercase());
103 }
104
105 let fragment = self.anchor_style.generate_fragment(&heading.text);
108
109 if !fragment.is_empty() {
110 let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
112 let suffix = *count;
113 *count += 1;
114 format!("{fragment}-{suffix}")
115 } else {
116 fragment_counts.insert(fragment.clone(), 1);
117 fragment
118 };
119 markdown_headings.insert(final_fragment);
120 }
121 }
122 }
123
124 (markdown_headings, html_anchors)
125 }
126
/// Returns `true` when `url` begins with one of the external prefixes
/// `http://`, `https://`, `ftp://`, `mailto:`, `tel:` or `//`.
///
/// The comparison is a case-sensitive prefix match.
#[inline]
fn is_external_url_fast(url: &str) -> bool {
    const EXTERNAL_PREFIXES: [&str; 6] = ["http://", "https://", "ftp://", "mailto:", "tel:", "//"];
    EXTERNAL_PREFIXES.iter().any(|prefix| url.starts_with(*prefix))
}
138
/// Decides whether `url` points at a different file rather than at an anchor
/// inside the current document.
///
/// Returns `false` when `url` has no `#`, or when nothing precedes the first `#`.
/// Otherwise the text before the first `#` (the path part) makes the link
/// cross-file when it:
/// - contains a `{% ... %}` or `{{ ... }}` template expression,
/// - starts with `/`, or
/// - contains a `.` and either contains a path separator (`/` or `\`) or ends
///   in something that looks like a file extension.
#[inline]
fn is_cross_file_link(url: &str) -> bool {
    // An extension candidate is 1 to 10 ASCII alphanumeric characters.
    fn is_extension_like(ext: &str) -> bool {
        !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
    }

    let Some((path_part, _fragment)) = url.split_once('#') else {
        return false;
    };

    // A bare `#fragment` always refers to the current document.
    if path_part.is_empty() {
        return false;
    }

    // True when `open` occurs and `close` occurs somewhere after it.
    let has_template = |open: &str, close: &str| {
        path_part
            .find(open)
            .is_some_and(|start| path_part[start + open.len()..].contains(close))
    };
    if has_template("{%", "%}") || has_template("{{", "}}") {
        return true;
    }

    if path_part.starts_with('/') {
        return true;
    }

    // NOTE(review): every remaining rule requires a dot, so a dot-less relative path
    // such as `docs/guide#x` is NOT treated as cross-file. Confirm this is intended.
    if !path_part.contains('.') {
        return false;
    }

    // With a dot present, any path separator is enough (this also covers `./` and `../`).
    if path_part.contains('/') || path_part.contains('\\') {
        return true;
    }

    // Ignore a query string when looking for the extension.
    let clean_path = path_part.split('?').next().unwrap_or(path_part);
    match clean_path.strip_prefix('.') {
        // Dotfile with a single dot (e.g. `.env`): only short names count as extensions.
        Some(after_dot) if clean_path.matches('.').count() == 1 => {
            is_extension_like(after_dot) && after_dot.len() <= 4
        }
        // Otherwise judge the text after the last dot.
        _ => clean_path.rsplit('.').next().is_some_and(is_extension_like),
    }
}
213}
214
215impl Rule for MD051LinkFragments {
216 fn name(&self) -> &'static str {
217 "MD051"
218 }
219
220 fn description(&self) -> &'static str {
221 "Link fragments should reference valid headings"
222 }
223
224 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
225 if !ctx.likely_has_links_or_images() {
227 return true;
228 }
229 !ctx.has_char('#')
231 }
232
233 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
234 let mut warnings = Vec::new();
235
236 if ctx.content.is_empty() || ctx.links.is_empty() || self.should_skip(ctx) {
237 return Ok(warnings);
238 }
239
240 let (markdown_headings, html_anchors) = self.extract_headings_from_context(ctx);
241
242 for link in &ctx.links {
243 if link.is_reference {
244 continue;
245 }
246
247 if matches!(link.link_type, LinkType::WikiLink { .. }) {
249 continue;
250 }
251
252 if ctx.is_in_jinja_range(link.byte_offset) {
254 continue;
255 }
256
257 let url = &link.url;
258
259 if !url.contains('#') || Self::is_external_url_fast(url) {
261 continue;
262 }
263
264 if url.contains("{{#") && url.contains("}}") {
267 continue;
268 }
269
270 if url.starts_with('@') {
274 continue;
275 }
276
277 if Self::is_cross_file_link(url) {
279 continue;
280 }
281
282 let Some(fragment_pos) = url.find('#') else {
283 continue;
284 };
285
286 let fragment = &url[fragment_pos + 1..];
287
288 if (url.contains("{{") && fragment.contains('|')) || fragment.ends_with("}}") || fragment.ends_with("%}") {
290 continue;
291 }
292
293 if fragment.is_empty() {
294 continue;
295 }
296
297 let found = if html_anchors.contains(fragment) {
300 true
301 } else {
302 let fragment_lower = fragment.to_lowercase();
303 markdown_headings.contains(&fragment_lower)
304 };
305
306 if !found {
307 warnings.push(LintWarning {
308 rule_name: Some(self.name().to_string()),
309 message: format!("Link anchor '#{fragment}' does not exist in document headings"),
310 line: link.line,
311 column: link.start_col + 1,
312 end_line: link.line,
313 end_column: link.end_col + 1,
314 severity: Severity::Warning,
315 fix: None,
316 });
317 }
318 }
319
320 Ok(warnings)
321 }
322
323 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
324 Ok(ctx.content.to_string())
327 }
328
329 fn as_any(&self) -> &dyn std::any::Any {
330 self
331 }
332
333 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
334 where
335 Self: Sized,
336 {
337 let anchor_style = if let Some(rule_config) = config.rules.get("MD051") {
339 if let Some(style_str) = rule_config.values.get("anchor-style").and_then(|v| v.as_str()) {
340 match style_str.to_lowercase().as_str() {
341 "kramdown" => AnchorStyle::Kramdown,
342 "kramdown-gfm" => AnchorStyle::KramdownGfm,
343 "jekyll" => AnchorStyle::KramdownGfm, _ => AnchorStyle::GitHub,
345 }
346 } else {
347 AnchorStyle::GitHub
348 }
349 } else {
350 AnchorStyle::GitHub
351 };
352
353 Box::new(MD051LinkFragments::with_anchor_style(anchor_style))
354 }
355
356 fn default_config_section(&self) -> Option<(String, toml::Value)> {
357 let value: toml::Value = toml::from_str(
358 r#"
359# Anchor generation style to match your target platform
360# Options: "github" (default), "kramdown-gfm", "kramdown"
361# Note: "jekyll" is accepted as an alias for "kramdown-gfm" (backward compatibility)
362anchor-style = "github"
363"#,
364 )
365 .ok()?;
366 Some(("MD051".to_string(), value))
367 }
368}
369
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Lints `content` as a Quarto document with the default rule and returns
    /// how many warnings were produced.
    fn quarto_warning_count(content: &str) -> usize {
        let rule = MD051LinkFragments::new();
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto);
        rule.check(&ctx).unwrap().len()
    }

    #[test]
    fn test_quarto_cross_references() {
        // `[@...]` cross-references must be ignored by the rule.
        let cross_reference_count = quarto_warning_count(
            r#"# Test Document

## Figures

See [@fig-plot] for the visualization.

More details in [@tbl-results] and [@sec-methods].

The equation [@eq-regression] shows the relationship.

Reference to [@lst-code] for implementation."#,
        );
        assert!(
            cross_reference_count == 0,
            "Quarto cross-references (@fig-, @tbl-, @sec-, @eq-) should not trigger MD051 warnings. Got {} warnings",
            cross_reference_count
        );

        // A fragment that matches a heading is accepted.
        let valid_anchor_count = quarto_warning_count(
            r#"# Test

See [link](#test) for details."#,
        );
        assert!(valid_anchor_count == 0, "Valid anchor should not trigger warning");

        // A fragment without a matching heading is still reported.
        let invalid_anchor_count = quarto_warning_count(
            r#"# Test

See [link](#nonexistent) for details."#,
        );
        assert_eq!(invalid_anchor_count, 1, "Invalid anchor should still trigger warning");
    }
}