1use once_cell::sync::Lazy;
2use regex::Regex;
3use serde::Deserialize;
4use std::collections::HashSet;
5use std::rc::Rc;
6
7use tree_sitter::Node;
8
9use crate::{
10 linter::{range_from_tree_sitter, RuleViolation},
11 rules::{Context, Rule, RuleLinter, RuleType},
12};
13
14#[derive(Debug, PartialEq, Clone, Deserialize, Default)]
16pub struct MD051LinkFragmentsTable {
17 #[serde(default)]
18 pub ignore_case: bool,
19 #[serde(default)]
20 pub ignored_pattern: String,
21}
22
23#[derive(Debug, Clone)]
24struct LinkFragment {
25 fragment: String,
26 range: tree_sitter::Range,
27}
28
29static LINK_PATTERN: Lazy<Regex> =
31 Lazy::new(|| Regex::new(r"\[([^\]]*)\]\(([^)]*#[^)]*)\)").unwrap());
32
33static RANGE_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^L\d+C\d+-L\d+C\d+$").unwrap());
34
35static ID_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r#"id\s*=\s*["']([^"']+)["']"#).unwrap());
36
37static NAME_PATTERN: Lazy<Regex> =
38 Lazy::new(|| Regex::new(r#"name\s*=\s*["']([^"']+)["']"#).unwrap());
39
40pub(crate) struct MD051Linter {
41 context: Rc<Context>,
42 valid_fragments: HashSet<String>,
43 valid_fragments_lowercase: HashSet<String>, link_fragments: Vec<LinkFragment>,
45}
46
47impl MD051Linter {
48 pub fn new(context: Rc<Context>) -> Self {
49 Self {
50 context,
51 valid_fragments: HashSet::new(),
52 valid_fragments_lowercase: HashSet::new(),
53 link_fragments: Vec::new(),
54 }
55 }
56
57 fn extract_heading_text(&self, node: &Node) -> Option<String> {
58 let start_byte = node.start_byte();
60 let end_byte = node.end_byte();
61 let document_content = self.context.document_content.borrow();
62 let _heading_content = &document_content[start_byte..end_byte];
63
64 if node.kind() == "atx_heading" {
66 for i in 0..node.child_count() {
68 let child = node.child(i).unwrap();
69 if child.kind() == "inline" {
70 let child_start = child.start_byte();
71 let child_end = child.end_byte();
72 let text = &document_content[child_start..child_end].trim();
73 return Some(text.to_string());
74 }
75 }
76 }
77
78 if node.kind() == "setext_heading" {
80 for i in 0..node.child_count() {
81 let child = node.child(i).unwrap();
82 if child.kind() == "paragraph" {
83 for j in 0..child.child_count() {
85 let grandchild = child.child(j).unwrap();
86 if grandchild.kind() == "inline" {
87 let grandchild_start = grandchild.start_byte();
88 let grandchild_end = grandchild.end_byte();
89 let text = &document_content[grandchild_start..grandchild_end].trim();
90 return Some(text.to_string());
91 }
92 }
93 }
94 }
95 }
96
97 None
98 }
99
100 fn generate_github_fragment(&self, heading_text: &str) -> String {
101 let mut result = heading_text.to_lowercase();
108
109 result = result
111 .chars()
112 .filter(|c| c.is_alphanumeric() || c.is_whitespace() || *c == '-')
113 .collect();
114
115 result = result.replace(' ', "-");
117
118 let chars: Vec<char> = result.chars().collect();
120 let mut filtered = Vec::new();
121 let mut prev_was_dash = false;
122
123 for ch in chars {
124 if ch == '-' {
125 if !prev_was_dash {
126 filtered.push(ch);
127 prev_was_dash = true;
128 }
129 } else {
130 filtered.push(ch);
131 prev_was_dash = false;
132 }
133 }
134 result = filtered.into_iter().collect();
135
136 result = result.trim_matches('-').to_string();
138
139 result
140 }
141
142 fn extract_custom_anchor(&self, heading_text: &str) -> Option<String> {
143 if let Some(start) = heading_text.rfind("{#") {
145 if let Some(end) = heading_text[start..].find('}') {
146 let anchor = &heading_text[start + 2..start + end];
147 return Some(anchor.to_string());
148 }
149 }
150 None
151 }
152
153 fn extract_link_fragments(&self, node: &Node) -> Vec<String> {
154 let start_byte = node.start_byte();
156 let end_byte = node.end_byte();
157 let document_content = self.context.document_content.borrow();
158 let content = &document_content[start_byte..end_byte];
159
160 let mut fragments = Vec::new();
161
162 for cap in LINK_PATTERN.captures_iter(content) {
163 if let Some(url_with_fragment) = cap.get(2) {
164 let url_text = url_with_fragment.as_str();
165 if let Some(hash_pos) = url_text.rfind('#') {
166 let fragment = &url_text[hash_pos + 1..];
167 if !fragment.is_empty() && !fragment.contains(' ') {
170 fragments.push(fragment.to_string());
171 }
172 }
173 }
174 }
175
176 fragments
177 }
178
179 fn is_github_special_fragment(&self, fragment: &str) -> bool {
180 if fragment == "top" {
184 return true;
185 }
186
187 if fragment.starts_with('L')
189 && fragment.len() > 1
190 && fragment[1..].chars().all(|c| c.is_ascii_digit())
191 {
192 return true;
193 }
194
195 if RANGE_PATTERN.is_match(fragment) {
197 return true;
198 }
199
200 false
204 }
205
206 fn extract_html_id_or_name(&self, node: &Node) -> Vec<String> {
207 let mut ids = Vec::new();
209 let start_byte = node.start_byte();
210 let end_byte = node.end_byte();
211 let document_content = self.context.document_content.borrow();
212 let html_content = &document_content[start_byte..end_byte];
213
214 for cap in ID_PATTERN.captures_iter(html_content) {
215 if let Some(id) = cap.get(1) {
216 ids.push(id.as_str().to_string());
217 }
218 }
219
220 for cap in NAME_PATTERN.captures_iter(html_content) {
221 if let Some(name) = cap.get(1) {
222 ids.push(name.as_str().to_string());
223 }
224 }
225
226 ids
227 }
228}
229
230impl RuleLinter for MD051Linter {
231 fn feed(&mut self, node: &Node) {
232 match node.kind() {
233 "atx_heading" | "setext_heading" => {
234 if let Some(heading_text) = self.extract_heading_text(node) {
235 if let Some(custom_anchor) = self.extract_custom_anchor(&heading_text) {
237 self.valid_fragments.insert(custom_anchor.clone());
238 self.valid_fragments_lowercase
239 .insert(custom_anchor.to_lowercase());
240 let clean_text = heading_text
242 .replace(&format!("{{#{custom_anchor}}}"), "")
243 .trim()
244 .to_string();
245 if !clean_text.is_empty() {
246 let fragment = self.generate_github_fragment(&clean_text);
247 if !fragment.is_empty() {
248 self.valid_fragments.insert(fragment.clone());
249 self.valid_fragments_lowercase
250 .insert(fragment.to_lowercase());
251 }
252 }
253 } else {
254 let fragment = self.generate_github_fragment(&heading_text);
256 if !fragment.is_empty() {
257 let mut unique_fragment = fragment.clone();
259 let mut counter = 1;
260 while self.valid_fragments.contains(&unique_fragment) {
261 unique_fragment = format!("{fragment}-{counter}");
262 counter += 1;
263 }
264 self.valid_fragments.insert(unique_fragment.clone());
265 self.valid_fragments_lowercase
266 .insert(unique_fragment.to_lowercase());
267 }
268 }
269 }
270 }
271 "inline" | "html_block" => {
272 let ids = self.extract_html_id_or_name(node);
274 for id in ids {
275 self.valid_fragments.insert(id.clone());
276 self.valid_fragments_lowercase.insert(id.to_lowercase());
277 }
278
279 let fragments = self.extract_link_fragments(node);
281 for fragment in fragments {
282 self.link_fragments.push(LinkFragment {
286 fragment,
287 range: node.range(),
288 });
289 }
290 }
291 _ => {
292 }
294 }
295 }
296
297 fn finalize(&mut self) -> Vec<RuleViolation> {
298 let mut violations = Vec::new();
299 let config = &self.context.config.linters.settings.link_fragments;
300
301 let ignored_regex = if !config.ignored_pattern.is_empty() {
303 Regex::new(&config.ignored_pattern).ok()
304 } else {
305 None
306 };
307
308 for link_fragment in &self.link_fragments {
309 let fragment = &link_fragment.fragment;
310 let mut is_valid = false;
311
312 if self.is_github_special_fragment(fragment) {
314 is_valid = true;
315 }
316
317 if !is_valid {
319 if let Some(ref regex) = ignored_regex {
320 if regex.is_match(fragment) {
321 is_valid = true;
322 }
323 }
324 }
325
326 if !is_valid {
328 if config.ignore_case {
329 let fragment_lower = fragment.to_lowercase();
330 is_valid = self.valid_fragments_lowercase.contains(&fragment_lower);
331 } else {
332 is_valid = self.valid_fragments.contains(fragment);
333 }
334 }
335
336 if !is_valid {
337 violations.push(RuleViolation::new(
338 &MD051,
339 format!("Link fragment '{fragment}' does not match any heading or anchor in the document"),
340 self.context.file_path.clone(),
341 range_from_tree_sitter(&link_fragment.range),
342 ));
343 }
344 }
345
346 violations
347 }
348}
349
350pub const MD051: Rule = Rule {
351 id: "MD051",
352 alias: "link-fragments",
353 tags: &["links"],
354 description: "Link fragments should be valid",
355 rule_type: RuleType::Document,
356 required_nodes: &["link", "atx_heading", "setext_heading"],
357 new_linter: |context| Box::new(MD051Linter::new(context)),
358};
359
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::{
        LintersSettingsTable, MD051LinkFragmentsTable, QuickmarkConfig, RuleSeverity,
    };
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Configuration with only the `link-fragments` rule enabled and default settings.
    fn test_config() -> QuickmarkConfig {
        test_config_with_rules(vec![("link-fragments", RuleSeverity::Error)])
    }

    /// Configuration with only the `link-fragments` rule enabled and explicit settings.
    fn test_config_with_settings(ignore_case: bool, ignored_pattern: String) -> QuickmarkConfig {
        let settings = LintersSettingsTable {
            link_fragments: MD051LinkFragmentsTable {
                ignore_case,
                ignored_pattern,
            },
            ..Default::default()
        };
        crate::test_utils::test_helpers::test_config_with_settings(
            vec![("link-fragments", RuleSeverity::Error)],
            settings,
        )
    }

    /// Lints `input` as `test.md` and returns the message of every violation, in order.
    fn violation_messages(input: &str, config: QuickmarkConfig) -> Vec<String> {
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        violations
            .iter()
            .map(|violation| violation.message().to_string())
            .collect()
    }

    #[test]
    fn test_basic_valid_fragment() {
        let input = "# Test Heading

[Valid Link](#test-heading)
";

        let messages = violation_messages(input, test_config());

        assert!(messages.is_empty(), "unexpected violations: {messages:?}");
    }

    #[test]
    fn test_basic_invalid_fragment() {
        let input = "# Test Heading

[Invalid Link](#nonexistent-heading)
";

        let messages = violation_messages(input, test_config());

        assert_eq!(messages.len(), 1);
    }

    #[test]
    fn test_case_sensitive_default() {
        let input = "# Test Heading

[Invalid Link](#Test-Heading)
";

        let messages = violation_messages(input, test_config());

        assert_eq!(messages.len(), 1);
    }

    #[test]
    fn test_ignore_case_option() {
        let input = "# Test Heading

[Valid Link](#Test-Heading)
";

        let messages = violation_messages(input, test_config_with_settings(true, String::new()));

        assert!(messages.is_empty(), "unexpected violations: {messages:?}");
    }

    #[test]
    fn test_punctuation_removal() {
        let input = "# Test: Heading! With? Punctuation.

[Valid Link](#test-heading-with-punctuation)
";

        let messages = violation_messages(input, test_config());

        assert!(messages.is_empty(), "unexpected violations: {messages:?}");
    }

    #[test]
    fn test_duplicate_headings() {
        let input = "# Test Heading

## Test Heading

[Link 1](#test-heading)
[Link 2](#test-heading-1)
";

        let messages = violation_messages(input, test_config());

        assert!(messages.is_empty(), "unexpected violations: {messages:?}");
    }

    #[test]
    fn test_custom_anchor() {
        let input = "# Test Heading {#custom-anchor}

[Valid Link](#custom-anchor)
";

        let messages = violation_messages(input, test_config());

        assert!(messages.is_empty(), "unexpected violations: {messages:?}");
    }

    #[test]
    fn test_html_id_attribute() {
        let input = "# Test Heading

<div id=\"my-custom-id\">Content</div>

[Valid Link](#my-custom-id)
";

        let messages = violation_messages(input, test_config());

        assert!(messages.is_empty(), "unexpected violations: {messages:?}");
    }

    #[test]
    fn test_html_name_attribute() {
        let input = "# Test Heading

<a name=\"my-anchor\">Anchor</a>

[Valid Link](#my-anchor)
";

        let messages = violation_messages(input, test_config());

        assert!(messages.is_empty(), "unexpected violations: {messages:?}");
    }

    #[test]
    fn test_ignored_pattern() {
        let input = "# Test Heading

[Link to external](#external-fragment)
";

        let config = test_config_with_settings(false, "external-.*".to_string());
        let messages = violation_messages(input, config);

        assert!(messages.is_empty(), "unexpected violations: {messages:?}");
    }

    #[test]
    fn test_github_special_fragments() {
        let input = "# Test Heading

[Link to top](#top)
[Link to line](#L20)
[Link to range](#L19C5-L21C11)
[Invalid range](#L10-L20)
";

        let messages = violation_messages(input, test_config());

        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Link fragment 'L10-L20'"));
    }

    #[test]
    fn test_multiple_violations() {
        let input = "# Valid Heading

[Valid Link](#valid-heading)
[Invalid Link 1](#invalid-one)
[Invalid Link 2](#invalid-two)
";

        let messages = violation_messages(input, test_config());

        assert_eq!(messages.len(), 2);
    }

    #[test]
    fn test_setext_headings() {
        let input = "Test Heading
============

Another Heading
---------------

[Valid Link 1](#test-heading)
[Valid Link 2](#another-heading)
";

        let messages = violation_messages(input, test_config());

        assert!(messages.is_empty(), "unexpected violations: {messages:?}");
    }

    #[test]
    fn test_edge_cases_for_consistency() {
        let input = "# Test Heading

[Valid link](#test-heading)
[Fragment with spaces](#test heading)
[Empty fragment](#)
[Invalid single L](#L)
[Valid L with number](#L123)
";

        let messages = violation_messages(input, test_config());

        // Only the bare `L` is reported; the spaced and empty fragments are skipped.
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Link fragment 'L'"));
    }
}