1use once_cell::sync::Lazy;
2use regex::Regex;
3use serde::Deserialize;
4use std::collections::{HashMap, HashSet};
5use std::rc::Rc;
6use tree_sitter::Node;
7
8use crate::{
9 linter::{range_from_tree_sitter, RuleViolation},
10 rules::{Context, Rule, RuleLinter, RuleType},
11};
12
13#[derive(Debug, PartialEq, Clone, Deserialize)]
15pub struct MD053LinkImageReferenceDefinitionsTable {
16 #[serde(default)]
17 pub ignored_definitions: Vec<String>,
18}
19
20impl Default for MD053LinkImageReferenceDefinitionsTable {
21 fn default() -> Self {
22 Self {
23 ignored_definitions: vec!["//".to_string()],
24 }
25 }
26}
27
28static FULL_REFERENCE_PATTERN: Lazy<Regex> =
30 Lazy::new(|| Regex::new(r"\[([^\]]*)\]\[([^\]]*)\]").unwrap());
31
32static COLLAPSED_REFERENCE_PATTERN: Lazy<Regex> =
33 Lazy::new(|| Regex::new(r"\[([^\]]+)\]\[\]").unwrap());
34
35static SHORTCUT_REFERENCE_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"\[([^\]]+)\]").unwrap());
36
37static REFERENCE_DEFINITION_PATTERN: Lazy<Regex> =
38 Lazy::new(|| Regex::new(r"(?m)^\s*\[([^\]]+)\]:\s*").unwrap());
39
40#[derive(Debug, Clone)]
41struct ReferenceDefinition {
42 label: String,
43 range: tree_sitter::Range,
44}
45
46pub(crate) struct MD053Linter {
47 context: Rc<Context>,
48 definitions: HashMap<String, Vec<ReferenceDefinition>>, references: HashSet<String>, }
51
52impl MD053Linter {
53 pub fn new(context: Rc<Context>) -> Self {
54 Self {
55 context,
56 definitions: HashMap::new(),
57 references: HashSet::new(),
58 }
59 }
60
61 fn normalize_reference(&self, label: &str) -> String {
62 let mut result = String::with_capacity(label.len());
67 let mut prev_was_space = false;
68
69 for ch in label.chars() {
70 if ch.is_whitespace() {
71 if !prev_was_space && !result.is_empty() {
72 result.push(' ');
73 prev_was_space = true;
74 }
75 } else {
76 result.push(ch.to_lowercase().next().unwrap_or(ch));
77 prev_was_space = false;
78 }
79 }
80
81 if result.ends_with(' ') {
83 result.pop();
84 }
85
86 result
87 }
88
89 fn extract_reference_definition(&self, node: &Node) -> Vec<ReferenceDefinition> {
90 let start_byte = node.start_byte();
93 let end_byte = node.end_byte();
94 let document_content = self.context.document_content.borrow();
95 let content = &document_content[start_byte..end_byte];
96
97 REFERENCE_DEFINITION_PATTERN
98 .captures_iter(content)
99 .filter_map(|cap| {
100 cap.get(1).map(|label| {
101 let normalized_label = self.normalize_reference(label.as_str());
102 ReferenceDefinition {
103 label: normalized_label,
104 range: node.range(),
105 }
106 })
107 })
108 .collect()
109 }
110
111 fn extract_reference_links(&self, node: &Node) -> Vec<String> {
112 let start_byte = node.start_byte();
117 let end_byte = node.end_byte();
118 let document_content = self.context.document_content.borrow();
119 let content = &document_content[start_byte..end_byte];
120
121 let mut links = Vec::new();
122
123 if content.contains('(') && content.contains(')') {
125 return links; }
127
128 for cap in FULL_REFERENCE_PATTERN.captures_iter(content) {
130 if let Some(label) = cap.get(2) {
131 let label_str = label.as_str();
132 if !label_str.is_empty() {
133 links.push(self.normalize_reference(label_str));
134 }
135 }
136 }
137
138 for cap in COLLAPSED_REFERENCE_PATTERN.captures_iter(content) {
140 if let Some(label) = cap.get(1) {
141 links.push(self.normalize_reference(label.as_str()));
142 }
143 }
144
145 let mut shortcut_candidates = Vec::new();
148 for cap in SHORTCUT_REFERENCE_PATTERN.captures_iter(content) {
149 if let Some(label) = cap.get(1) {
150 let full_match = cap.get(0).expect("regex match should have group 0");
151 let start = full_match.start();
152 let end = full_match.end();
153 let remaining = &content[end..];
154
155 let immediately_followed_by_bracket = remaining.starts_with('[');
158 if !immediately_followed_by_bracket {
159 shortcut_candidates.push((
160 start,
161 end,
162 self.normalize_reference(label.as_str()),
163 ));
164 }
165 }
166 }
167
168 let mut existing_labels: HashSet<String> = links.iter().cloned().collect();
171 for (_start, _end, normalized_label) in shortcut_candidates {
172 if !existing_labels.contains(&normalized_label) {
176 existing_labels.insert(normalized_label.clone());
177 links.push(normalized_label);
178 }
179 }
180
181 links
182 }
183}
184
185impl RuleLinter for MD053Linter {
186 fn feed(&mut self, node: &Node) {
187 match node.kind() {
188 "link_reference_definition" => {
190 let definitions = self.extract_reference_definition(node);
191 for definition in definitions {
192 self.definitions
193 .entry(definition.label.clone())
194 .or_default()
195 .push(definition);
196 }
197 }
198 "paragraph" => {
200 let links = self.extract_reference_links(node);
201 for link in links {
202 self.references.insert(link);
203 }
204 }
205 "link" | "image" => {
207 let links = self.extract_reference_links(node);
208 for link in links {
209 self.references.insert(link);
210 }
211 }
212 _ => {
213 }
215 }
216 }
217
218 fn finalize(&mut self) -> Vec<RuleViolation> {
219 let mut violations = Vec::new();
220 let config = &self
221 .context
222 .config
223 .linters
224 .settings
225 .link_image_reference_definitions;
226 let ignored_definitions: HashSet<String> = config
227 .ignored_definitions
228 .iter()
229 .map(|label| self.normalize_reference(label))
230 .collect();
231
232 for (label, definitions) in &self.definitions {
234 if ignored_definitions.contains(label) {
236 continue;
237 }
238
239 let is_unused = !self.references.contains(label);
241
242 if definitions.len() > 1 {
243 if is_unused {
245 let first_def = &definitions[0];
247 violations.push(RuleViolation::new(
248 &MD053,
249 format!(
250 "Unused link or image reference definition: \"{}\"",
251 first_def.label
252 ),
253 self.context.file_path.clone(),
254 range_from_tree_sitter(&first_def.range),
255 ));
256 }
257 for definition in &definitions[1..] {
259 violations.push(RuleViolation::new(
260 &MD053,
261 format!(
262 "Duplicate link or image reference definition: \"{}\"",
263 definition.label
264 ),
265 self.context.file_path.clone(),
266 range_from_tree_sitter(&definition.range),
267 ));
268 }
269 } else if is_unused {
270 let def = &definitions[0];
272 violations.push(RuleViolation::new(
273 &MD053,
274 format!(
275 "Unused link or image reference definition: \"{}\"",
276 def.label
277 ),
278 self.context.file_path.clone(),
279 range_from_tree_sitter(&def.range),
280 ));
281 }
282 }
283
284 violations
285 }
286}
287
288pub const MD053: Rule = Rule {
289 id: "MD053",
290 alias: "link-image-reference-definitions",
291 tags: &["links", "images"],
292 description: "Link and image reference definitions should be needed",
293 rule_type: RuleType::Document,
294 required_nodes: &["link", "image", "paragraph", "link_reference_definition"],
295 new_linter: |context| Box::new(MD053Linter::new(context)),
296};
297
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::{
        LintersSettingsTable, MD053LinkImageReferenceDefinitionsTable, QuickmarkConfig,
        RuleSeverity,
    };
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Configuration that enables only MD053, at error severity, with default settings.
    fn test_config() -> QuickmarkConfig {
        test_config_with_rules(vec![(
            "link-image-reference-definitions",
            RuleSeverity::Error,
        )])
    }

    /// Same as `test_config`, but with the given `ignored_definitions` setting.
    fn test_config_with_ignored_definitions(ignored_definitions: Vec<String>) -> QuickmarkConfig {
        let settings = LintersSettingsTable {
            link_image_reference_definitions: MD053LinkImageReferenceDefinitionsTable {
                ignored_definitions,
            },
            ..Default::default()
        };
        crate::test_utils::test_helpers::test_config_with_settings(
            vec![("link-image-reference-definitions", RuleSeverity::Error)],
            settings,
        )
    }

    /// Lints `input` as `test.md` under `config` and returns the violation messages.
    fn lint_messages(config: QuickmarkConfig, input: &str) -> Vec<String> {
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        violations
            .iter()
            .map(|violation| violation.message().to_string())
            .collect()
    }

    /// Counts the messages that contain `needle`.
    fn count_containing(messages: &[String], needle: &str) -> usize {
        messages
            .iter()
            .filter(|message| message.contains(needle))
            .count()
    }

    #[test]
    fn test_unused_definition_basic() {
        let messages = lint_messages(
            test_config(),
            "[unused]: https://example.com

Some text.
",
        );

        assert_eq!(1, messages.len());
        assert!(messages[0].contains("Unused link or image reference definition: \"unused\""));
    }

    #[test]
    fn test_used_definition_basic() {
        let messages = lint_messages(
            test_config(),
            "[label]: https://example.com

[Good link][label]
",
        );

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_duplicate_definitions() {
        let messages = lint_messages(
            test_config(),
            "[label]: https://example.com/1
[label]: https://example.com/2

[Good link][label]
",
        );

        assert_eq!(1, messages.len());
        assert!(messages[0].contains("Duplicate link or image reference definition: \"label\""));
    }

    #[test]
    fn test_unused_and_duplicate() {
        let messages = lint_messages(
            test_config(),
            "[unused1]: https://example.com/1
[unused2]: https://example.com/2
[duplicate]: https://example.com/3
[duplicate]: https://example.com/4

Some text.
",
        );

        assert_eq!(4, messages.len());
        assert_eq!(3, count_containing(&messages, "Unused"));
        assert_eq!(1, count_containing(&messages, "Duplicate"));
    }

    #[test]
    fn test_collapsed_reference_format() {
        let messages = lint_messages(
            test_config(),
            "[label]: https://example.com

[label][]
",
        );

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_shortcut_reference_format() {
        let messages = lint_messages(
            test_config(),
            "[label]: https://example.com

[label]
",
        );

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_image_references() {
        let messages = lint_messages(
            test_config(),
            "[image]: https://example.com/image.png
[unused-image]: https://example.com/unused.png

![Alt text][image]
",
        );

        assert_eq!(1, messages.len());
        assert!(
            messages[0].contains("Unused link or image reference definition: \"unused-image\"")
        );
    }

    #[test]
    fn test_case_insensitive_matching() {
        let messages = lint_messages(
            test_config(),
            "[Label]: https://example.com

[Good link][LABEL]
",
        );

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_whitespace_normalization() {
        let messages = lint_messages(
            test_config(),
            "[ label with spaces ]: https://example.com

[Good link][label with spaces]
",
        );

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_ignored_definitions_default() {
        let messages = lint_messages(
            test_config(),
            "[//]: # (This is a comment)
[unused]: https://example.com

Some text.
",
        );

        assert_eq!(1, messages.len());
        assert!(messages[0].contains("Unused link or image reference definition: \"unused\""));
    }

    #[test]
    fn test_custom_ignored_definitions() {
        let config =
            test_config_with_ignored_definitions(vec!["custom".to_string(), "another".to_string()]);
        let messages = lint_messages(
            config,
            "[custom]: https://example.com
[another]: https://example.com
[regular]: https://example.com

[Good link][regular]
",
        );

        assert_eq!(0, messages.len());
    }

    #[test]
    fn test_mixed_scenarios_comprehensive() {
        let messages = lint_messages(
            test_config(),
            "[used-full]: https://example.com/1
[used-collapsed]: https://example.com/2
[used-shortcut]: https://example.com/3
[unused]: https://example.com/4
[duplicate-used]: https://example.com/5
[duplicate-used]: https://example.com/6
[duplicate-unused]: https://example.com/7
[duplicate-unused]: https://example.com/8
[//]: # (Ignored comment)

[Link 1][used-full]
[used-collapsed][]
[used-shortcut]
[Link 2][duplicate-used]
",
        );

        assert_eq!(4, messages.len());
        assert_eq!(2, count_containing(&messages, "Unused"));
        assert_eq!(2, count_containing(&messages, "Duplicate"));
    }

    #[test]
    fn test_inline_links_ignored() {
        let messages = lint_messages(
            test_config(),
            "[unused]: https://example.com

[Inline link](https://example.com) and [another](https://example.com).
",
        );

        assert_eq!(1, messages.len());
        assert!(messages[0].contains("Unused link or image reference definition: \"unused\""));
    }
}