1use once_cell::sync::Lazy;
2use regex::Regex;
3use serde::Deserialize;
4use std::collections::HashSet;
5use std::rc::Rc;
6use tree_sitter::Node;
7
8use crate::{
9 linter::{range_from_tree_sitter, RuleViolation},
10 rules::{Context, Rule, RuleLinter, RuleType},
11};
12
13#[derive(Debug, PartialEq, Clone, Deserialize)]
15pub struct MD052ReferenceLinksImagesTable {
16 #[serde(default)]
17 pub shortcut_syntax: bool,
18 #[serde(default)]
19 pub ignored_labels: Vec<String>,
20}
21
22impl Default for MD052ReferenceLinksImagesTable {
23 fn default() -> Self {
24 Self {
25 shortcut_syntax: false,
26 ignored_labels: vec!["x".to_string()],
27 }
28 }
29}
30
31static FULL_REFERENCE_PATTERN: Lazy<Regex> =
33 Lazy::new(|| Regex::new(r"\[([^\]]*)\]\[([^\]]*)\]").unwrap());
34
35static COLLAPSED_REFERENCE_PATTERN: Lazy<Regex> =
36 Lazy::new(|| Regex::new(r"\[([^\]]+)\]\[\]").unwrap());
37
38static SHORTCUT_REFERENCE_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"\[([^\]]+)\]").unwrap());
39
40static REFERENCE_DEFINITION_PATTERN: Lazy<Regex> =
41 Lazy::new(|| Regex::new(r"(?m)^\s*\[([^\]]+)\]:\s*").unwrap());
42
43#[derive(Debug, Clone)]
44struct ReferenceLink {
45 label: String,
46 range: tree_sitter::Range,
47 is_shortcut: bool,
48}
49
50pub(crate) struct MD052Linter {
51 context: Rc<Context>,
52 definitions: HashSet<String>,
53 references: Vec<ReferenceLink>,
54}
55
56impl MD052Linter {
57 pub fn new(context: Rc<Context>) -> Self {
58 Self {
59 context,
60 definitions: HashSet::new(),
61 references: Vec::new(),
62 }
63 }
64
65 fn normalize_reference(&self, label: &str) -> String {
66 label
71 .to_lowercase()
72 .split_whitespace()
73 .collect::<Vec<_>>()
74 .join(" ")
75 }
76
77 fn extract_reference_definition(&self, node: &Node) -> Vec<String> {
78 let start_byte = node.start_byte();
81 let end_byte = node.end_byte();
82 let document_content = self.context.document_content.borrow();
83 let content = &document_content[start_byte..end_byte];
84
85 let mut definitions = Vec::new();
86 for cap in REFERENCE_DEFINITION_PATTERN.captures_iter(content) {
87 if let Some(label) = cap.get(1) {
88 definitions.push(self.normalize_reference(label.as_str()));
89 }
90 }
91 definitions
92 }
93
94 fn extract_reference_links(&self, node: &Node) -> Vec<(String, bool)> {
95 let start_byte = node.start_byte();
100 let end_byte = node.end_byte();
101 let document_content = self.context.document_content.borrow();
102 let content = &document_content[start_byte..end_byte];
103
104 let mut links = Vec::new();
105
106 if content.contains('(') && content.contains(')') {
108 return links; }
110
111 for cap in FULL_REFERENCE_PATTERN.captures_iter(content) {
113 if let Some(label) = cap.get(2) {
114 let label_str = label.as_str();
115 if !label_str.is_empty() {
116 links.push((self.normalize_reference(label_str), false));
117 }
118 }
119 }
120
121 for cap in COLLAPSED_REFERENCE_PATTERN.captures_iter(content) {
123 if let Some(label) = cap.get(1) {
124 links.push((self.normalize_reference(label.as_str()), false));
125 }
126 }
127
128 if links.is_empty() {
130 for cap in SHORTCUT_REFERENCE_PATTERN.captures_iter(content) {
131 if let Some(label) = cap.get(1) {
132 let match_end = cap.get(0).unwrap().end();
135 let remaining = &content[match_end..];
136 if !remaining.trim_start().starts_with('[') {
137 links.push((self.normalize_reference(label.as_str()), true));
138 }
139 }
140 }
141 }
142
143 links
144 }
145}
146
147impl RuleLinter for MD052Linter {
148 fn feed(&mut self, node: &Node) {
149 match node.kind() {
150 "paragraph" => {
152 let definitions = self.extract_reference_definition(node);
153 for definition in definitions {
154 self.definitions.insert(definition);
155 }
156
157 let links = self.extract_reference_links(node);
159 for (label, is_shortcut) in links {
160 self.references.push(ReferenceLink {
161 label,
162 range: node.range(),
163 is_shortcut,
164 });
165 }
166 }
167 "link" | "image" => {
169 let links = self.extract_reference_links(node);
170 for (label, is_shortcut) in links {
171 self.references.push(ReferenceLink {
172 label,
173 range: node.range(),
174 is_shortcut,
175 });
176 }
177 }
178 _ => {
179 let definitions = self.extract_reference_definition(node);
181 for definition in definitions {
182 self.definitions.insert(definition);
183 }
184 }
185 }
186 }
187
188 fn finalize(&mut self) -> Vec<RuleViolation> {
189 let mut violations = Vec::new();
190 let config = &self.context.config.linters.settings.reference_links_images;
191 let ignored_labels: HashSet<String> = config
192 .ignored_labels
193 .iter()
194 .map(|label| self.normalize_reference(label))
195 .collect();
196
197 for reference in &self.references {
198 if reference.is_shortcut && !config.shortcut_syntax {
200 continue;
201 }
202
203 let normalized_label = self.normalize_reference(&reference.label);
204
205 if ignored_labels.contains(&normalized_label) {
207 continue;
208 }
209
210 if !self.definitions.contains(&normalized_label) {
212 violations.push(RuleViolation::new(
213 &MD052,
214 format!(
215 "Missing link or image reference definition: \"{}\"",
216 reference.label
217 ),
218 self.context.file_path.clone(),
219 range_from_tree_sitter(&reference.range),
220 ));
221 }
222 }
223
224 violations
225 }
226}
227
228pub const MD052: Rule = Rule {
229 id: "MD052",
230 alias: "reference-links-images",
231 tags: &["links", "images"],
232 description: "Reference links and images should use a label that is defined",
233 rule_type: RuleType::Document,
234 required_nodes: &["link", "image", "paragraph"],
235 new_linter: |context| Box::new(MD052Linter::new(context)),
236};
237
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::{LintersSettingsTable, MD052ReferenceLinksImagesTable, RuleSeverity};
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Configuration with only MD052 enabled and default rule settings.
    fn test_config() -> crate::config::QuickmarkConfig {
        let rules = vec![("reference-links-images", RuleSeverity::Error)];
        test_config_with_rules(rules)
    }

    /// Configuration with only MD052 enabled and explicit rule settings.
    fn test_config_with_settings(
        shortcut_syntax: bool,
        ignored_labels: Vec<String>,
    ) -> crate::config::QuickmarkConfig {
        let reference_links_images = MD052ReferenceLinksImagesTable {
            shortcut_syntax,
            ignored_labels,
        };
        let settings = LintersSettingsTable {
            reference_links_images,
            ..Default::default()
        };
        crate::test_utils::test_helpers::test_config_with_settings(
            vec![("reference-links-images", RuleSeverity::Error)],
            settings,
        )
    }

    /// Lints `input` as `test.md` and checks the number of violations.
    fn assert_violation_count(
        config: crate::config::QuickmarkConfig,
        input: &str,
        expected: usize,
    ) {
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        assert_eq!(expected, violations.len());
    }

    /// Lints `input` as `test.md` and checks that exactly one violation is
    /// reported and that its message contains `expected_message`.
    fn assert_single_violation(
        config: crate::config::QuickmarkConfig,
        input: &str,
        expected_message: &str,
    ) {
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        assert_eq!(1, violations.len());
        assert!(violations[0].message().contains(expected_message));
    }

    #[test]
    fn test_valid_full_reference() {
        let input = concat!(
            "[Good link][label]\n",
            "\n",
            "[label]: https://example.com\n",
        );

        assert_violation_count(test_config(), input, 0);
    }

    #[test]
    fn test_invalid_full_reference() {
        let input = concat!(
            "[Bad link][missing]\n",
            "\n",
            "[label]: https://example.com\n",
        );

        assert_single_violation(
            test_config(),
            input,
            "Missing link or image reference definition: \"missing\"",
        );
    }

    #[test]
    fn test_valid_collapsed_reference() {
        let input = concat!(
            "[label][]\n",
            "\n",
            "[label]: https://example.com\n",
        );

        assert_violation_count(test_config(), input, 0);
    }

    #[test]
    fn test_invalid_collapsed_reference() {
        let input = concat!(
            "[missing][]\n",
            "\n",
            "[label]: https://example.com\n",
        );

        assert_single_violation(
            test_config(),
            input,
            "Missing link or image reference definition: \"missing\"",
        );
    }

    #[test]
    fn test_shortcut_syntax_disabled_by_default() {
        let input = concat!(
            "[undefined]\n",
            "\n",
            "[label]: https://example.com\n",
        );

        assert_violation_count(test_config(), input, 0);
    }

    #[test]
    fn test_shortcut_syntax_enabled() {
        let input = concat!(
            "[undefined]\n",
            "\n",
            "[label]: https://example.com\n",
        );

        assert_single_violation(
            test_config_with_settings(true, vec!["x".to_string()]),
            input,
            "Missing link or image reference definition: \"undefined\"",
        );
    }

    #[test]
    fn test_valid_shortcut_syntax_enabled() {
        let input = concat!(
            "[label]\n",
            "\n",
            "[label]: https://example.com\n",
        );

        assert_violation_count(
            test_config_with_settings(true, vec!["x".to_string()]),
            input,
            0,
        );
    }

    #[test]
    fn test_ignored_labels_default_x() {
        let input = concat!(
            "[x] Task item\n",
            "\n",
            "[label]: https://example.com\n",
        );

        assert_violation_count(test_config(), input, 0);
    }

    #[test]
    fn test_custom_ignored_labels() {
        let input = concat!(
            "[custom] Some text\n",
            "[another] More text\n",
            "\n",
            "[label]: https://example.com\n",
        );

        assert_violation_count(
            test_config_with_settings(true, vec!["custom".to_string(), "another".to_string()]),
            input,
            0,
        );
    }

    #[test]
    fn test_case_insensitive_matching() {
        let input = concat!(
            "[Good Link][LABEL]\n",
            "\n",
            "[label]: https://example.com\n",
        );

        assert_violation_count(test_config(), input, 0);
    }

    #[test]
    fn test_whitespace_normalization() {
        let input = concat!(
            "[Good Link][ label with spaces ]\n",
            "\n",
            "[label with spaces]: https://example.com\n",
        );

        assert_violation_count(test_config(), input, 0);
    }

    #[test]
    fn test_images_full_reference() {
        let input = concat!(
            "![Alt text][image]\n",
            "\n",
            "[image]: https://example.com/image.png\n",
        );

        assert_violation_count(test_config(), input, 0);
    }

    #[test]
    fn test_images_invalid_reference() {
        let input = concat!(
            "![Alt text][missing]\n",
            "\n",
            "[image]: https://example.com/image.png\n",
        );

        assert_single_violation(
            test_config(),
            input,
            "Missing link or image reference definition: \"missing\"",
        );
    }

    #[test]
    fn test_multiple_violations() {
        let input = concat!(
            "[Bad link][missing1]\n",
            "[Another bad][missing2]\n",
            "[Good link][valid]\n",
            "\n",
            "[valid]: https://example.com\n",
        );

        assert_violation_count(test_config(), input, 2);
    }

    #[test]
    fn test_mixed_link_types() {
        let input = concat!(
            "[Full][label1]\n",
            "[Collapsed][]\n",
            "[Shortcut]\n",
            "![Image][image1]\n",
            "![Collapsed image][]\n",
            "\n",
            "[label1]: https://example.com/1\n",
            "[collapsed]: https://example.com/2\n",
            "[shortcut]: https://example.com/3\n",
            "[image1]: https://example.com/image1.png\n",
            "[collapsed image]: https://example.com/image2.png\n",
        );

        assert_violation_count(test_config(), input, 0);
    }

    #[test]
    fn test_duplicate_definitions() {
        let input = concat!(
            "[Good link][label]\n",
            "\n",
            "[label]: https://example.com/1\n",
            "[label]: https://example.com/2\n",
        );

        assert_violation_count(test_config(), input, 0);
    }
}