// mdbook_lint_core/rules/standard/md052.rs
use crate::error::Result;
use crate::{
    Document, Violation,
    rule::{Rule, RuleCategory, RuleMetadata},
    violation::Severity,
};
use comrak::nodes::AstNode;
use std::collections::HashSet;
33
/// Rule MD052: reference links and images should use a label that is defined.
///
/// Built with [`MD052::new`] and customised through the consuming builder
/// methods `ignored_labels` and `shortcut_syntax`.
pub struct MD052 {
    /// Labels that are never reported, even when no definition exists for them.
    ignored_labels: Vec<String>,
    /// Value recorded by the `shortcut_syntax` builder. The checker does not
    /// read it, hence the `dead_code` allowance.
    #[allow(dead_code)]
    shortcut_syntax: bool,
}
40
41impl Default for MD052 {
42 fn default() -> Self {
43 Self::new()
44 }
45}
46
47impl MD052 {
48 pub fn new() -> Self {
50 Self {
51 ignored_labels: vec!["x".to_string()], shortcut_syntax: false,
53 }
54 }
55
56 #[allow(dead_code)]
58 pub fn ignored_labels(mut self, labels: Vec<String>) -> Self {
59 self.ignored_labels = labels;
60 self
61 }
62
63 #[allow(dead_code)]
65 pub fn shortcut_syntax(mut self, include: bool) -> Self {
66 self.shortcut_syntax = include;
67 self
68 }
69
70 fn collect_defined_labels(&self, document: &Document) -> HashSet<String> {
72 let mut definitions = HashSet::new();
73 let mut parser = RefDefParser::new(document.content.as_bytes());
74
75 while let Some(def) = parser.next_definition() {
76 definitions.insert(def.label.to_lowercase());
77 }
78
79 definitions
80 }
81
82 fn check_reference_labels(&self, document: &Document) -> Vec<Violation> {
84 let mut violations = Vec::new();
85 let defined_labels = self.collect_defined_labels(document);
86 let mut parser = LinkParser::new(document.content.as_bytes());
87
88 while let Some(link) = parser.next_link() {
89 match link {
90 LinkType::Reference {
91 label,
92 line,
93 column,
94 } => {
95 let label_lower = label.to_lowercase();
96 if !self.ignored_labels.contains(&label_lower)
97 && !defined_labels.contains(&label_lower)
98 {
99 violations.push(self.create_violation(
100 format!("Reference link uses undefined label '{label}'"),
101 line,
102 column,
103 Severity::Error,
104 ));
105 }
106 }
107 LinkType::Image {
108 label,
109 line,
110 column,
111 } => {
112 let label_lower = label.to_lowercase();
113 if !self.ignored_labels.contains(&label_lower)
114 && !defined_labels.contains(&label_lower)
115 {
116 violations.push(self.create_violation(
117 format!("Reference image uses undefined label '{label}'"),
118 line,
119 column,
120 Severity::Error,
121 ));
122 }
123 }
124 _ => {} }
126 }
127
128 violations
129 }
130}
131
132impl Rule for MD052 {
133 fn id(&self) -> &'static str {
134 "MD052"
135 }
136
137 fn name(&self) -> &'static str {
138 "reference-links-images"
139 }
140
141 fn description(&self) -> &'static str {
142 "Reference links and images should use a label that is defined"
143 }
144
145 fn metadata(&self) -> RuleMetadata {
146 RuleMetadata::stable(RuleCategory::Links)
147 }
148
149 fn check_with_ast<'a>(
150 &self,
151 document: &Document,
152 _ast: Option<&'a AstNode<'a>>,
153 ) -> Result<Vec<Violation>> {
154 let violations = self.check_reference_labels(document);
156 Ok(violations)
157 }
158}
159
/// A link reference definition (`[label]: destination`) found in a document.
#[derive(Debug)]
struct RefDefinition {
    /// Text between the brackets, exactly as written in the source.
    label: String,
}

/// Scanner that extracts link reference definitions from raw Markdown bytes.
///
/// Only a `[` that is the first non-blank byte of a line (or directly follows
/// a rejected `[`) is considered as the start of a definition.
struct RefDefParser<'a> {
    input: &'a [u8],
    pos: usize,
}

impl<'a> RefDefParser<'a> {
    /// Creates a parser positioned at the start of `input`.
    fn new(input: &'a [u8]) -> Self {
        Self { input, pos: 0 }
    }

    /// Returns the next definition in the input, or `None` once the input is
    /// exhausted.
    fn next_definition(&mut self) -> Option<RefDefinition> {
        while self.pos < self.input.len() {
            self.skip_whitespace();

            if self.current_byte() == Some(b'[') {
                if let Some(def) = self.try_parse_definition() {
                    return Some(def);
                }
                // Not a definition: step over the `[` and keep scanning.
                self.pos += 1;
            } else {
                self.skip_to_next_line();
            }
        }
        None
    }

    /// Tries to parse `[label]:` followed by whitespace or end of input,
    /// starting at the `[` under the cursor.
    ///
    /// On success the cursor is left just past the `:`. On failure the cursor
    /// is restored to the opening `[`, whichever step failed.
    fn try_parse_definition(&mut self) -> Option<RefDefinition> {
        let start_pos = self.pos;
        self.pos += 1; // opening `[`

        let label = match self.parse_ref_label() {
            Some(label) if self.input[self.pos..].starts_with(b"]:") => label,
            _ => {
                self.pos = start_pos;
                return None;
            }
        };
        self.pos += 2; // `]:`

        match self.current_byte() {
            None | Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\r') => {
                Some(RefDefinition { label })
            }
            Some(_) => {
                self.pos = start_pos;
                None
            }
        }
    }

    /// Reads a non-empty label up to, but not including, the closing `]`.
    ///
    /// Returns `None` for an empty label, a label interrupted by a line
    /// ending, or an unterminated label. The label is decoded from the
    /// original UTF-8 bytes so non-ASCII text is preserved.
    fn parse_ref_label(&mut self) -> Option<String> {
        let start = self.pos;
        while let Some(byte) = self.current_byte() {
            match byte {
                b']' => {
                    if self.pos == start {
                        return None;
                    }
                    let raw = &self.input[start..self.pos];
                    return Some(String::from_utf8_lossy(raw).into_owned());
                }
                b'\n' | b'\r' => return None,
                _ => self.pos += 1,
            }
        }
        None
    }

    /// Moves the cursor just past the next `\n`, or to the end of input.
    fn skip_to_next_line(&mut self) {
        let rest = self.input.get(self.pos..).unwrap_or(&[]);
        self.pos = match rest.iter().position(|&b| b == b'\n') {
            Some(offset) => self.pos + offset + 1,
            None => self.input.len(),
        };
    }

    /// Skips spaces and tabs.
    fn skip_whitespace(&mut self) {
        while let Some(b' ') | Some(b'\t') = self.current_byte() {
            self.pos += 1;
        }
    }

    /// Byte under the cursor, if any.
    fn current_byte(&self) -> Option<u8> {
        self.input.get(self.pos).copied()
    }
}

/// A link-like construct recognised by [`LinkParser`].
///
/// `line` is 1-based; `column` is the 1-based byte offset of the opening
/// `[` (or `!`) within its line.
#[derive(Debug, PartialEq, Eq)]
enum LinkType {
    /// `[text][label]`, `[label][]` or a shortcut `[label]`.
    Reference {
        label: String,
        line: usize,
        column: usize,
    },
    /// `![alt][label]`, `![label][]` or `![label]`.
    Image {
        label: String,
        line: usize,
        column: usize,
    },
    /// `[text](destination)` or `![alt](destination)`; carries no label.
    Inline,
}

/// Byte-level scanner that yields links and images from raw Markdown while
/// skipping code spans and backtick-fenced code blocks.
struct LinkParser<'a> {
    input: &'a [u8],
    pos: usize,
    /// 1-based number of the line containing `pos`.
    line: usize,
    /// Byte offset at which the current line starts.
    line_start: usize,
    /// Whether the cursor is inside a backtick-fenced code block.
    in_code_block: bool,
}

impl<'a> LinkParser<'a> {
    /// Creates a parser positioned at the start of `input`.
    fn new(input: &'a [u8]) -> Self {
        Self {
            input,
            pos: 0,
            line: 1,
            line_start: 0,
            in_code_block: false,
        }
    }

    /// Returns the next link or image, or `None` at end of input.
    fn next_link(&mut self) -> Option<LinkType> {
        while let Some(byte) = self.current_byte() {
            match byte {
                b'`' => {
                    if self.is_code_fence() {
                        self.toggle_code_block();
                    } else if self.in_code_block {
                        // A literal backtick inside a fenced block must not
                        // start a code span, or the span search could consume
                        // the closing fence.
                        self.pos += 1;
                    } else {
                        self.skip_code_span();
                    }
                }
                b'[' if !self.in_code_block => {
                    // A failed attempt always moves the cursor past the `[`
                    // and stops on the byte that ended it, so a terminating
                    // newline is still counted by the `\n` arm below.
                    if let Some(link) = self.try_parse_link() {
                        return Some(link);
                    }
                }
                b'!' if !self.in_code_block && self.peek_byte(1) == Some(b'[') => {
                    if let Some(image) = self.try_parse_image() {
                        return Some(image);
                    }
                }
                b'\n' => {
                    self.line += 1;
                    self.pos += 1;
                    self.line_start = self.pos;
                }
                _ => self.pos += 1,
            }
        }
        None
    }

    /// Parses a link starting at the `[` under the cursor.
    ///
    /// Returns `None` when the bracket is unterminated on its line, or when a
    /// lone `[text]` is followed by a byte that makes it unlikely to be a
    /// shortcut reference. In both cases the cursor is left on the byte where
    /// parsing stopped.
    fn try_parse_link(&mut self) -> Option<LinkType> {
        let line = self.line;
        let column = self.pos.saturating_sub(self.line_start) + 1;

        self.pos += 1; // opening `[`
        let text = self.parse_link_text()?;
        self.pos += 1; // closing `]`, guaranteed by `parse_link_text`

        match self.current_byte() {
            Some(b'(') => {
                self.skip_inline_url();
                Some(LinkType::Inline)
            }
            Some(b'[') => {
                self.pos += 1;
                // `[text][]` (collapsed) and an unterminated second bracket
                // both fall back to the link text as the label.
                let label = match self.parse_reference_label() {
                    Some(explicit) if !explicit.is_empty() => explicit,
                    _ => text,
                };
                Some(LinkType::Reference {
                    label,
                    line,
                    column,
                })
            }
            _ if self.is_likely_reference() => Some(LinkType::Reference {
                label: text,
                line,
                column,
            }),
            _ => None,
        }
    }

    /// Parses an image starting at the `!` of `![` under the cursor.
    ///
    /// Returns `None` only when the alt-text bracket is unterminated on its
    /// line; the cursor is then left on the byte where parsing stopped.
    fn try_parse_image(&mut self) -> Option<LinkType> {
        let line = self.line;
        let column = self.pos.saturating_sub(self.line_start) + 1;

        self.pos += 2; // `![`
        let alt_text = self.parse_link_text()?;
        self.pos += 1; // closing `]`, guaranteed by `parse_link_text`

        match self.current_byte() {
            Some(b'(') => {
                self.skip_inline_url();
                Some(LinkType::Inline)
            }
            Some(b'[') => {
                self.pos += 1;
                let label = match self.parse_reference_label() {
                    Some(explicit) if !explicit.is_empty() => explicit,
                    _ => alt_text,
                };
                Some(LinkType::Image {
                    label,
                    line,
                    column,
                })
            }
            // A lone `![alt]` is always treated as a shortcut image reference.
            _ => Some(LinkType::Image {
                label: alt_text,
                line,
                column,
            }),
        }
    }

    /// Reads bracketed text up to, but not including, the matching `]`.
    ///
    /// Nested brackets are balanced and a backslash escapes the following
    /// byte. Returns `None` on an unescaped newline or at end of input,
    /// leaving the cursor there. The text is decoded from the original UTF-8
    /// bytes, escapes included.
    fn parse_link_text(&mut self) -> Option<String> {
        let start = self.pos;
        let mut depth = 0usize;

        while let Some(byte) = self.current_byte() {
            match byte {
                b'[' => {
                    depth += 1;
                    self.pos += 1;
                }
                b']' if depth == 0 => return Some(self.text_between(start, self.pos)),
                b']' => {
                    depth -= 1;
                    self.pos += 1;
                }
                b'\\' => {
                    self.pos += 1;
                    if let Some(escaped) = self.current_byte() {
                        self.pos += 1;
                        // An escaped newline is consumed as text but must
                        // still advance the line bookkeeping.
                        if escaped == b'\n' {
                            self.line += 1;
                            self.line_start = self.pos;
                        }
                    }
                }
                b'\n' => return None,
                _ => self.pos += 1,
            }
        }
        None
    }

    /// Reads the label of the second bracket pair and consumes its `]`.
    ///
    /// Returns `None` on a newline or at end of input, leaving the cursor
    /// there.
    fn parse_reference_label(&mut self) -> Option<String> {
        let start = self.pos;
        while let Some(byte) = self.current_byte() {
            match byte {
                b']' => {
                    let label = self.text_between(start, self.pos);
                    self.pos += 1;
                    return Some(label);
                }
                b'\n' => return None,
                _ => self.pos += 1,
            }
        }
        None
    }

    /// Skips an inline destination `( ... )`, balancing nested parentheses
    /// and honouring backslash escapes.
    ///
    /// Newlines inside the destination are counted. If the parenthesis is
    /// never closed, only the opening `(` is consumed so the rest of the
    /// document is still scanned.
    fn skip_inline_url(&mut self) {
        if self.current_byte() == Some(b'(') {
            self.pos += 1;
        }

        let resume = self.pos;
        let (mut cursor, mut line, mut line_start) = (self.pos, self.line, self.line_start);
        let mut depth = 1usize;

        while let Some(&byte) = self.input.get(cursor) {
            cursor += 1;
            match byte {
                b'(' => depth += 1,
                b')' => {
                    depth -= 1;
                    if depth == 0 {
                        self.pos = cursor;
                        self.line = line;
                        self.line_start = line_start;
                        return;
                    }
                }
                b'\\' => {
                    if let Some(&escaped) = self.input.get(cursor) {
                        cursor += 1;
                        if escaped == b'\n' {
                            line += 1;
                            line_start = cursor;
                        }
                    }
                }
                b'\n' => {
                    line += 1;
                    line_start = cursor;
                }
                _ => {}
            }
        }

        // Unterminated: the line bookkeeping was only tracked locally, so
        // rewinding the cursor is enough.
        self.pos = resume;
    }

    /// Skips a code span opened by the backtick run under the cursor.
    ///
    /// The span closes at the next backtick run of exactly the same length.
    /// Newlines inside the span are counted. If no closer exists, only the
    /// opening run is consumed and the line bookkeeping is left untouched.
    fn skip_code_span(&mut self) {
        let open_len = self.backtick_run(self.pos);
        let content_start = self.pos + open_len;
        let (mut cursor, mut line, mut line_start) = (content_start, self.line, self.line_start);

        while let Some(&byte) = self.input.get(cursor) {
            match byte {
                b'`' => {
                    let run = self.backtick_run(cursor);
                    cursor += run;
                    if run == open_len {
                        self.pos = cursor;
                        self.line = line;
                        self.line_start = line_start;
                        return;
                    }
                }
                b'\n' => {
                    cursor += 1;
                    line += 1;
                    line_start = cursor;
                }
                _ => cursor += 1,
            }
        }

        self.pos = content_start;
    }

    /// Returns `true` when the cursor is on a run of three or more backticks
    /// preceded on its line only by spaces or tabs.
    fn is_code_fence(&self) -> bool {
        let only_indent_before = self
            .input
            .get(self.line_start..self.pos)
            .map_or(true, |prefix| {
                prefix.iter().all(|&b| b == b' ' || b == b'\t')
            });

        only_indent_before && self.backtick_run(self.pos) >= 3
    }

    /// Flips the fenced-code state and consumes the rest of the fence line,
    /// including its newline.
    fn toggle_code_block(&mut self) {
        self.in_code_block = !self.in_code_block;
        while let Some(byte) = self.current_byte() {
            self.pos += 1;
            if byte == b'\n' {
                self.line += 1;
                self.line_start = self.pos;
                break;
            }
        }
    }

    /// Heuristic for shortcut references: a lone `[text]` counts as a
    /// reference when it is followed by end of input, whitespace, or common
    /// punctuation.
    fn is_likely_reference(&self) -> bool {
        match self.current_byte() {
            None => true,
            Some(byte) => matches!(
                byte,
                b' ' | b'\t' | b'\n' | b'\r' | b'.' | b',' | b';' | b':' | b'!' | b'?'
            ),
        }
    }

    /// Number of consecutive backticks starting at byte offset `from`.
    fn backtick_run(&self, from: usize) -> usize {
        self.input
            .get(from..)
            .map_or(0, |rest| rest.iter().take_while(|&&b| b == b'`').count())
    }

    /// Decodes `input[start..end]` as UTF-8. Invalid sequences are replaced
    /// rather than causing a panic.
    fn text_between(&self, start: usize, end: usize) -> String {
        String::from_utf8_lossy(&self.input[start..end]).into_owned()
    }

    /// Byte under the cursor, if any.
    fn current_byte(&self) -> Option<u8> {
        self.input.get(self.pos).copied()
    }

    /// Byte `offset` positions after the cursor, if any.
    fn peek_byte(&self, offset: usize) -> Option<u8> {
        self.input.get(self.pos + offset).copied()
    }
}
660
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_helpers::{
        assert_no_violations, assert_single_violation, assert_violation_count,
    };

    /// Full and collapsed references with matching definitions are accepted.
    #[test]
    fn test_valid_references() {
        let markdown = "[Full reference][label]\n\
                        [Collapsed reference][]\n\
                        \n\
                        [label]: https://example.com\n\
                        [collapsed reference]: https://example.com\n";

        assert_no_violations(MD052::new(), markdown);
    }

    /// A full reference to a missing label is reported on its own line.
    #[test]
    fn test_undefined_reference() {
        let markdown = "[Link text][undefined-label]\n\
                        \n\
                        [defined]: https://example.com\n";

        let reported = assert_single_violation(MD052::new(), markdown);
        assert_eq!(reported.line, 1);
        assert!(reported.message.contains("undefined-label"));
    }

    /// The default configuration ignores the label `x`.
    #[test]
    fn test_ignored_labels() {
        let markdown = "[Checkbox][x]\n";

        assert_no_violations(MD052::new(), markdown);
    }

    /// Labels match their definitions regardless of case.
    #[test]
    fn test_case_insensitive_matching() {
        let markdown = "[Link][LABEL]\n\
                        \n\
                        [label]: https://example.com\n";

        assert_no_violations(MD052::new(), markdown);
    }

    /// A collapsed reference uses its text as the label.
    #[test]
    fn test_collapsed_reference() {
        let markdown = "[Label][]\n\
                        \n\
                        [label]: https://example.com\n";

        assert_no_violations(MD052::new(), markdown);
    }

    /// Every undefined reference is reported, in document order.
    #[test]
    fn test_multiple_undefined_references() {
        let markdown = "[Link 1][undefined1]\n\
                        [Link 2][undefined2]\n\
                        \n\
                        [defined]: https://example.com\n";

        let reported = assert_violation_count(MD052::new(), markdown, 2);
        assert!(reported[0].message.contains("undefined1"));
        assert!(reported[1].message.contains("undefined2"));
    }

    /// Reference images are checked like reference links.
    #[test]
    fn test_reference_images() {
        let markdown = "![Alt text][undefined-image]\n\
                        \n\
                        [defined]: https://example.com\n";

        let reported = assert_single_violation(MD052::new(), markdown);
        assert_eq!(reported.line, 1);
        assert!(reported.message.contains("undefined-image"));
    }

    /// Inline links carry no label and are never reported.
    #[test]
    fn test_inline_links_ignored() {
        let markdown = "[Inline link](https://example.com)\n\n";

        assert_no_violations(MD052::new(), markdown);
    }

    /// Link-like text inside a code span is not a link.
    #[test]
    fn test_code_spans_ignored() {
        let markdown = "`[not a link][label]`\n\
                        \n\
                        [label]: https://example.com\n";

        assert_no_violations(MD052::new(), markdown);
    }

    /// Link-like text inside a fenced code block is not a link.
    #[test]
    fn test_code_blocks_ignored() {
        let markdown = "```\n\
                        [not a link][undefined]\n\
                        ```\n\
                        \n\
                        [defined]: https://example.com\n";

        assert_no_violations(MD052::new(), markdown);
    }

    /// Balanced brackets inside link text do not end the text early.
    #[test]
    fn test_nested_brackets() {
        let markdown = "[Link with [nested] text][label]\n\
                        \n\
                        [label]: https://example.com\n";

        assert_no_violations(MD052::new(), markdown);
    }

    /// Backslash-escaped brackets do not produce a violation.
    #[test]
    fn test_escaped_brackets() {
        let markdown = "\\[Not a link\\][label]\n\
                        \n\
                        [label]: https://example.com\n";

        assert_no_violations(MD052::new(), markdown);
    }

    /// A shortcut reference with a definition is accepted.
    #[test]
    fn test_shortcut_references() {
        let markdown = "[label] is a shortcut reference.\n\
                        \n\
                        [label]: https://example.com\n";

        assert_no_violations(MD052::new(), markdown);
    }
}