mdbook_lint_core/rules/standard/
md053.rs

1//! MD053 - Link and image reference definitions should be needed
2//!
3//! This rule checks for unused or duplicated reference definitions.
4//! Note: This is a simplified implementation that works with basic patterns.
5//!
6//! ## Correct
7//!
8//! ```markdown
9//! [Link][label]
10//!
11//! [label]: https://example.com
12//! ```
13//!
14//! ## Incorrect
15//!
16//! ```markdown
17//! [Link][label]
18//!
19//! [label]: https://example.com
20//! [unused]: https://example.com
21//! [label]: https://duplicate.com
22//! ```
23
24use crate::error::Result;
25use crate::{
26    Document, Violation,
27    rule::{Rule, RuleCategory, RuleMetadata},
28    violation::Severity,
29};
30use comrak::nodes::AstNode;
31
32use std::collections::{HashMap, HashSet};
33
/// MD053 - Link and image reference definitions should be needed
///
/// Reports reference definitions (`[label]: destination`) that are never
/// referenced, or that are defined more than once.
#[derive(Debug, Clone)]
pub struct MD053 {
    /// Labels whose definitions are never reported.
    ignored_definitions: Vec<String>,
}
38
39impl Default for MD053 {
40    fn default() -> Self {
41        Self::new()
42    }
43}
44
45impl MD053 {
46    /// Create a new MD053 rule instance
47    pub fn new() -> Self {
48        Self {
49            ignored_definitions: vec!["//".to_string()], // Default ignores comment syntax
50        }
51    }
52
53    /// Set the list of ignored definitions
54    #[allow(dead_code)]
55    pub fn ignored_definitions(mut self, definitions: Vec<String>) -> Self {
56        self.ignored_definitions = definitions;
57        self
58    }
59
60    /// Parse reference definitions from document content
61    fn collect_definitions(&self, document: &Document) -> Vec<(String, usize, usize)> {
62        let mut definitions = Vec::new();
63
64        for (line_num, line) in document.content.lines().enumerate() {
65            let line_number = line_num + 1;
66
67            // Check if line starts with [label]: (reference definition)
68            if let Some((label, column)) = self.parse_reference_definition(line) {
69                definitions.push((label.to_lowercase(), line_number, column));
70            }
71        }
72
73        definitions
74    }
75
76    /// Parse a reference definition from a line
77    /// Returns (label, column_position) if found
78    fn parse_reference_definition(&self, line: &str) -> Option<(String, usize)> {
79        let mut chars = line.char_indices().peekable();
80        let mut start_pos = 0;
81
82        // Skip leading whitespace
83        while let Some((pos, ch)) = chars.peek() {
84            if ch.is_whitespace() {
85                start_pos = *pos + 1;
86                chars.next();
87            } else {
88                break;
89            }
90        }
91
92        // Must start with [
93        if chars.next()?.1 != '[' {
94            return None;
95        }
96
97        let bracket_start = start_pos;
98        let mut label = String::new();
99        let mut found_closing_bracket = false;
100
101        // Find closing bracket and collect label
102        for (_, ch) in chars.by_ref() {
103            if ch == ']' {
104                found_closing_bracket = true;
105                break;
106            }
107            label.push(ch);
108        }
109
110        if !found_closing_bracket || label.is_empty() {
111            return None;
112        }
113
114        // Next character must be :
115        if chars.next()?.1 != ':' {
116            return None;
117        }
118
119        // Must be followed by whitespace or end of line
120        if let Some((_, ch)) = chars.peek()
121            && !ch.is_whitespace()
122        {
123            return None;
124        }
125
126        Some((label, bracket_start + 1))
127    }
128
129    /// Parse reference usage from document content
130    fn collect_used_labels(&self, document: &Document) -> HashSet<String> {
131        let mut used_labels = HashSet::new();
132
133        for line in document.content.lines() {
134            let mut chars = line.char_indices().peekable();
135            let mut in_backticks = false;
136
137            while let Some((i, ch)) = chars.next() {
138                match ch {
139                    '`' => {
140                        in_backticks = !in_backticks;
141                    }
142                    '[' if !in_backticks => {
143                        // Try to parse reference link
144                        if let Some(label) = self.parse_reference_usage(&line[i..]) {
145                            used_labels.insert(label.to_lowercase());
146
147                            // Skip past the parsed reference
148                            while let Some((_, next_ch)) = chars.peek() {
149                                if *next_ch == ']' {
150                                    chars.next();
151                                    break;
152                                }
153                                chars.next();
154                            }
155                        }
156                    }
157                    _ => {}
158                }
159            }
160        }
161
162        used_labels
163    }
164
165    /// Parse reference usage at the given position
166    /// Returns the reference label if found
167    fn parse_reference_usage(&self, text: &str) -> Option<String> {
168        if !text.starts_with('[') {
169            return None;
170        }
171
172        let mut chars = text.char_indices().skip(1);
173        let mut first_part = String::new();
174        let mut found_first_closing = false;
175
176        // Find first closing bracket
177        for (_, ch) in chars.by_ref() {
178            if ch == ']' {
179                found_first_closing = true;
180                break;
181            }
182            first_part.push(ch);
183        }
184
185        if !found_first_closing {
186            return None;
187        }
188
189        // Check what follows
190        if let Some((_, next_ch)) = chars.next()
191            && next_ch == '['
192        {
193            // Either [text][ref] or [label][]
194            let mut second_part = String::new();
195            let mut found_second_closing = false;
196
197            for (_, ch) in chars {
198                if ch == ']' {
199                    found_second_closing = true;
200                    break;
201                }
202                second_part.push(ch);
203            }
204
205            if found_second_closing {
206                if second_part.is_empty() {
207                    // Collapsed reference [label][]
208                    return Some(first_part);
209                } else {
210                    // Full reference [text][ref]
211                    return Some(second_part);
212                }
213            }
214        }
215
216        None
217    }
218
219    /// Check for unused and duplicate definitions
220    fn check_definitions(
221        &self,
222        definitions: Vec<(String, usize, usize)>,
223        used_labels: &HashSet<String>,
224    ) -> Vec<Violation> {
225        let mut violations = Vec::new();
226        let mut seen_labels: HashMap<String, (usize, usize)> = HashMap::new();
227
228        for (label, line, column) in definitions {
229            // Skip if label is in ignored list
230            if self.ignored_definitions.contains(&label) {
231                continue;
232            }
233
234            // Check for duplicates
235            if let Some((first_line, _first_column)) = seen_labels.get(&label) {
236                violations.push(self.create_violation(
237                    format!(
238                        "Reference definition '{label}' is duplicated (first defined at line {first_line})"
239                    ),
240                    line,
241                    column,
242                    Severity::Warning,
243                ));
244            } else {
245                seen_labels.insert(label.clone(), (line, column));
246
247                // Check if unused
248                if !used_labels.contains(&label) {
249                    violations.push(self.create_violation(
250                        format!("Reference definition '{label}' is unused"),
251                        line,
252                        column,
253                        Severity::Warning,
254                    ));
255                }
256            }
257        }
258
259        violations
260    }
261}
262
263impl Rule for MD053 {
264    fn id(&self) -> &'static str {
265        "MD053"
266    }
267
268    fn name(&self) -> &'static str {
269        "link-image-reference-definitions"
270    }
271
272    fn description(&self) -> &'static str {
273        "Link and image reference definitions should be needed"
274    }
275
276    fn metadata(&self) -> RuleMetadata {
277        RuleMetadata::stable(RuleCategory::Links)
278    }
279
280    fn check_with_ast<'a>(
281        &self,
282        document: &Document,
283        _ast: Option<&'a AstNode<'a>>,
284    ) -> Result<Vec<Violation>> {
285        // This rule works entirely with document content, not AST
286        let definitions = self.collect_definitions(document);
287        let used_labels = self.collect_used_labels(document);
288        let violations = self.check_definitions(definitions, &used_labels);
289
290        Ok(violations)
291    }
292}
293
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_helpers::{
        assert_no_violations, assert_single_violation, assert_violation_count,
    };

    /// A definition referenced by a full reference link is not reported.
    #[test]
    fn test_used_definitions() {
        let markdown = "[Link][label]\n\n[label]: https://example.com\n";

        assert_no_violations(MD053::new(), markdown);
    }

    /// A definition that nothing references is reported on its own line.
    #[test]
    fn test_unused_definition() {
        let markdown =
            "[Link][used]\n\n[used]: https://example.com\n[unused]: https://example.com\n";

        let reported = assert_single_violation(MD053::new(), markdown);
        assert_eq!(reported.line, 4);
        assert!(reported.message.contains("unused"));
    }

    /// The second definition of a label is reported and points at the first.
    #[test]
    fn test_duplicate_definitions() {
        let markdown =
            "[Link][label]\n\n[label]: https://example.com\n[label]: https://duplicate.com\n";

        let reported = assert_single_violation(MD053::new(), markdown);
        assert_eq!(reported.line, 4);
        assert!(reported.message.contains("duplicated"));
        assert!(reported.message.contains("first defined at line 3"));
    }

    /// The `[//]: #` comment idiom is ignored by the default configuration.
    #[test]
    fn test_ignored_definitions() {
        let markdown = "[//]: # (This is a comment)\n";

        assert_no_violations(MD053::new(), markdown);
    }

    /// Labels match regardless of letter case.
    #[test]
    fn test_case_insensitive_matching() {
        let markdown = "[Link][LABEL]\n\n[label]: https://example.com\n";

        assert_no_violations(MD053::new(), markdown);
    }

    /// A collapsed reference (`[Label][]`) counts as a use of its label.
    #[test]
    fn test_collapsed_reference() {
        let markdown = "[Label][]\n\n[label]: https://example.com\n";

        assert_no_violations(MD053::new(), markdown);
    }

    /// Unused and duplicated definitions are reported independently.
    #[test]
    fn test_unused_and_duplicate() {
        let markdown = "[Link][used]\n\n[used]: https://example.com\n[unused]: https://example.com\n[used]: https://duplicate.com\n";

        let found = assert_violation_count(MD053::new(), markdown, 2);

        // Line of the first violation whose message mentions `needle`.
        let line_of = |needle: &str| {
            found
                .iter()
                .find(|v| v.message.contains(needle))
                .unwrap()
                .line
        };

        assert_eq!(line_of("unused"), 4);
        assert_eq!(line_of("duplicated"), 5);
    }
}