1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
use crate::rule::{LintError, LintResult, LintWarning, Rule};
use regex::Regex;
use std::collections::{HashSet, HashMap};
use lazy_static::lazy_static;
// Regular expressions shared by the MD052 rule. Every pattern is a literal, so the
// `unwrap()` calls can only fail if a pattern in this table is edited into something invalid.
lazy_static! {
// Reference definition line: optional leading whitespace, `[label]:`, optional
// whitespace, then at least one non-whitespace character. Capture 1 is the label.
static ref REF_REGEX: Regex = Regex::new(r"^\s*\[([^\]]+)\]:\s*\S+").unwrap();
// Full or collapsed reference link: `[text][reference]` or `[text][]`.
// Capture 1 is the text, capture 2 the (possibly empty) reference.
// The pattern is not anchored, so it also matches the `[text][reference]` part of an image.
static ref REF_LINK_REGEX: Regex = Regex::new(r"\[([^\]]+)\]\[([^\]]*)\]").unwrap();
// Same as REF_LINK_REGEX, but requires the leading `!` of an image: `![text][reference]`.
static ref REF_IMAGE_REGEX: Regex = Regex::new(r"!\[([^\]]+)\]\[([^\]]*)\]").unwrap();
// Any single non-empty bracketed span `[reference]`; used to find shortcut references.
// Capture 1 is the text between the brackets.
static ref SHORTCUT_REF_REGEX: Regex = Regex::new(r"\[([^\]]+)\]").unwrap();
// Inline link `[text](target)` and inline image `![text](target)`. Capture 1 is the
// text, capture 2 the target. These are matched so they can be excluded from the checks.
static ref INLINE_LINK_REGEX: Regex = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
static ref INLINE_IMAGE_REGEX: Regex = Regex::new(r"!\[([^\]]+)\]\(([^)]+)\)").unwrap();
// Reference definition line; the pattern is identical to REF_REGEX, compiled under a second name.
static ref REF_DEF_PATTERN: Regex = Regex::new(r"^\s*\[([^\]]+)\]:\s*\S+").unwrap();
// Fence of a fenced code block: three or more backticks or three or more tildes at the
// very start of the line (leading indentation is not accepted). Capture 1 is the fence itself.
static ref FENCED_CODE_START: Regex = Regex::new(r"^(`{3,}|~{3,})").unwrap();
}
/// Rule MD052: Reference links and images should use a reference that exists
///
/// This rule is triggered when a reference link or image uses a reference that isn't defined.
///
/// Labels are lowercased before they are compared, so `[Foo]` is satisfied by a
/// definition written as `[foo]: ...`. The rule carries no state or configuration, and
/// it offers no automatic fix because the intended link target cannot be known.
pub struct MD052ReferenceLinkImages;
impl MD052ReferenceLinkImages {
pub fn new() -> Self {
Self
}
/// Collects the labels of every reference definition (`[label]: target`) in `content`.
///
/// Each line is tested against `REF_REGEX`. Labels are stored lowercased so that
/// later lookups can be case-insensitive.
fn extract_references(&self, content: &str) -> HashSet<String> {
    content
        .lines()
        .filter_map(|line| REF_REGEX.captures(line))
        .map(|definition| definition[1].to_lowercase())
        .collect()
}
/// Finds reference links and images whose label has no matching definition.
///
/// Scans `content` line by line and returns one `(line, column, label)` tuple per
/// undefined reference:
///
/// * `line` is the 1-based line number,
/// * `column` is the 0-based byte offset of the match within that line,
/// * `label` is the label exactly as written, borrowed from `content`.
///
/// `references` must contain the defined labels in lowercase; labels found here are
/// lowercased before the lookup, which makes the comparison case-insensitive.
///
/// Lines that are reference definitions, and all lines between code fences, are
/// skipped. Matches that overlap an inline link or image (`[text](target)`) are
/// ignored, and a given label is reported at most once per line.
fn find_undefined_references<'a>(&self, content: &'a str, references: &HashSet<String>) -> Vec<(usize, usize, &'a str)> {
    let mut undefined: Vec<(usize, usize, &'a str)> = Vec::new();
    // Keys are "<line>:<lowercased label>"; the map only serves to report each label
    // once per line, the stored value is never read.
    let mut reported_refs: HashMap<String, bool> = HashMap::new();

    // Records `label` as undefined unless it is defined or was already reported on
    // the same line.
    let mut report = |line_number: usize, column: usize, label: &'a str| {
        let normalized = label.to_lowercase();
        if references.contains(&normalized) {
            return;
        }
        let key = format!("{}:{}", line_number, normalized);
        // `insert` returns `None` only when the key was not present before, so this
        // is a single lookup instead of `contains_key` followed by `insert`.
        if reported_refs.insert(key, true).is_none() {
            undefined.push((line_number, column, label));
        }
    };

    // `Some(fence)` while inside a fenced code block that was opened by `fence`.
    let mut open_fence: Option<&'a str> = None;

    for (index, line) in content.lines().enumerate() {
        let line_number = index + 1;

        // Reference definitions are never treated as uses.
        if REF_DEF_PATTERN.is_match(line) {
            continue;
        }

        // Inside a code block: nothing is checked. The block ends on the first line
        // that starts with the fence that opened it.
        if let Some(fence) = open_fence {
            if line.starts_with(fence) {
                open_fence = None;
            }
            continue;
        }

        // Opening fence: remember it and skip the fence line itself.
        if let Some(fence) = FENCED_CODE_START.captures(line).and_then(|cap| cap.get(1)) {
            open_fence = Some(fence.as_str());
            continue;
        }

        // Byte ranges of inline links and images on this line.
        let mut inline_elements: Vec<(usize, usize)> = Vec::new();
        // The second tuple field is the distance from the start of the match to the
        // start of its text: 1 for `[`, 2 for `![`.
        let inline_patterns = [(&*INLINE_LINK_REGEX, 1usize), (&*INLINE_IMAGE_REGEX, 2usize)];
        for &(pattern, text_offset) in inline_patterns.iter() {
            for cap in pattern.captures_iter(line) {
                if let Some(whole) = cap.get(0) {
                    inline_elements.push((whole.start(), whole.end()));
                    // Ranges of references nested in the text, shifted to be relative to the line.
                    // NOTE(review): the text capture cannot contain `]`, while the nested
                    // patterns require one, so this looks like it never yields anything — confirm.
                    let text = cap.get(1).map_or("", |m| m.as_str());
                    let text_start = whole.start() + text_offset;
                    inline_elements.extend(
                        self.extract_references_inside_text(text)
                            .into_iter()
                            .map(|(start, end)| (text_start + start, text_start + end)),
                    );
                }
            }
        }

        // Full (`[text][label]`) and collapsed (`[text][]`) reference links.
        for cap in REF_LINK_REGEX.captures_iter(line) {
            if let (Some(span), Some(label)) = (cap.get(0), cap.get(2)) {
                if is_position_overlapping(span.start(), span.end(), &inline_elements) {
                    continue;
                }
                // A collapsed reference resolves against its own text.
                let target = if label.as_str().is_empty() { cap.get(1) } else { Some(label) };
                if let Some(target) = target {
                    report(line_number, span.start(), target.as_str());
                }
            }
        }

        // Shortcut references (`[label]`).
        // NOTE(review): SHORTCUT_REF_REGEX matches any bracketed span, so text such as
        // a task-list checkbox (`[x]`) would presumably be reported too — confirm intent.
        for cap in SHORTCUT_REF_REGEX.captures_iter(line) {
            if let (Some(span), Some(label)) = (cap.get(0), cap.get(1)) {
                let (start, end) = (span.start(), span.end());
                if is_position_overlapping(start, end, &inline_elements) {
                    continue;
                }
                // Followed by `[` or `(`: this bracket is the text part of a full
                // reference or of an inline link, not a shortcut reference.
                if line[end..].starts_with(|c: char| c == '[' || c == '(') {
                    continue;
                }
                // Preceded by `!`: this is an image, not a link.
                if line[..start].ends_with('!') {
                    continue;
                }
                report(line_number, start, label.as_str());
            }
        }

        // Full (`![text][label]`) and collapsed (`![text][]`) reference images.
        // REF_LINK_REGEX also matches the bracket part of an image, so an undefined
        // image label has normally been recorded by the link pass already and is
        // suppressed here by the once-per-line rule.
        for cap in REF_IMAGE_REGEX.captures_iter(line) {
            if let (Some(span), Some(label)) = (cap.get(0), cap.get(2)) {
                if is_position_overlapping(span.start(), span.end(), &inline_elements) {
                    continue;
                }
                let target = if label.as_str().is_empty() { cap.get(1) } else { Some(label) };
                if let Some(target) = target {
                    report(line_number, span.start(), target.as_str());
                }
            }
        }
    }

    undefined
}
/// Returns the byte ranges, relative to `text`, of every reference link and reference
/// image found inside `text`.
///
/// All `REF_LINK_REGEX` matches come first, followed by all `REF_IMAGE_REGEX` matches.
fn extract_references_inside_text(&self, text: &str) -> Vec<(usize, usize)> {
    REF_LINK_REGEX
        .captures_iter(text)
        .chain(REF_IMAGE_REGEX.captures_iter(text))
        .filter_map(|cap| cap.get(0))
        .map(|whole| (whole.start(), whole.end()))
        .collect()
}
}
/// Reports whether the range `start`..`end` overlaps or touches any of the ranges in
/// `excluded_positions`.
///
/// Both bounds are compared inclusively, so two ranges that merely share an endpoint
/// count as overlapping. An empty `excluded_positions` never overlaps.
fn is_position_overlapping(start: usize, end: usize, excluded_positions: &[(usize, usize)]) -> bool {
    excluded_positions
        .iter()
        .any(|&(excluded_start, excluded_end)| excluded_start <= end && start <= excluded_end)
}
impl Rule for MD052ReferenceLinkImages {
    /// Identifier of this rule.
    fn name(&self) -> &'static str {
        "MD052"
    }

    /// One-line summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Reference links and images should use a reference that exists"
    }

    /// Produces one warning for every undefined reference found in `content`.
    ///
    /// Definitions are collected first, then every use is checked against them.
    /// Warnings never carry a fix.
    fn check(&self, content: &str) -> LintResult {
        let references = self.extract_references(content);
        let warnings: Vec<_> = self
            .find_undefined_references(content, &references)
            .into_iter()
            .map(|(line, offset, ref_text)| LintWarning {
                line,
                // The finder returns 0-based offsets; warnings use 1-based columns.
                column: offset + 1,
                message: format!("Reference '{}' not found", ref_text),
                // The intended target is unknown, so no fix can be offered.
                fix: None,
            })
            .collect();
        Ok(warnings)
    }

    /// Returns `content` unchanged: an undefined reference cannot be repaired
    /// without knowing the URL the author meant.
    fn fix(&self, content: &str) -> Result<String, LintError> {
        Ok(content.to_owned())
    }
}