1use once_cell::sync::Lazy;
2use regex::Regex;
3use std::rc::Rc;
4use tree_sitter::Node;
5
6use crate::{
7 linter::{range_from_tree_sitter, RuleViolation},
8 rules::{Context, Rule, RuleLinter, RuleType},
9};
10
11static IMG_TAG_REGEX: Lazy<Regex> = Lazy::new(|| {
13 Regex::new(r"(?si)<(/?)img\b[^>]*>").expect("Invalid img tag regex")
15});
16
17static ALT_ATTRIBUTE_REGEX: Lazy<Regex> = Lazy::new(|| {
18 Regex::new(r#"(?si)\balt\s*=\s*(?:[\"']([^\"']*)['"]|([^\s>]+))"#)
19 .expect("Invalid alt attribute regex")
20});
21
22static ARIA_HIDDEN_REGEX: Lazy<Regex> = Lazy::new(|| {
23 Regex::new(r#"(?si)aria-hidden\s*=\s*(?:[\"']([^\"']*)['"]|([^\s>]+))"#)
24 .expect("Invalid aria-hidden regex")
25});
26
27static MARKDOWN_IMAGE_REGEX: Lazy<Regex> =
29 Lazy::new(|| Regex::new(r"!\[([^\]]*)\]\([^)]+\)").expect("Invalid markdown image regex"));
30
31static MARKDOWN_REFERENCE_IMAGE_REGEX: Lazy<Regex> = Lazy::new(|| {
32 Regex::new(r"!\[([^\]]*)\]\[([^\]]*)\]").expect("Invalid markdown reference image regex")
33});
34
35static MARKDOWN_REFERENCE_IMAGE_SHORTCUT_REGEX: Lazy<Regex> = Lazy::new(|| {
36 Regex::new(r"!\[([^\]]*)\]\[]").expect("Invalid markdown reference image shortcut regex")
37});
38
39pub(crate) struct MD045Linter {
40 context: Rc<Context>,
41 violations: Vec<RuleViolation>,
42 line_starts: Vec<usize>,
43}
44
45impl MD045Linter {
46 pub fn new(context: Rc<Context>) -> Self {
47 let line_starts: Vec<usize> = std::iter::once(0)
49 .chain(
50 context
51 .document_content
52 .borrow()
53 .match_indices('\n')
54 .map(|(i, _)| i + 1),
55 )
56 .collect();
57
58 Self {
59 context,
60 violations: Vec::new(),
61 line_starts,
62 }
63 }
64
65 fn is_in_code_context(&self, node: &Node) -> bool {
66 let mut current = node.parent();
68 while let Some(parent) = current {
69 match parent.kind() {
70 "code_span" | "fenced_code_block" | "indented_code_block" => {
71 return true;
72 }
73 _ => {
74 current = parent.parent();
75 }
76 }
77 }
78 false
79 }
80
81 fn contains_inline_code_with_images(&self, content: &str) -> bool {
82 static CODE_SPAN_WITH_IMG_REGEX: Lazy<Regex> = Lazy::new(|| {
84 Regex::new(r"^`[^`]*(?:<img|!\[)[^`]*`\s*(?:and\s*`[^`]*(?:<img|!\[)[^`]*`\s*)*$")
85 .expect("Invalid code span with image regex")
86 });
87 CODE_SPAN_WITH_IMG_REGEX.is_match(content.trim())
88 }
89
90 fn find_markdown_image_violations(&self, content: &str) -> Vec<(usize, usize)> {
91 let mut ranges = Vec::new();
92
93 for captures in MARKDOWN_IMAGE_REGEX.captures_iter(content) {
95 if let (Some(alt_text), Some(full_match)) = (captures.get(1), captures.get(0)) {
96 if alt_text.as_str().is_empty() {
97 ranges.push((full_match.start(), full_match.end()));
98 }
99 }
100 }
101
102 for captures in MARKDOWN_REFERENCE_IMAGE_REGEX.captures_iter(content) {
104 if let (Some(alt_text), Some(full_match)) = (captures.get(1), captures.get(0)) {
105 if alt_text.as_str().is_empty() {
106 ranges.push((full_match.start(), full_match.end()));
107 }
108 }
109 }
110
111 for captures in MARKDOWN_REFERENCE_IMAGE_SHORTCUT_REGEX.captures_iter(content) {
113 if let (Some(alt_text), Some(full_match)) = (captures.get(1), captures.get(0)) {
114 if alt_text.as_str().is_empty() {
115 ranges.push((full_match.start(), full_match.end()));
116 }
117 }
118 }
119
120 ranges
121 }
122
123 fn find_html_image_violations(&self, content: &str) -> Vec<(usize, usize)> {
124 let mut ranges = Vec::new();
125 for img_match in IMG_TAG_REGEX.find_iter(content) {
126 let img_tag = img_match.as_str();
127
128 if img_tag.starts_with("</") {
130 continue;
131 }
132
133 if let Some(aria_cap) = ARIA_HIDDEN_REGEX.captures(img_tag) {
135 let value = aria_cap.get(1).or(aria_cap.get(2));
136 if let Some(value_match) = value {
137 if value_match.as_str().to_lowercase() == "true" {
138 continue; }
140 }
141 }
142
143 let has_valid_alt = ALT_ATTRIBUTE_REGEX.captures(img_tag).is_some();
145
146 if !has_valid_alt {
147 ranges.push((img_match.start(), img_match.end()));
148 }
149 }
150 ranges
151 }
152
153 fn add_violation(&mut self, node: &Node, start_offset: usize, end_offset: usize) {
154 let start_byte = node.start_byte() + start_offset;
155 let end_byte = node.start_byte() + end_offset;
156
157 let (start_line, start_col) = self.byte_to_line_col(start_byte);
158 let (end_line, end_col) = self.byte_to_line_col(end_byte);
159
160 let range = range_from_tree_sitter(&tree_sitter::Range {
161 start_byte,
162 end_byte,
163 start_point: tree_sitter::Point {
164 row: start_line,
165 column: start_col,
166 },
167 end_point: tree_sitter::Point {
168 row: end_line,
169 column: end_col,
170 },
171 });
172
173 let violation = RuleViolation::new(
174 &MD045,
175 MD045.description.to_string(),
176 self.context.file_path.clone(),
177 range,
178 );
179 self.violations.push(violation);
180 }
181
182 fn byte_to_line_col(&self, byte_pos: usize) -> (usize, usize) {
183 let line = match self.line_starts.binary_search(&byte_pos) {
184 Ok(line) => line,
185 Err(line) => line - 1,
186 };
187 let line_start = self.line_starts[line];
188 let col = byte_pos - line_start;
189 (line, col)
190 }
191}
192
193pub const MD045: Rule = Rule {
194 id: "MD045",
195 alias: "no-alt-text",
196 tags: &["accessibility", "images"],
197 description: "Images should have alternate text (alt text)",
198 rule_type: RuleType::Token,
199 required_nodes: &["inline", "html_block"],
200 new_linter: |context| Box::new(MD045Linter::new(context)),
201};
202
203impl RuleLinter for MD045Linter {
204 fn feed(&mut self, node: &Node) {
205 match node.kind() {
206 "inline" | "html_block" => {
207 if self.is_in_code_context(node) {
208 return;
209 }
210
211 let (markdown_ranges, html_ranges) = {
212 let document_content = self.context.document_content.borrow();
213 let content = &document_content[node.start_byte()..node.end_byte()];
214
215 if self.contains_inline_code_with_images(content) {
216 (vec![], vec![])
217 } else if node.kind() == "inline" {
218 (
219 self.find_markdown_image_violations(content),
220 self.find_html_image_violations(content),
221 )
222 } else {
223 (vec![], self.find_html_image_violations(content))
225 }
226 };
227
228 for (start, end) in markdown_ranges {
229 self.add_violation(node, start, end);
230 }
231
232 for (start, end) in html_ranges {
233 self.add_violation(node, start, end);
234 }
235 }
236 _ => {}
237 }
238 }
239
240 fn finalize(&mut self) -> Vec<RuleViolation> {
241 std::mem::take(&mut self.violations)
242 }
243}
244
// NOTE(review): several fixtures below contain runs of blank lines where
// inline image markup would be expected (for instance, the multi-line image
// fixture is only a heading followed by newlines yet one violation is
// expected). Confirm the fixture text has not been altered.
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::RuleSeverity;
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Configuration with MD045 enabled and `no-inline-html` switched off.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![
            ("no-alt-text", RuleSeverity::Error),
            ("no-inline-html", RuleSeverity::Off),
        ])
    }

    /// Lints `input` as `test.md` and returns how many MD045 violations
    /// were reported.
    fn md045_violation_count(input: &str) -> usize {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        linter
            .analyze()
            .iter()
            .filter(|violation| violation.rule().id == "MD045")
            .count()
    }

    #[test]
    fn test_markdown_images_with_alt_text_no_violations() {
        let input = "# Test\n\n\n\n\n\n![Reference image with alt][ref]\n\nReference image with alt text ![Alt text reference][ref2]\n\n[ref]: image.jpg\n[ref2]: image.jpg \"Title\"\n";

        assert_eq!(md045_violation_count(input), 0);
    }

    #[test]
    fn test_markdown_images_without_alt_text_violations() {
        let input = "# Test\n\n\n\n\n\n and  in text\n\nReference image without alt ![][ref]\n\n[ref]: image.jpg\n";

        assert_eq!(md045_violation_count(input), 4);
    }

    #[test]
    fn test_html_images_with_alt_attribute_no_violations() {
        let input = "# Test\n\n<img src=\"image.jpg\" alt=\"Valid alt text\" />\n\n<img src=\"image.jpg\" alt=\"Another valid\" >\n\n<IMG SRC=\"image.jpg\" ALT=\"Case insensitive\" />\n\n<img \n src=\"image.jpg\" \n alt=\"Multi-line\" \n />\n\n<img src=\"image.jpg\" alt=\"\" />\n\n<img src=\"image.jpg\" alt='' />\n";

        assert_eq!(md045_violation_count(input), 0);
    }

    #[test]
    fn test_html_images_without_alt_attribute_violations() {
        let input = "# Test\n\n<img src=\"image.jpg\" />\n\n<img src=\"image.jpg\" alt>\n\n<IMG SRC=\"image.jpg\" />\n\n<img \n src=\"image.jpg\" \n title=\"Title only\" />\n\n<p><img src=\"nested.jpg\"></p>\n";

        assert_eq!(md045_violation_count(input), 5);
    }

    #[test]
    fn test_html_images_with_aria_hidden_no_violations() {
        let input = "# Test\n\n<img src=\"image.jpg\" aria-hidden=\"true\" />\n\n<img src=\"image.jpg\" ARIA-HIDDEN=\"TRUE\" />\n\n<img \n src=\"image.jpg\" \n aria-hidden=\"true\"
 />\n\n<img src=\"image.jpg\" aria-hidden='true' />\n";

        assert_eq!(md045_violation_count(input), 0);
    }

    #[test]
    fn test_html_images_with_aria_hidden_false_violations() {
        let input = "# Test\n\n<img src=\"image.jpg\" aria-hidden=\"false\" />\n\n<img src=\"image.jpg\" aria-hidden=\"\" />\n\n<img src=\"image.jpg\" aria-hidden=\"other\" />\n";

        assert_eq!(md045_violation_count(input), 3);
    }

    #[test]
    fn test_mixed_image_types() {
        let input = "# Test\n\n\n\n\n\n<img src=\"valid.jpg\" alt=\"Valid\" />\n\n<img src=\"no-alt.jpg\" />\n\n<img src=\"hidden.jpg\" aria-hidden=\"true\" />\n\n![Reference valid][ref1]\n\n![][ref2]\n\n[ref1]: image.jpg\n[ref2]: image.jpg\n";

        assert_eq!(md045_violation_count(input), 3);
    }

    #[test]
    fn test_multiline_markdown_images() {
        let input = "# Test\n\n\n\n\n";

        assert_eq!(md045_violation_count(input), 1);
    }

    #[test]
    fn test_images_in_links() {
        let input = "# Test\n\n[](link.html)\n\n[](link.html)\n\n[<img src=\"alt.jpg\" alt=\"Alt\" />](link.html)\n\n[<img src=\"no-alt.jpg\" />](link.html)\n";

        assert_eq!(md045_violation_count(input), 2);
    }

    #[test]
    fn test_no_false_positives_in_code_blocks() {
        let input = "# Test\n\n```html\n\n<img src=\"image.jpg\" />\n```\n\n \n <img src=\"indented.jpg\" />\n\n`` and `<img src=\"inline.jpg\" />`\n\nRegular text with  should trigger.\n";

        assert_eq!(md045_violation_count(input), 1);
    }
}