quickmark_core/rules/
md045.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3use std::rc::Rc;
4use tree_sitter::Node;
5
6use crate::{
7    linter::{range_from_tree_sitter, RuleViolation},
8    rules::{Context, Rule, RuleLinter, RuleType},
9};
10
11// Pre-compiled regex patterns for image parsing
12static IMG_TAG_REGEX: Lazy<Regex> = Lazy::new(|| {
13    // Use DOTALL flag to match across newlines and case-insensitive flag
14    Regex::new(r"(?si)<(/?)img\b[^>]*>").expect("Invalid img tag regex")
15});
16
17static ALT_ATTRIBUTE_REGEX: Lazy<Regex> = Lazy::new(|| {
18    Regex::new(r#"(?si)\balt\s*=\s*(?:[\"']([^\"']*)['"]|([^\s>]+))"#)
19        .expect("Invalid alt attribute regex")
20});
21
22static ARIA_HIDDEN_REGEX: Lazy<Regex> = Lazy::new(|| {
23    Regex::new(r#"(?si)aria-hidden\s*=\s*(?:[\"']([^\"']*)['"]|([^\s>]+))"#)
24        .expect("Invalid aria-hidden regex")
25});
26
27// Regex patterns for Markdown images
28static MARKDOWN_IMAGE_REGEX: Lazy<Regex> =
29    Lazy::new(|| Regex::new(r"!\[([^\]]*)\]\([^)]+\)").expect("Invalid markdown image regex"));
30
31static MARKDOWN_REFERENCE_IMAGE_REGEX: Lazy<Regex> = Lazy::new(|| {
32    Regex::new(r"!\[([^\]]*)\]\[([^\]]*)\]").expect("Invalid markdown reference image regex")
33});
34
35static MARKDOWN_REFERENCE_IMAGE_SHORTCUT_REGEX: Lazy<Regex> = Lazy::new(|| {
36    Regex::new(r"!\[([^\]]*)\]\[]").expect("Invalid markdown reference image shortcut regex")
37});
38
39pub(crate) struct MD045Linter {
40    context: Rc<Context>,
41    violations: Vec<RuleViolation>,
42    line_starts: Vec<usize>,
43}
44
45impl MD045Linter {
46    pub fn new(context: Rc<Context>) -> Self {
47        // Pre-calculate line starts for efficient line/col lookup
48        let line_starts: Vec<usize> = std::iter::once(0)
49            .chain(
50                context
51                    .document_content
52                    .borrow()
53                    .match_indices('\n')
54                    .map(|(i, _)| i + 1),
55            )
56            .collect();
57
58        Self {
59            context,
60            violations: Vec::new(),
61            line_starts,
62        }
63    }
64
65    fn is_in_code_context(&self, node: &Node) -> bool {
66        // Check if this node is inside a code span or code block
67        let mut current = node.parent();
68        while let Some(parent) = current {
69            match parent.kind() {
70                "code_span" | "fenced_code_block" | "indented_code_block" => {
71                    return true;
72                }
73                _ => {
74                    current = parent.parent();
75                }
76            }
77        }
78        false
79    }
80
81    fn contains_inline_code_with_images(&self, content: &str) -> bool {
82        // Check if the entire content is a single inline code span containing images
83        static CODE_SPAN_WITH_IMG_REGEX: Lazy<Regex> = Lazy::new(|| {
84            Regex::new(r"^`[^`]*(?:<img|!\[)[^`]*`\s*(?:and\s*`[^`]*(?:<img|!\[)[^`]*`\s*)*$")
85                .expect("Invalid code span with image regex")
86        });
87        CODE_SPAN_WITH_IMG_REGEX.is_match(content.trim())
88    }
89
90    fn find_markdown_image_violations(&self, content: &str) -> Vec<(usize, usize)> {
91        let mut ranges = Vec::new();
92
93        // Check inline images: ![alt](url)
94        for captures in MARKDOWN_IMAGE_REGEX.captures_iter(content) {
95            if let (Some(alt_text), Some(full_match)) = (captures.get(1), captures.get(0)) {
96                if alt_text.as_str().is_empty() {
97                    ranges.push((full_match.start(), full_match.end()));
98                }
99            }
100        }
101
102        // Check reference images: ![alt][ref]
103        for captures in MARKDOWN_REFERENCE_IMAGE_REGEX.captures_iter(content) {
104            if let (Some(alt_text), Some(full_match)) = (captures.get(1), captures.get(0)) {
105                if alt_text.as_str().is_empty() {
106                    ranges.push((full_match.start(), full_match.end()));
107                }
108            }
109        }
110
111        // Check shortcut reference images: ![alt][]
112        for captures in MARKDOWN_REFERENCE_IMAGE_SHORTCUT_REGEX.captures_iter(content) {
113            if let (Some(alt_text), Some(full_match)) = (captures.get(1), captures.get(0)) {
114                if alt_text.as_str().is_empty() {
115                    ranges.push((full_match.start(), full_match.end()));
116                }
117            }
118        }
119
120        ranges
121    }
122
123    fn find_html_image_violations(&self, content: &str) -> Vec<(usize, usize)> {
124        let mut ranges = Vec::new();
125        for img_match in IMG_TAG_REGEX.find_iter(content) {
126            let img_tag = img_match.as_str();
127
128            // Skip closing tags
129            if img_tag.starts_with("</") {
130                continue;
131            }
132
133            // Check for aria-hidden="true" first
134            if let Some(aria_cap) = ARIA_HIDDEN_REGEX.captures(img_tag) {
135                let value = aria_cap.get(1).or(aria_cap.get(2));
136                if let Some(value_match) = value {
137                    if value_match.as_str().to_lowercase() == "true" {
138                        continue; // Skip images with aria-hidden="true"
139                    }
140                }
141            }
142
143            // Check for alt attribute with value
144            let has_valid_alt = ALT_ATTRIBUTE_REGEX.captures(img_tag).is_some();
145
146            if !has_valid_alt {
147                ranges.push((img_match.start(), img_match.end()));
148            }
149        }
150        ranges
151    }
152
153    fn add_violation(&mut self, node: &Node, start_offset: usize, end_offset: usize) {
154        let start_byte = node.start_byte() + start_offset;
155        let end_byte = node.start_byte() + end_offset;
156
157        let (start_line, start_col) = self.byte_to_line_col(start_byte);
158        let (end_line, end_col) = self.byte_to_line_col(end_byte);
159
160        let range = range_from_tree_sitter(&tree_sitter::Range {
161            start_byte,
162            end_byte,
163            start_point: tree_sitter::Point {
164                row: start_line,
165                column: start_col,
166            },
167            end_point: tree_sitter::Point {
168                row: end_line,
169                column: end_col,
170            },
171        });
172
173        let violation = RuleViolation::new(
174            &MD045,
175            MD045.description.to_string(),
176            self.context.file_path.clone(),
177            range,
178        );
179        self.violations.push(violation);
180    }
181
182    fn byte_to_line_col(&self, byte_pos: usize) -> (usize, usize) {
183        let line = match self.line_starts.binary_search(&byte_pos) {
184            Ok(line) => line,
185            Err(line) => line - 1,
186        };
187        let line_start = self.line_starts[line];
188        let col = byte_pos - line_start;
189        (line, col)
190    }
191}
192
193pub const MD045: Rule = Rule {
194    id: "MD045",
195    alias: "no-alt-text",
196    tags: &["accessibility", "images"],
197    description: "Images should have alternate text (alt text)",
198    rule_type: RuleType::Token,
199    required_nodes: &["inline", "html_block"],
200    new_linter: |context| Box::new(MD045Linter::new(context)),
201};
202
203impl RuleLinter for MD045Linter {
204    fn feed(&mut self, node: &Node) {
205        match node.kind() {
206            "inline" | "html_block" => {
207                if self.is_in_code_context(node) {
208                    return;
209                }
210
211                let (markdown_ranges, html_ranges) = {
212                    let document_content = self.context.document_content.borrow();
213                    let content = &document_content[node.start_byte()..node.end_byte()];
214
215                    if self.contains_inline_code_with_images(content) {
216                        (vec![], vec![])
217                    } else if node.kind() == "inline" {
218                        (
219                            self.find_markdown_image_violations(content),
220                            self.find_html_image_violations(content),
221                        )
222                    } else {
223                        // html_block
224                        (vec![], self.find_html_image_violations(content))
225                    }
226                };
227
228                for (start, end) in markdown_ranges {
229                    self.add_violation(node, start, end);
230                }
231
232                for (start, end) in html_ranges {
233                    self.add_violation(node, start, end);
234                }
235            }
236            _ => {}
237        }
238    }
239
240    fn finalize(&mut self) -> Vec<RuleViolation> {
241        std::mem::take(&mut self.violations)
242    }
243}
244
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::RuleSeverity;
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Configuration with MD045 enabled as an error and `no-inline-html` off.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![
            ("no-alt-text", RuleSeverity::Error),
            ("no-inline-html", RuleSeverity::Off),
        ])
    }

    /// Lints `input` as `test.md` and returns the number of MD045 violations.
    fn count_md045_violations(input: &str) -> usize {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();
        violations
            .iter()
            .filter(|violation| violation.rule().id == "MD045")
            .count()
    }

    #[test]
    fn test_markdown_images_with_alt_text_no_violations() {
        let input = "# Test\n\n![Valid alt text](image.jpg)\n\n![Another valid image](image.jpg \"Title\")\n\n![Reference image with alt][ref]\n\nReference image with alt text ![Alt text reference][ref2]\n\n[ref]: image.jpg\n[ref2]: image.jpg \"Title\"\n";

        assert_eq!(count_md045_violations(input), 0);
    }

    #[test]
    fn test_markdown_images_without_alt_text_violations() {
        let input = "# Test\n\n![](image.jpg)\n\n![](image.jpg \"Title\")\n\n![Empty alt](image.jpg) and ![](inline-image.jpg) in text\n\nReference image without alt ![][ref]\n\n[ref]: image.jpg\n";

        // Expected violations (zero-based line numbers):
        // Line 2: ![](image.jpg)
        // Line 4: ![](image.jpg "Title")
        // Line 6: ![](inline-image.jpg)
        // Line 8: ![][ref]
        assert_eq!(count_md045_violations(input), 4);
    }

    #[test]
    fn test_html_images_with_alt_attribute_no_violations() {
        let input = "# Test\n\n<img src=\"image.jpg\" alt=\"Valid alt text\" />\n\n<img src=\"image.jpg\" alt=\"Another valid\" >\n\n<IMG SRC=\"image.jpg\" ALT=\"Case insensitive\" />\n\n<img \n  src=\"image.jpg\" \n  alt=\"Multi-line\" \n  />\n\n<img src=\"image.jpg\" alt=\"\" />\n\n<img src=\"image.jpg\" alt='' />\n";

        assert_eq!(count_md045_violations(input), 0);
    }

    #[test]
    fn test_html_images_without_alt_attribute_violations() {
        let input = "# Test\n\n<img src=\"image.jpg\" />\n\n<img src=\"image.jpg\" alt>\n\n<IMG SRC=\"image.jpg\" />\n\n<img \n  src=\"image.jpg\" \n  title=\"Title only\" />\n\n<p><img src=\"nested.jpg\"></p>\n";

        // Expected violations (zero-based line numbers), five in total:
        // Line 2: <img src="image.jpg" />
        // Line 4: <img src="image.jpg" alt>
        // Line 6: <IMG SRC="image.jpg" />
        // Line 8-10: Multi-line img tag
        // Line 12: nested img tag
        assert_eq!(count_md045_violations(input), 5);
    }

    #[test]
    fn test_html_images_with_aria_hidden_no_violations() {
        let input = "# Test\n\n<img src=\"image.jpg\" aria-hidden=\"true\" />\n\n<img src=\"image.jpg\" ARIA-HIDDEN=\"TRUE\" />\n\n<img \n  src=\"image.jpg\" \n  aria-hidden=\"true\"
  />\n\n<img src=\"image.jpg\" aria-hidden='true' />\n";

        assert_eq!(count_md045_violations(input), 0);
    }

    #[test]
    fn test_html_images_with_aria_hidden_false_violations() {
        let input = "# Test\n\n<img src=\"image.jpg\" aria-hidden=\"false\" />\n\n<img src=\"image.jpg\" aria-hidden=\"\" />\n\n<img src=\"image.jpg\" aria-hidden=\"other\" />\n";

        // All three tags are reported because aria-hidden is not "true".
        assert_eq!(count_md045_violations(input), 3);
    }

    #[test]
    fn test_mixed_image_types() {
        let input = "# Test\n\n![Valid alt](image.jpg)\n\n![](no-alt.jpg)\n\n<img src=\"valid.jpg\" alt=\"Valid\" />\n\n<img src=\"no-alt.jpg\" />\n\n<img src=\"hidden.jpg\" aria-hidden=\"true\" />\n\n![Reference valid][ref1]\n\n![][ref2]\n\n[ref1]: image.jpg\n[ref2]: image.jpg\n";

        // Expected violations (zero-based line numbers):
        // Line 4: ![](no-alt.jpg)
        // Line 8: <img src="no-alt.jpg" />
        // Line 14: ![][ref2]
        assert_eq!(count_md045_violations(input), 3);
    }

    #[test]
    fn test_multiline_markdown_images() {
        let input = "# Test\n\n![Alt text](image.jpg 
\"Title\")\n\n![](image.jpg 
\"Title\")\n";

        // Only the second image, which has no alt text, is reported.
        assert_eq!(count_md045_violations(input), 1);
    }

    #[test]
    fn test_images_in_links() {
        let input = "# Test\n\n[![Alt text](image.jpg)](link.html)\n\n[![](no-alt.jpg)](link.html)\n\n[<img src=\"alt.jpg\" alt=\"Alt\" />](link.html)\n\n[<img src=\"no-alt.jpg\" />](link.html)\n";

        // Expected violations (zero-based line numbers):
        // Line 4: [![](no-alt.jpg)](link.html) - markdown image without alt
        // Line 8: [<img src="no-alt.jpg" />](link.html) - HTML img without alt
        assert_eq!(count_md045_violations(input), 2);
    }

    #[test]
    fn test_no_false_positives_in_code_blocks() {
        let input = "# Test\n\n```html\n![](image.jpg)\n<img src=\"image.jpg\" />\n```\n\n    ![](indented-code.jpg)\n    <img src=\"indented.jpg\" />\n\n`![](inline-code.jpg)` and `<img src=\"inline.jpg\" />`\n\nRegular text with ![](actual-image.jpg) should trigger.\n";

        // Only the image outside any code context is reported.
        assert_eq!(count_md045_violations(input), 1);
    }
}