1use serde::Deserialize;
2use std::collections::HashSet;
3use std::rc::Rc;
4
5use once_cell::sync::Lazy;
6use regex::Regex;
7use tree_sitter::Node;
8
9use crate::{
10 linter::{range_from_tree_sitter, RuleViolation},
11 rules::{Context, Rule, RuleLinter, RuleType},
12};
13
14#[derive(Debug, PartialEq, Clone, Deserialize)]
16pub struct MD059DescriptiveLinkTextTable {
17 #[serde(default)]
18 pub prohibited_texts: Vec<String>,
19}
20
21impl Default for MD059DescriptiveLinkTextTable {
22 fn default() -> Self {
23 Self {
24 prohibited_texts: vec![
25 "click here".to_string(),
26 "here".to_string(),
27 "link".to_string(),
28 "more".to_string(),
29 ],
30 }
31 }
32}
33
34static RE_INLINE_LINK: Lazy<Regex> = Lazy::new(|| {
36 Regex::new(r"(?:^|[^!])\[([^\]]*)\]\(([^)]+)\)").expect("Failed to compile inline link regex")
37});
38
39static RE_REF_LINK: Lazy<Regex> = Lazy::new(|| {
41 Regex::new(r"(?:^|[^!])\[([^\]]*)\]\[([^\]]+)\]")
42 .expect("Failed to compile reference link regex")
43});
44
45static RE_COLLAPSED_REF_LINK: Lazy<Regex> = Lazy::new(|| {
47 Regex::new(r"(?:^|[^!])\[([^\]]+)\]\[\]")
48 .expect("Failed to compile collapsed reference link regex")
49});
50
51static RE_NORMALIZE_PUNCTUATION: Lazy<Regex> =
52 Lazy::new(|| Regex::new(r"[\W_]+").expect("Failed to compile punctuation regex"));
53static RE_NORMALIZE_WHITESPACE: Lazy<Regex> =
54 Lazy::new(|| Regex::new(r"\s+").expect("Failed to compile whitespace regex"));
55
56pub(crate) struct MD059Linter {
60 context: Rc<Context>,
61 violations: Vec<RuleViolation>,
62 prohibited_texts: HashSet<String>,
63}
64
65impl MD059Linter {
66 pub fn new(context: Rc<Context>) -> Self {
67 let prohibited_texts = context
68 .config
69 .linters
70 .settings
71 .descriptive_link_text
72 .prohibited_texts
73 .iter()
74 .map(|text| normalize_text(text))
75 .collect();
76
77 Self {
78 context,
79 violations: Vec::new(),
80 prohibited_texts,
81 }
82 }
83}
84
85impl RuleLinter for MD059Linter {
86 fn feed(&mut self, node: &Node) {
87 match node.kind() {
89 "link" => self.check_link_text(node),
90 "inline" => self.check_inline_for_links(node),
91 _ => {}
92 }
93 }
94
95 fn finalize(&mut self) -> Vec<RuleViolation> {
96 std::mem::take(&mut self.violations)
97 }
98}
99
100impl MD059Linter {
101 fn check_inline_for_links(&mut self, inline_node: &Node) {
102 let link_text = {
104 let document_content = self.context.document_content.borrow();
105 inline_node
106 .utf8_text(document_content.as_bytes())
107 .unwrap_or("")
108 .to_string()
109 };
110
111 if !link_text.is_empty() {
113 self.check_text_for_link_patterns(&link_text, inline_node);
114 }
115 }
116
117 fn check_text_for_link_patterns(&mut self, text: &str, node: &Node) {
118 for caps in RE_INLINE_LINK.captures_iter(text) {
119 if let Some(label_match) = caps.get(1) {
120 let label_text = label_match.as_str();
121 self.check_label_for_prohibited_text(label_text, node);
122 }
123 }
124
125 for caps in RE_REF_LINK.captures_iter(text) {
126 if let Some(label_match) = caps.get(1) {
127 let label_text = label_match.as_str();
128 self.check_label_for_prohibited_text(label_text, node);
129 }
130 }
131
132 for caps in RE_COLLAPSED_REF_LINK.captures_iter(text) {
133 if let Some(label_match) = caps.get(1) {
134 let label_text = label_match.as_str();
135 self.check_label_for_prohibited_text(label_text, node);
136 }
137 }
138 }
139
140 fn check_link_text(&mut self, link_node: &Node) {
141 if let Some(text) = self.extract_link_text(link_node) {
143 if self.contains_allowed_elements(link_node) {
145 return;
146 }
147
148 let normalized_text = normalize_text(&text);
149
150 if self.prohibited_texts.contains(&normalized_text) {
151 self.create_violation(link_node, &text);
152 }
153 }
154 }
155
156 fn check_label_for_prohibited_text(&mut self, label_text: &str, node: &Node) {
157 if label_text.contains('`') || label_text.contains('<') {
159 return;
160 }
161
162 let normalized_text = normalize_text(label_text);
163
164 if self.prohibited_texts.contains(&normalized_text) {
165 self.create_violation(node, label_text);
166 }
167 }
168
169 fn extract_link_text(&self, link_node: &Node) -> Option<String> {
170 let document_content = self.context.document_content.borrow();
173 let document_bytes = document_content.as_bytes();
174
175 for child in link_node.children(&mut link_node.walk()) {
177 if child.kind() == "label" {
178 let label_text = child.utf8_text(document_bytes).unwrap_or("");
180
181 if label_text.starts_with('[') && label_text.ends_with(']') {
183 let inner_text = &label_text[1..label_text.len() - 1];
184 return Some(inner_text.to_string());
185 }
186 }
187 }
188
189 let full_text = link_node.utf8_text(document_bytes).unwrap_or("");
191 if let Some(start) = full_text.find('[') {
192 if let Some(end) = full_text[start..].find(']') {
193 let inner_text = &full_text[start + 1..start + end];
194 return Some(inner_text.to_string());
195 }
196 }
197
198 None
199 }
200
201 fn contains_allowed_elements(&self, link_node: &Node) -> bool {
202 let allowed_types: &[&str] = &["code_span", "html_tag", "inline_html"];
205 let mut cursor = link_node.walk();
206 loop {
207 if allowed_types.contains(&cursor.node().kind()) {
208 return true;
209 }
210 if !cursor.goto_first_child() {
211 while !cursor.goto_next_sibling() {
212 if !cursor.goto_parent() {
213 return false;
214 }
215 }
216 }
217 }
218 }
219
220 fn create_violation(&mut self, node: &Node, link_text: &str) {
221 let message = format!("Link text should be descriptive: '{link_text}'");
222
223 self.violations.push(RuleViolation::new(
224 &MD059,
225 message,
226 self.context.file_path.clone(),
227 range_from_tree_sitter(&node.range()),
228 ));
229 }
230}
231
232fn normalize_text(text: &str) -> String {
235 let step1 = RE_NORMALIZE_PUNCTUATION.replace_all(text, " ");
237
238 let step2 = RE_NORMALIZE_WHITESPACE.replace_all(&step1, " ");
240
241 step2.to_lowercase().trim().to_string()
243}
244
245pub const MD059: Rule = Rule {
246 id: "MD059",
247 alias: "descriptive-link-text",
248 tags: &["accessibility", "links"],
249 description: "Link text should be descriptive",
250 rule_type: RuleType::Token,
251 required_nodes: &["link", "inline"],
252 new_linter: |context| Box::new(MD059Linter::new(context)),
253};
254
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::RuleSeverity;
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    use super::normalize_text;

    /// Configuration with only `descriptive-link-text` enabled, so violation
    /// counts reflect MD059 alone.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![
            ("descriptive-link-text", RuleSeverity::Error),
            ("heading-style", RuleSeverity::Off),
            ("heading-increment", RuleSeverity::Off),
            ("line-length", RuleSeverity::Off),
        ])
    }

    /// Lints `input` with the test configuration and returns the number of
    /// violations reported.
    fn violation_count(input: &str) -> usize {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        linter.analyze().len()
    }

    #[test]
    fn test_normalize_text() {
        assert_eq!("click here", normalize_text("click here"));
        assert_eq!("click here", normalize_text("Click Here"));
        // Runs of whitespace collapse to a single space.
        assert_eq!("click here", normalize_text("click   here"));
        assert_eq!("click here", normalize_text("click_here"));
        assert_eq!("click here", normalize_text("click-here"));
        // Leading and trailing whitespace is trimmed.
        assert_eq!("click here", normalize_text("  click   here  "));
        assert_eq!("click here", normalize_text("click.here!"));
    }

    #[test]
    fn test_descriptive_link_passes() {
        let input = "[Download the budget document](https://example.com/budget.pdf)";

        assert_eq!(0, violation_count(input));
    }

    #[test]
    fn test_generic_link_text_fails() {
        let input = "[click here](https://example.com)";

        let config = test_config();
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        assert_eq!(1, violations.len());
        let violation = &violations[0];
        assert_eq!("MD059", violation.rule().id);
        assert!(violation
            .message()
            .contains("Link text should be descriptive"));
        assert!(violation.message().contains("click here"));
    }

    #[test]
    fn test_prohibited_texts() {
        // One link per default prohibited text.
        let test_cases = vec![
            "[here](url)",
            "[link](url)",
            "[more](url)",
            "[click here](url)",
        ];

        for input in test_cases {
            let config = test_config();
            let mut linter =
                MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
            let violations = linter.analyze();

            assert_eq!(1, violations.len(), "Failed for input: {input}");
            let violation = &violations[0];
            assert_eq!("MD059", violation.rule().id);
        }
    }

    #[test]
    fn test_case_insensitive() {
        let input = "[CLICK HERE](https://example.com)";

        assert_eq!(1, violation_count(input));
    }

    #[test]
    fn test_punctuation_normalized() {
        let input = "[click-here!](https://example.com)";

        assert_eq!(1, violation_count(input));
    }

    #[test]
    fn test_extra_whitespace_normalized() {
        let input = "[  click   here  ](https://example.com)";

        assert_eq!(1, violation_count(input));
    }

    #[test]
    fn test_reference_links() {
        let input = r#"[click here][ref]

[ref]: https://example.com"#;

        assert_eq!(1, violation_count(input));
    }

    #[test]
    fn test_multiple_links() {
        let input = "[good link](url1) and [click here](url2) and [another good](url3)";

        let config = test_config();
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        assert_eq!(1, violations.len());
        assert!(violations[0].message().contains("click here"));
    }

    #[test]
    fn test_empty_link_text() {
        let input = "[](https://example.com)";

        assert_eq!(0, violation_count(input));
    }

    #[test]
    fn test_links_with_code_allowed() {
        let input = "[`click here`](https://example.com)";

        assert_eq!(0, violation_count(input));
    }

    #[test]
    fn test_image_links_ignored() {
        // Image alt text is not link text, even when it is a prohibited phrase.
        let input = "![click here](https://example.com/image.png)";

        assert_eq!(0, violation_count(input));
    }
}