1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::utils::range_utils::calculate_match_range;
6use crate::utils::skip_context::{is_in_front_matter, is_in_html_comment, is_in_math_context};
7use lazy_static::lazy_static;
8use regex::Regex;
9
10lazy_static! {
11 static ref REVERSED_LINK_REGEX: Regex =
12 Regex::new(r"\[([^\]]+)\]\(([^)]+)\)|(\([^)]+\))\[([^\]]+)\]").unwrap();
13 static ref REVERSED_LINK_CHECK_REGEX: Regex = Regex::new(
16 r"\(([^)]*(?:\([^)]*\)[^)]*)*)\)\[([^\]]+)\]"
17 ).unwrap();
18
19 static ref ESCAPED_CHARS: Regex = Regex::new(r"\\[\[\]()]").unwrap();
21
22 static ref MALFORMED_LINK_PATTERNS: Vec<(Regex, &'static str)> = vec![
24 (Regex::new(r"\(([^)]+)\)\[([^\]]*$)").unwrap(), "missing closing bracket"),
26 (Regex::new(r"\[([^\]]+)\]\(([^)]*$)").unwrap(), "missing closing parenthesis"),
27
28 (Regex::new(r"\{([^}]+)\}\[([^\]]+)\]").unwrap(), "wrong bracket type (curly instead of parentheses)"),
30 (Regex::new(r"\[([^\]]+)\]\{([^}]+)\}").unwrap(), "wrong bracket type (curly instead of parentheses)"),
31
32 (Regex::new(r"\[(https?://[^\]]+)\]\(([^)]+)\)").unwrap(), "URL and text appear to be swapped"),
34 (Regex::new(r"\[(www\.[^\]]+)\]\(([^)]+)\)").unwrap(), "URL and text appear to be swapped"),
35 (Regex::new(r"\[([^\]]*\.[a-z]{2,4}[^\]]*)\]\(([^)]+)\)").unwrap(), "URL and text appear to be swapped"),
36 ];
37}
38
/// Lint rule MD011: reports links written as `(url)[text]` instead of
/// `[text](url)`, along with a few other malformed link shapes.
///
/// The rule carries no configuration or state, so it is a unit struct.
#[derive(Debug, Clone)]
pub struct MD011NoReversedLinks;
41
42impl MD011NoReversedLinks {
/// Returns `true` when the character starting at byte offset `pos` of
/// `content` is preceded by an odd number of consecutive backslashes,
/// i.e. when it is backslash-escaped.
///
/// `pos` is a **byte** offset (the unit regex match positions use). The
/// backslash is ASCII, so scanning raw bytes is safe even when the text
/// before `pos` contains multi-byte characters. An offset of `0` or one
/// past the end of `content` yields `false`.
fn is_escaped(content: &str, pos: usize) -> bool {
    let preceding = match content.as_bytes().get(..pos) {
        Some(preceding) => preceding,
        None => return false,
    };

    let backslash_count = preceding.iter().rev().take_while(|&&byte| byte == b'\\').count();

    // An even run means the backslashes escape each other, not the character at `pos`.
    backslash_count % 2 == 1
}
66
67 fn find_reversed_links(content: &str) -> Vec<(usize, usize, String, String)> {
68 let mut results = Vec::new();
69 let mut line_start = 0;
70 let mut current_line = 1;
71
72 for line in content.lines() {
73 if !line.contains('(') || !line.contains('[') || !line.contains(']') || !line.contains(')') {
75 line_start += line.len() + 1;
76 current_line += 1;
77 continue;
78 }
79
80 for cap in REVERSED_LINK_CHECK_REGEX.captures_iter(line) {
81 let url = &cap[1];
83 let text = &cap[2];
84
85 let start = line_start + cap.get(0).unwrap().start();
86 results.push((current_line, start - line_start + 1, text.to_string(), url.to_string()));
87 }
88 line_start += line.len() + 1; current_line += 1;
90 }
91
92 results
93 }
94
95 fn detect_malformed_link_attempts(&self, line: &str) -> Vec<(usize, usize, String, String)> {
97 let mut results = Vec::new();
98 let mut processed_ranges = Vec::new(); for (pattern, issue_type) in MALFORMED_LINK_PATTERNS.iter() {
101 for cap in pattern.captures_iter(line) {
102 let match_obj = cap.get(0).unwrap();
103 let start = match_obj.start();
104 let len = match_obj.len();
105 let end = start + len;
106
107 if processed_ranges
109 .iter()
110 .any(|(proc_start, proc_end)| (start < *proc_end && end > *proc_start))
111 {
112 continue;
113 }
114
115 if let Some((url, text)) = self.extract_url_and_text_from_match(&cap, issue_type) {
117 if self.looks_like_link_attempt(&url, &text) {
119 results.push((start, len, url, text));
120 processed_ranges.push((start, end));
121 }
122 }
123 }
124 }
125
126 results
127 }
128
129 fn extract_url_and_text_from_match(&self, cap: ®ex::Captures, issue_type: &str) -> Option<(String, String)> {
131 match issue_type {
132 "missing closing bracket" => {
133 Some((cap[1].to_string(), format!("{}]", &cap[2])))
135 }
136 "missing closing parenthesis" => {
137 Some((format!("{})", &cap[2]), cap[1].to_string()))
139 }
140 "wrong bracket type (curly instead of parentheses)" => {
141 if cap.get(0).unwrap().as_str().starts_with('{') {
143 Some((cap[1].to_string(), cap[2].to_string()))
145 } else {
146 Some((cap[2].to_string(), cap[1].to_string()))
148 }
149 }
150 "URL and text appear to be swapped" => {
151 Some((cap[1].to_string(), cap[2].to_string()))
153 }
154 _ => None,
155 }
156 }
157
158 fn looks_like_link_attempt(&self, url: &str, text: &str) -> bool {
160 let url_indicators = [
162 "http://", "https://", "www.", "ftp://", ".com", ".org", ".net", ".edu", ".gov", ".io", ".co",
163 ];
164
165 let has_url_indicator = url_indicators
166 .iter()
167 .any(|indicator| url.to_lowercase().contains(indicator));
168
169 let text_looks_reasonable = text.len() >= 3
171 && text.len() <= 50
172 && !url_indicators
173 .iter()
174 .any(|indicator| text.to_lowercase().contains(indicator))
175 && !text.to_lowercase().starts_with("http")
176 && text.chars().any(|c| c.is_alphabetic()); let url_looks_reasonable =
180 url.len() >= 4 && (has_url_indicator || url.contains('.')) && !url.chars().all(|c| c.is_alphabetic()); has_url_indicator && text_looks_reasonable && url_looks_reasonable
184 }
185}
186
187impl Default for MD011NoReversedLinks {
188 fn default() -> Self {
189 Self
190 }
191}
192
193impl Rule for MD011NoReversedLinks {
194 fn name(&self) -> &'static str {
195 "MD011"
196 }
197
198 fn description(&self) -> &'static str {
199 "Link syntax should not be reversed"
200 }
201
202 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
203 let content = ctx.content;
204 let mut warnings = Vec::new();
205 let mut byte_pos = 0;
206
207 for (line_num, line) in content.lines().enumerate() {
208 for cap in REVERSED_LINK_CHECK_REGEX.captures_iter(line) {
210 let match_obj = cap.get(0).unwrap();
211 let match_start = match_obj.start();
212 let match_end = match_obj.end();
213
214 let match_byte_pos = byte_pos + match_start;
216 if ctx.is_in_code_block_or_span(match_byte_pos) {
217 continue;
218 }
219
220 if is_in_html_comment(content, match_byte_pos) {
222 continue;
223 }
224
225 if is_in_math_context(ctx, match_byte_pos) {
227 continue;
228 }
229
230 if is_in_front_matter(content, line_num) {
232 continue;
233 }
234
235 let match_text = match_obj.as_str();
237
238 if match_start > 0 && Self::is_escaped(line, byte_pos + match_start) {
240 continue;
241 }
242
243 let mut skip_match = false;
245 for esc_match in ESCAPED_CHARS.find_iter(match_text) {
246 let esc_pos = match_start + esc_match.start();
247 if esc_pos > 0 && line.chars().nth(esc_pos.saturating_sub(1)) == Some('\\') {
248 skip_match = true;
249 break;
250 }
251 }
252
253 if skip_match {
254 continue;
255 }
256
257 let remaining = &line[match_end..];
260 if remaining.trim_start().starts_with('(') {
261 continue;
262 }
263
264 let url = &cap[1];
266 let text = &cap[2];
267
268 let (start_line, start_col, end_line, end_col) =
270 calculate_match_range(line_num + 1, line, match_obj.start(), match_obj.len());
271
272 warnings.push(LintWarning {
273 rule_name: Some(self.name()),
274 message: format!("Reversed link syntax: use [{text}]({url}) instead"),
275 line: start_line,
276 column: start_col,
277 end_line,
278 end_column: end_col,
279 severity: Severity::Warning,
280 fix: Some(Fix {
281 range: {
282 let line_start_byte = ctx.line_offsets.get(line_num).copied().unwrap_or(0);
284 let match_start_byte = line_start_byte + match_obj.start();
285 let match_end_byte = match_start_byte + match_obj.len();
286 match_start_byte..match_end_byte
287 },
288 replacement: format!("[{text}]({url})"),
289 }),
290 });
291 }
292
293 let malformed_attempts = self.detect_malformed_link_attempts(line);
295 for (start, len, url, text) in malformed_attempts {
296 let match_byte_pos = byte_pos + start;
298 if ctx.is_in_code_block_or_span(match_byte_pos) {
299 continue;
300 }
301
302 if is_in_html_comment(content, match_byte_pos) {
304 continue;
305 }
306
307 if is_in_math_context(ctx, match_byte_pos) {
309 continue;
310 }
311
312 if is_in_front_matter(content, line_num) {
314 continue;
315 }
316
317 let (start_line, start_col, end_line, end_col) = calculate_match_range(line_num + 1, line, start, len);
319
320 warnings.push(LintWarning {
321 rule_name: Some(self.name()),
322 message: "Malformed link syntax".to_string(),
323 line: start_line,
324 column: start_col,
325 end_line,
326 end_column: end_col,
327 severity: Severity::Warning,
328 fix: Some(Fix {
329 range: {
330 let line_start_byte = ctx.line_offsets.get(line_num).copied().unwrap_or(0);
332 let match_start_byte = line_start_byte + start;
333 let match_end_byte = match_start_byte + len;
334 match_start_byte..match_end_byte
335 },
336 replacement: format!("[{text}]({url})"),
337 }),
338 });
339 }
340
341 byte_pos += line.len() + 1; }
343
344 Ok(warnings)
345 }
346
347 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
348 let content = ctx.content;
349 let mut result = content.to_string();
350 let mut offset: usize = 0;
351
352 for (line_num, column, text, url) in Self::find_reversed_links(content) {
353 let mut pos = 0;
355 for (i, line) in content.lines().enumerate() {
356 if i + 1 == line_num {
357 pos += column - 1;
358 break;
359 }
360 pos += line.len() + 1;
361 }
362
363 if !ctx.is_in_code_block_or_span(pos) {
364 let adjusted_pos = pos + offset;
365 let original_len = format!("({text})[{url}]").len();
366 let replacement = format!("[{text}]({url})");
367 result.replace_range(adjusted_pos..adjusted_pos + original_len, &replacement);
368 if replacement.len() > original_len {
370 offset += replacement.len() - original_len;
371 } else {
372 offset = offset.saturating_sub(original_len - replacement.len());
373 }
374 }
375 }
376
377 Ok(result)
378 }
379
380 fn as_any(&self) -> &dyn std::any::Any {
381 self
382 }
383
384 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
385 ctx.content.is_empty() || !ctx.content.contains('(') || !ctx.content.contains('[')
387 }
388
389 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
390 where
391 Self: Sized,
392 {
393 Box::new(MD011NoReversedLinks)
394 }
395}
396
397#[cfg(test)]
398mod tests {
399 use super::*;
400 use crate::lint_context::LintContext;
401 use crate::utils::skip_context::is_in_front_matter;
402
#[test]
fn test_capture_group_order_fix() {
    // In `(url)[text]` the URL is capture group 1 and the text group 2; the
    // suggested replacement must put them back in `[text](url)` order.
    let ctx = LintContext::new(
        "Check out (https://example.com)[this link] for more info.",
        crate::config::MarkdownFlavor::Standard,
    );
    let warnings = MD011NoReversedLinks.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 1);
    assert!(warnings[0].message.contains("Reversed link syntax"));

    let suggested = warnings[0].fix.as_ref().unwrap();
    assert_eq!(suggested.replacement, "[this link](https://example.com)");
}
426
#[test]
fn test_multiple_reversed_links() {
    // Two reversed links on one line must each get their own fix, in order.
    let ctx = LintContext::new(
        "Visit (https://example.com)[Example] and (https://test.com)[Test Site].",
        crate::config::MarkdownFlavor::Standard,
    );
    let warnings = MD011NoReversedLinks.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 2);

    let expected = ["[Example](https://example.com)", "[Test Site](https://test.com)"];
    for (warning, wanted) in warnings.iter().zip(expected.iter()) {
        assert_eq!(warning.fix.as_ref().unwrap().replacement, *wanted);
    }
}
448
#[test]
fn test_normal_links_not_flagged() {
    // Correctly written links must never produce a warning.
    let ctx = LintContext::new(
        "This is a normal [link](https://example.com) and another [link](https://test.com).",
        crate::config::MarkdownFlavor::Standard,
    );

    let warnings = MD011NoReversedLinks.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 0);
}
460
#[test]
fn debug_capture_groups() {
    // Diagnostic test: prints what each capture group holds and what the
    // rule suggests. It makes no assertions.
    let regex = Regex::new(r"\(([^)]+)\)\[([^\]]+)\]").unwrap();
    let test_text = "(https://example.com)[Click here]";

    let cap = match regex.captures(test_text) {
        Some(cap) => cap,
        None => return,
    };

    println!("Full match: {}", &cap[0]);
    println!("cap[1] (first group): {}", &cap[1]);
    println!("cap[2] (second group): {}", &cap[2]);

    let current_fix = format!("[{}]({})", &cap[2], &cap[1]);
    println!("Current fix produces: {current_fix}");

    let ctx = LintContext::new(test_text, crate::config::MarkdownFlavor::Standard);
    let warnings = MD011NoReversedLinks.check(&ctx).unwrap();
    if let Some(first) = warnings.first() {
        println!("Rule fix produces: {}", first.fix.as_ref().unwrap().replacement);
    }
}
487
#[test]
fn test_front_matter_detection() {
    // Only the leading YAML block (0-based lines 0..=4) counts as front
    // matter; the later `+++` block and all body lines do not.
    let content = r#"---
title: "My Post"
tags: ["test", "example"]
description: "Pattern (like)[this] in frontmatter"
---

# Content

Regular (https://example.com)[reversed link] that should be flagged.

+++
title = "TOML frontmatter"
tags = ["more", "tags"]
pattern = "(toml)[pattern]"
+++

# More Content

Another (https://test.com)[reversed] link should be flagged."#;

    for (idx, line) in content.lines().enumerate() {
        let in_fm = is_in_front_matter(content, idx);

        println!("Line {:2} (0-idx: {:2}): in_fm={:5} | {:?}", idx + 1, idx, in_fm, line);

        match idx {
            0..=4 => assert!(
                in_fm,
                "Line {} (0-idx: {}) should be in YAML front matter but got false. Content: {:?}",
                idx + 1,
                idx,
                line
            ),
            10..=14 => assert!(
                !in_fm,
                "Line {} (0-idx: {}) should NOT be in front matter (TOML block not at beginning). Content: {:?}",
                idx + 1,
                idx,
                line
            ),
            _ => assert!(
                !in_fm,
                "Line {} (0-idx: {}) should NOT be in front matter but got true. Content: {:?}",
                idx + 1,
                idx,
                line
            ),
        }
    }
}
549
#[test]
fn test_malformed_link_detection() {
    let rule = MD011NoReversedLinks;

    // Wrong bracket type and swapped URL/text are each reported exactly once.
    let flagged = [
        "Check out {https://example.com}[this website].",
        "Visit [https://example.com](Click Here).",
    ];
    for content in flagged.iter().copied() {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("Malformed link syntax"));
    }

    // Valid links and ordinary prose with brackets/parentheses stay silent.
    let clean = [
        "This is a [normal link](https://example.com).",
        "Regular text with [brackets] and (parentheses).",
        "(example.com)is a test domain.",
        "(optional)parameter should not be flagged.",
    ];
    for content in clean.iter().copied() {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0);
    }
}
591
#[test]
fn test_malformed_link_fixes() {
    let rule = MD011NoReversedLinks;

    // (input, replacement the single warning must suggest)
    let cases = [
        (
            "Check out {https://example.com}[this website].",
            "[this website](https://example.com)",
        ),
        (
            "Visit [https://example.com](Click Here).",
            "[Click Here](https://example.com)",
        ),
    ];

    for (content, expected) in cases.iter().copied() {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1);
        let suggested = warnings[0].fix.as_ref().unwrap();
        assert_eq!(suggested.replacement, expected);
    }
}
612
#[test]
fn test_conservative_detection() {
    let rule = MD011NoReversedLinks;

    // None of these contain anything URL-like, so nothing may be reported.
    let not_links = [
        "This (not-a-url)text should be ignored.",
        "Also [regular text](not a url) should be ignored.",
        "And {not-url}[not-text] should be ignored.",
    ];

    for content in not_links.iter().copied() {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0);
    }
}
633
#[test]
fn test_skip_code_blocks() {
    // Bracket/parenthesis sequences inside a fenced code block are ignored;
    // the reversed link in the prose after it is still reported.
    let content = r#"Here's an example:

```rust
// This regex pattern [.!?]+\s*$ should not be flagged
static ref TRAILING_PUNCTUATION: Regex = Regex::new(r"(?m)[.!?]+\s*$").unwrap();
```

But this (https://example.com)[reversed link] should be flagged."#;

    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
    let warnings = MD011NoReversedLinks.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 1);
    let only = &warnings[0];
    assert!(only.message.contains("Reversed link syntax"));
    assert_eq!(only.line, 8);
}
656
#[test]
fn test_negative_lookahead() {
    let rule = MD011NoReversedLinks;

    // (input, expected warning count, failure message)
    let cases = [
        (
            "This is a reference-style link: (see here)[ref](https://example.com)",
            0,
            "Should not flag (text)[ref](url) pattern",
        ),
        (
            "This is reversed: (https://example.com)[click here]",
            1,
            "Should flag genuine reversed links",
        ),
        (
            "Reference with space: (text)[ref] (url)",
            0,
            "Should not flag when space before (url)",
        ),
    ];

    for (content, expected, why) in cases.iter().copied() {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), expected, "{}", why);
    }
}
679
#[test]
fn test_escaped_characters() {
    let rule = MD011NoReversedLinks;

    // (input, expected warning count, failure message)
    let cases = [
        (
            r"Escaped: \(not a link\)\[also not\]",
            0,
            "Should not flag escaped brackets",
        ),
        (
            "(https://example.com/path(with)parens)[text]",
            1,
            "Should still flag URLs with nested parentheses",
        ),
    ];

    for (content, expected, why) in cases.iter().copied() {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), expected, "{}", why);
    }
}
696
#[test]
fn test_inline_code_patterns() {
    let rule = MD011NoReversedLinks;

    // `call()[index]` inside inline code spans is not a reversed link.
    let code_only = [
        (
            "I find `inspect.stack()[1].frame` a lot easier to understand (or at least guess about) at a glance than `inspect.stack()[1][0]`.",
            "Should not flag ()[1] patterns inside inline code",
        ),
        (
            "Use `array()[0]` or `func()[1]` to access elements.",
            "Should not flag array access patterns in inline code",
        ),
    ];
    for (content, why) in code_only.iter().copied() {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0, "{}", why);
    }

    // A real reversed link next to an inline code span is still reported.
    let mixed = "Check out (https://example.com)[this link] and use `array()[1]`.";
    let ctx = LintContext::new(mixed, crate::config::MarkdownFlavor::Standard);
    let warnings = rule.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should flag actual reversed link but not code pattern");
    assert!(warnings[0].message.contains("Reversed link syntax"));

    // Inline code, a reversed link in prose, and a fenced block together.
    let document = r#"
Here's some code: `func()[1]` and `other()[2]`.

But this is wrong: (https://example.com)[Click here]

```python
# This should not be flagged
result = inspect.stack()[1]
```
"#;
    let ctx = LintContext::new(document, crate::config::MarkdownFlavor::Standard);
    let warnings = rule.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should only flag the actual reversed link");
    assert_eq!(warnings[0].line, 4, "Should flag the reversed link on line 4");
}
737
#[test]
fn test_issue_26_specific_case() {
    // Regression test for issue #26: `ATTRIBUTES(doc)[0]` inside an inline
    // code span must not be mistaken for a reversed link.
    let content = r#"The first thing I need to find is the name of the redacted key name, `doc.<key_name_omitted>`. I'll use `SUBSTRING(ATTRIBUTES(doc)[0], 0, 1) == '<c>'` as that test, where `<c>` is different characters. This gets the first attribute from `doc` and uses `SUBSTRING` to get the first character."#;

    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
    let warnings = MD011NoReversedLinks.check(&ctx).unwrap();

    assert_eq!(
        warnings.len(),
        0,
        "Should not flag ATTRIBUTES(doc)[0] inside inline code (issue #26)"
    );
}
752}