1use std::sync::LazyLock;
5
6use regex::Regex;
7
8use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
9use crate::utils::range_utils::{LineIndex, calculate_url_range};
10use crate::utils::regex_cache::{
11 EMAIL_PATTERN, URL_IPV6_REGEX, URL_QUICK_CHECK_REGEX, URL_STANDARD_REGEX, URL_WWW_REGEX, XMPP_URI_REGEX,
12};
13
14use crate::filtered_lines::FilteredLinesExt;
15use crate::lint_context::LintContext;
16
/// Matches a `scheme://` prefix for a fixed list of non-web schemes (gRPC, WebSocket, SSH, ...).
/// The pattern is unanchored, so it matches anywhere inside the tested string.
static CUSTOM_PROTOCOL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#).unwrap()
});
/// Matches an inline link `[text](destination "optional title")`.
/// The text may contain one level of nested brackets; the whitespace-free destination is capture group 1.
static MARKDOWN_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap()
});
/// Matches an inline link with an empty destination: `[text]()`.
static MARKDOWN_EMPTY_LINK_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(\)"#).unwrap());
/// Matches a collapsed reference link: `[text][]`.
static MARKDOWN_EMPTY_REF_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\[\]"#).unwrap());
/// Matches an angle-bracket autolink `<...>` whose content (capture group 1) is one of:
/// an `http(s)`/`ftp(s)` URL (including a bracketed IPv6 host with optional `%zone`),
/// an `xmpp:` URI, or an email-like `local@domain.tld`.
static ANGLE_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|xmpp:[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#,
    )
    .unwrap()
});
/// Matches a line that consists solely of a linked image ("badge"): `[![alt](image)](target)`,
/// with optional surrounding whitespace.
static BADGE_LINK_LINE_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#).unwrap());
/// Matches an image `![alt](source "optional title")`, tolerating whitespace after `!` and
/// between `]` and `(`. Alt text is capture group 1, the source is capture group 2.
static MARKDOWN_IMAGE_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap());
/// Matches the start of a reference definition line: optional indentation, `[label]:`, then either
/// `<` or an `http(s)`/`ftp(s)` URL scheme.
static REFERENCE_DEF_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*\[[^\]]+\]:\s*(?:<|(?:https?|ftps?)://)").unwrap());
/// Matches a line that reaches `](`...`)` before any `[` appears.
/// NOTE(review): presumably the tail of a link whose `[text` started on a previous line — confirm.
static MULTILINE_LINK_CONTINUATION_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"^[^\[]*\]\(.*\)"#).unwrap());
/// Matches a bracketed span without nested brackets: `[text]` (text is capture group 1).
static SHORTCUT_REF_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"\[([^\[\]]+)\]"#).unwrap());
42
/// Scratch storage used by `MD034NoBareUrls::check_line`.
///
/// `check` creates one instance and passes it to every `check_line` call; `check_line` clears
/// each vector before filling it, so the allocations are reused from line to line.
#[derive(Default)]
struct LineCheckBuffers {
    /// `(start, end)` byte ranges, relative to the current line, of link-like constructs
    /// (inline links, reference-style brackets, angle-bracket autolinks).
    markdown_link_ranges: Vec<(usize, usize)>,
    /// `(start, end)` byte ranges, relative to the current line, of image constructs.
    image_ranges: Vec<(usize, usize)>,
    /// Candidate URLs found on the current line: `(start, end, matched text)`.
    urls_found: Vec<(usize, usize, String)>,
}
50
/// Lint rule MD034: reports URLs and email addresses that are written bare instead of being
/// wrapped in angle brackets or link syntax. The rule is stateless.
#[derive(Default, Clone)]
pub struct MD034NoBareUrls;
53
54impl MD034NoBareUrls {
55 #[inline]
56 pub fn should_skip_content(&self, content: &str) -> bool {
57 let bytes = content.as_bytes();
60 let has_colon = bytes.contains(&b':');
61 let has_at = bytes.contains(&b'@');
62 let has_www = content.contains("www.");
63 !has_colon && !has_at && !has_www
64 }
65
66 fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
68 let mut trimmed = url;
69
70 let open_parens = url.chars().filter(|&c| c == '(').count();
72 let close_parens = url.chars().filter(|&c| c == ')').count();
73
74 if close_parens > open_parens {
75 let mut balance = 0;
77 let mut last_balanced_pos = url.len();
78
79 for (byte_idx, c) in url.char_indices() {
80 if c == '(' {
81 balance += 1;
82 } else if c == ')' {
83 balance -= 1;
84 if balance < 0 {
85 last_balanced_pos = byte_idx;
87 break;
88 }
89 }
90 }
91
92 trimmed = &trimmed[..last_balanced_pos];
93 }
94
95 while let Some(last_char) = trimmed.chars().last() {
97 if matches!(last_char, '.' | ',' | ';' | ':' | '!' | '?') {
98 if last_char == ':' && trimmed.len() > 1 {
101 break;
103 }
104 trimmed = &trimmed[..trimmed.len() - 1];
105 } else {
106 break;
107 }
108 }
109
110 trimmed
111 }
112
    /// Returns `true` when `line` matches `REFERENCE_DEF_REGEX`, i.e. it starts (after optional
    /// whitespace) with `[label]:` followed by `<` or an `http(s)`/`ftp(s)` scheme.
    fn is_reference_definition(&self, line: &str) -> bool {
        REFERENCE_DEF_REGEX.is_match(line)
    }
117
118 fn check_line(
119 &self,
120 line: &str,
121 ctx: &LintContext,
122 line_number: usize,
123 code_spans: &[crate::lint_context::CodeSpan],
124 buffers: &mut LineCheckBuffers,
125 line_index: &LineIndex,
126 ) -> Vec<LintWarning> {
127 let mut warnings = Vec::new();
128
129 if self.is_reference_definition(line) {
131 return warnings;
132 }
133
134 if ctx.line_info(line_number).is_some_and(|info| info.in_html_block) {
136 return warnings;
137 }
138
139 if MULTILINE_LINK_CONTINUATION_REGEX.is_match(line) {
142 return warnings;
143 }
144
145 let has_quick_check = URL_QUICK_CHECK_REGEX.is_match(line);
147 let has_www = line.contains("www.");
148 let has_at = line.contains('@');
149
150 if !has_quick_check && !has_at && !has_www {
151 return warnings;
152 }
153
154 buffers.markdown_link_ranges.clear();
156 buffers.image_ranges.clear();
157
158 let has_bracket = line.contains('[');
159 let has_angle = line.contains('<');
160 let has_bang = line.contains('!');
161
162 if has_bracket {
163 for mat in MARKDOWN_LINK_REGEX.find_iter(line) {
164 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
165 }
166
167 for mat in MARKDOWN_EMPTY_LINK_REGEX.find_iter(line) {
169 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
170 }
171
172 for mat in MARKDOWN_EMPTY_REF_REGEX.find_iter(line) {
173 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
174 }
175
176 for mat in SHORTCUT_REF_REGEX.find_iter(line) {
178 let end = mat.end();
179 let next_non_ws = line[end..].bytes().find(|b| !b.is_ascii_whitespace());
180 if next_non_ws == Some(b'(') || next_non_ws == Some(b'[') {
181 continue;
182 }
183 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
184 }
185
186 if has_bang && BADGE_LINK_LINE_REGEX.is_match(line) {
188 return warnings;
189 }
190 }
191
192 if has_angle {
193 for mat in ANGLE_LINK_REGEX.find_iter(line) {
194 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
195 }
196 }
197
198 if has_bang && has_bracket {
200 for mat in MARKDOWN_IMAGE_REGEX.find_iter(line) {
201 buffers.image_ranges.push((mat.start(), mat.end()));
202 }
203 }
204
205 buffers.urls_found.clear();
207
208 for mat in URL_IPV6_REGEX.find_iter(line) {
210 let url_str = mat.as_str();
211 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
212 }
213
214 for mat in URL_STANDARD_REGEX.find_iter(line) {
216 let url_str = mat.as_str();
217
218 if url_str.contains("://[") {
220 continue;
221 }
222
223 if let Some(host_start) = url_str.find("://") {
226 let after_protocol = &url_str[host_start + 3..];
227 if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
229 if line.as_bytes().get(mat.end()) == Some(&b']') {
231 continue;
233 }
234 }
235 }
236
237 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
238 }
239
240 for mat in URL_WWW_REGEX.find_iter(line) {
242 let url_str = mat.as_str();
243 let start_pos = mat.start();
244 let end_pos = mat.end();
245
246 if start_pos > 0 {
248 let prev_char = line.as_bytes().get(start_pos - 1).copied();
249 if prev_char == Some(b'/') || prev_char == Some(b'@') {
250 continue;
251 }
252 }
253
254 if start_pos > 0 && end_pos < line.len() {
256 let prev_char = line.as_bytes().get(start_pos - 1).copied();
257 let next_char = line.as_bytes().get(end_pos).copied();
258 if prev_char == Some(b'<') && next_char == Some(b'>') {
259 continue;
260 }
261 }
262
263 buffers.urls_found.push((start_pos, end_pos, url_str.to_string()));
264 }
265
266 for mat in XMPP_URI_REGEX.find_iter(line) {
268 let uri_str = mat.as_str();
269 let start_pos = mat.start();
270 let end_pos = mat.end();
271
272 if start_pos > 0 && end_pos < line.len() {
274 let prev_char = line.as_bytes().get(start_pos - 1).copied();
275 let next_char = line.as_bytes().get(end_pos).copied();
276 if prev_char == Some(b'<') && next_char == Some(b'>') {
277 continue;
278 }
279 }
280
281 buffers.urls_found.push((start_pos, end_pos, uri_str.to_string()));
282 }
283
284 for &(start, _end, ref url_str) in &buffers.urls_found {
286 if CUSTOM_PROTOCOL_REGEX.is_match(url_str) {
288 continue;
289 }
290
291 let is_inside_construct = buffers
297 .markdown_link_ranges
298 .iter()
299 .any(|&(s, e)| start >= s && start < e)
300 || buffers.image_ranges.iter().any(|&(s, e)| start >= s && start < e);
301
302 if is_inside_construct {
303 continue;
304 }
305
306 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
308 let absolute_pos = line_start_byte + start;
309
310 if ctx.is_in_html_tag(absolute_pos) {
312 continue;
313 }
314
315 if ctx.is_in_html_comment(absolute_pos) || ctx.is_in_mdx_comment(absolute_pos) {
317 continue;
318 }
319
320 if ctx.is_in_shortcode(absolute_pos) {
322 continue;
323 }
324
325 if ctx.flavor.is_pandoc_compatible()
329 && (ctx.is_in_line_block(absolute_pos) || ctx.is_in_pandoc_metadata(absolute_pos))
330 {
331 continue;
332 }
333
334 let trimmed_url = self.trim_trailing_punctuation(url_str);
336
337 if !trimmed_url.is_empty() && trimmed_url != "//" {
339 let trimmed_len = trimmed_url.len();
340 let (start_line, start_col, end_line, end_col) =
341 calculate_url_range(line_number, line, start, trimmed_len);
342
343 let replacement = if trimmed_url.starts_with("www.") {
345 format!("<https://{trimmed_url}>")
346 } else {
347 format!("<{trimmed_url}>")
348 };
349
350 warnings.push(LintWarning {
351 rule_name: Some("MD034".to_string()),
352 line: start_line,
353 column: start_col,
354 end_line,
355 end_column: end_col,
356 message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
357 severity: Severity::Warning,
358 fix: Some(Fix::new(
359 {
360 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
361 (line_start_byte + start)..(line_start_byte + start + trimmed_len)
362 },
363 replacement,
364 )),
365 });
366 }
367 }
368
369 for cap in EMAIL_PATTERN.captures_iter(line) {
371 if let Some(mat) = cap.get(0) {
372 let email = mat.as_str();
373 let start = mat.start();
374 let end = mat.end();
375
376 if start >= 5 && line.is_char_boundary(start - 5) && &line[start - 5..start] == "xmpp:" {
379 continue;
380 }
381
382 let mut is_inside_construct = false;
384 for &(link_start, link_end) in &buffers.markdown_link_ranges {
385 if start >= link_start && end <= link_end {
386 is_inside_construct = true;
387 break;
388 }
389 }
390
391 if !is_inside_construct {
392 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
394 let absolute_pos = line_start_byte + start;
395
396 if ctx.is_in_html_tag(absolute_pos) {
398 continue;
399 }
400
401 if ctx.flavor.is_pandoc_compatible()
403 && (ctx.is_in_line_block(absolute_pos) || ctx.is_in_pandoc_metadata(absolute_pos))
404 {
405 continue;
406 }
407
408 let is_in_code_span = code_spans
410 .iter()
411 .any(|span| absolute_pos >= span.byte_offset && absolute_pos < span.byte_end);
412
413 if !is_in_code_span {
414 let email_len = end - start;
415 let (start_line, start_col, end_line, end_col) =
416 calculate_url_range(line_number, line, start, email_len);
417
418 warnings.push(LintWarning {
419 rule_name: Some("MD034".to_string()),
420 line: start_line,
421 column: start_col,
422 end_line,
423 end_column: end_col,
424 message: format!("Email address without angle brackets or link formatting: '{email}'"),
425 severity: Severity::Warning,
426 fix: Some(Fix::new(
427 (line_start_byte + start)..(line_start_byte + end),
428 format!("<{email}>"),
429 )),
430 });
431 }
432 }
433 }
434 }
435
436 warnings
437 }
438}
439
440impl Rule for MD034NoBareUrls {
    /// Returns the rule identifier, `"MD034"`.
    #[inline]
    fn name(&self) -> &'static str {
        "MD034"
    }
445
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
449
450 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
451 where
452 Self: Sized,
453 {
454 Box::new(MD034NoBareUrls)
455 }
456
    /// Returns the category this rule belongs to: `RuleCategory::Link`.
    #[inline]
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
461
462 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
463 !ctx.likely_has_links_or_images() && self.should_skip_content(ctx.content)
464 }
465
    /// Returns the one-line, human-readable summary of the rule.
    #[inline]
    fn description(&self) -> &'static str {
        "No bare URLs - wrap URLs in angle brackets"
    }
470
471 fn check(&self, ctx: &LintContext) -> LintResult {
472 let mut warnings = Vec::new();
473 let content = ctx.content;
474
475 if self.should_skip_content(content) {
477 return Ok(warnings);
478 }
479
480 let line_index = &ctx.line_index;
482
483 let code_spans = ctx.code_spans();
485
486 let mut buffers = LineCheckBuffers::default();
488
489 for line in ctx
493 .filtered_lines()
494 .skip_front_matter()
495 .skip_code_blocks()
496 .skip_jsx_expressions()
497 .skip_mdx_comments()
498 .skip_obsidian_comments()
499 {
500 let mut line_warnings =
501 self.check_line(line.content, ctx, line.line_num, &code_spans, &mut buffers, line_index);
502
503 line_warnings.retain(|warning| {
505 !code_spans.iter().any(|span| {
506 if let Some(fix) = &warning.fix {
507 fix.range.start >= span.byte_offset && fix.range.start < span.byte_end
509 } else {
510 span.line == warning.line
511 && span.end_line == warning.line
512 && warning.column > 0
513 && (warning.column - 1) >= span.start_col
514 && (warning.column - 1) < span.end_col
515 }
516 })
517 });
518
519 line_warnings.retain(|warning| {
523 if let Some(fix) = &warning.fix {
524 !ctx.links
526 .iter()
527 .any(|link| fix.range.start >= link.byte_offset && fix.range.end <= link.byte_end)
528 } else {
529 true
530 }
531 });
532
533 line_warnings.retain(|warning| !ctx.is_position_in_obsidian_comment(warning.line, warning.column));
536
537 warnings.extend(line_warnings);
538 }
539
540 Ok(warnings)
541 }
542
543 fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
544 let mut content = ctx.content.to_string();
545 let warnings = self.check(ctx)?;
546 let mut warnings =
547 crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
548
549 warnings.sort_by_key(|w| w.fix.as_ref().map_or(0, |f| f.range.start));
551
552 for warning in warnings.iter().rev() {
554 if let Some(fix) = &warning.fix {
555 let start = fix.range.start;
556 let end = fix.range.end;
557 content.replace_range(start..end, &fix.replacement);
558 }
559 }
560
561 Ok(content)
562 }
563}
564
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs MD034 over `content` under the given flavor and returns the warnings.
    fn lint(content: &str, flavor: crate::config::MarkdownFlavor) -> Vec<LintWarning> {
        let ctx = crate::lint_context::LintContext::new(content, flavor, None);
        MD034NoBareUrls.check(&ctx).unwrap()
    }

    #[test]
    fn test_shortcut_ref_at_end_of_line_no_trailing_chars() {
        let result = lint("See [https://example.com]", crate::config::MarkdownFlavor::Standard);
        assert!(
            result.is_empty(),
            "[URL] at end of line should be treated as shortcut ref: {result:?}"
        );
    }

    #[test]
    fn test_shortcut_ref_multiple_spaces_before_paren() {
        // Smoke test: only verifies that linting this input completes; nothing is asserted
        // about the warnings produced.
        let _ = lint("[text] (https://example.com)", crate::config::MarkdownFlavor::Standard);
    }

    #[test]
    fn test_shortcut_ref_tab_before_bracket() {
        let result = lint("[https://example.com]\t[other]", crate::config::MarkdownFlavor::Standard);
        assert_eq!(
            result.len(),
            1,
            "Bare URL inside shortcut ref should be detected: {result:?}"
        );
    }

    #[test]
    fn test_shortcut_ref_followed_by_punctuation() {
        let result = lint(
            "[https://example.com], see also other things.",
            crate::config::MarkdownFlavor::Standard,
        );
        assert!(
            result.is_empty(),
            "[URL] followed by comma should be treated as shortcut ref: {result:?}"
        );
    }

    #[test]
    fn test_url_in_backticks_inside_mdx_component_not_flagged() {
        let content = "# Test\n\nControl: `https://rumdl.example.com/` is fine here.\n\n<ParamField path=\"--stuff\">\n This URL `https://rumdl.example.com/` must not be flagged.\n</ParamField>\n";
        let result = lint(content, crate::config::MarkdownFlavor::MDX);
        assert!(
            result.is_empty(),
            "URL in backticks inside MDX component must not be flagged: {result:?}"
        );
    }

    #[test]
    fn test_bare_url_inside_mdx_component_still_flagged() {
        let content =
            "# Test\n\n<ParamField path=\"--stuff\">\n Visit https://rumdl.example.com/ for details.\n</ParamField>\n";
        let result = lint(content, crate::config::MarkdownFlavor::MDX);
        assert_eq!(
            result.len(),
            1,
            "Bare URL in MDX component body must still be flagged: {result:?}"
        );
    }

    #[test]
    fn test_url_in_backticks_inside_nested_mdx_component_not_flagged() {
        let content = "<Outer>\n <Inner>\n Check `https://example.com/` here.\n </Inner>\n</Outer>\n";
        let result = lint(content, crate::config::MarkdownFlavor::MDX);
        assert!(
            result.is_empty(),
            "URL in backticks inside nested MDX component must not be flagged: {result:?}"
        );
    }

    #[test]
    fn test_pandoc_skips_urls_in_line_blocks() {
        let content = "| See https://example.com\n| For details\n";
        let result = lint(content, crate::config::MarkdownFlavor::Pandoc);
        assert!(
            result.is_empty(),
            "MD034 should skip URLs in Pandoc line blocks: {result:?}"
        );
    }

    #[test]
    fn test_pandoc_skips_urls_in_metadata() {
        let content = "---\nhomepage: https://example.com\n---\n\nBody.\n";
        let result = lint(content, crate::config::MarkdownFlavor::Pandoc);
        assert!(
            result.is_empty(),
            "MD034 should skip URLs in Pandoc YAML metadata: {result:?}"
        );
    }

    #[test]
    fn test_standard_still_flags_urls_in_pipe_prefixed_lines() {
        let result = lint("| See https://example.com\n", crate::config::MarkdownFlavor::Standard);
        assert!(
            !result.is_empty(),
            "MD034 should still flag URLs in pipe-prefixed lines under Standard flavor"
        );
    }

    #[test]
    fn test_url_in_backticks_after_fenced_code_block_inside_mdx_not_flagged() {
        // Same text as a multi-line literal would produce, spelled with explicit newlines.
        let content = "<Component>\nSome intro text.\n\n```\nexample code here\n```\n\nCheck `https://example.com/` here.\n</Component>\n";
        let result = lint(content, crate::config::MarkdownFlavor::MDX);
        assert!(
            result.is_empty(),
            "URL in backticks after a fenced code block inside MDX must not be flagged: {result:?}"
        );
    }
}