1use std::sync::LazyLock;
5
6use regex::Regex;
7
8use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
9use crate::utils::range_utils::{LineIndex, calculate_url_range};
10use crate::utils::regex_cache::{
11 EMAIL_PATTERN, URL_IPV6_REGEX, URL_QUICK_CHECK_REGEX, URL_STANDARD_REGEX, URL_WWW_REGEX, XMPP_URI_REGEX,
12};
13
14use crate::filtered_lines::FilteredLinesExt;
15use crate::lint_context::LintContext;
16
/// Matches one of a fixed list of non-HTTP scheme prefixes (`grpc://`, `ws://`, `ssh://`,
/// `file://`, ... `service://`). The pattern is unanchored, so it matches the prefix
/// anywhere in the haystack, not only at the start.
static CUSTOM_PROTOCOL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#).unwrap()
});
/// Matches an inline link `[text](destination)` with an optional quoted title.
/// The text may contain one level of nested `[...]`; capture group 1 is the destination
/// (a run of characters that are neither `)` nor whitespace).
static MARKDOWN_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap()
});
/// Matches an inline link with an empty destination: `[text]()`.
static MARKDOWN_EMPTY_LINK_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(\)"#).unwrap());
/// Matches a collapsed reference link: `[text][]`.
static MARKDOWN_EMPTY_REF_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\[\]"#).unwrap());
/// Matches `<...>` wrapping one of: an `http(s)`/`ftp(s)` URL (including a bracketed
/// hex/colon host with an optional `%zone` suffix), an `xmpp:` URI, or an email-like
/// `local@domain.tld` token. Capture group 1 is the text between the angle brackets.
static ANGLE_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|xmpp:[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#,
    )
    .unwrap()
});
/// Matches a whole line consisting only of an image nested in a link,
/// `[![alt](image)](target)`, with optional leading/trailing whitespace.
static BADGE_LINK_LINE_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#).unwrap());
/// Matches an image `![alt](source)` with an optional quoted title. Whitespace is
/// tolerated after `!` and between `]` and `(`. Group 1 is the alt text, group 2 the source.
static MARKDOWN_IMAGE_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap());
/// Matches the start of a reference definition line: optional whitespace, `[label]:`,
/// then either `<` or an `http(s)://` / `ftp(s)://` scheme.
static REFERENCE_DEF_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*\[[^\]]+\]:\s*(?:<|(?:https?|ftps?)://)").unwrap());
/// Matches a line that, before any `[` appears, contains `](...)` — i.e. a line that looks
/// like the tail of a link whose opening bracket is not on this line.
static MULTILINE_LINK_CONTINUATION_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"^[^\[]*\]\(.*\)"#).unwrap());
/// Matches `[text]` where `text` is non-empty and contains no brackets. Group 1 is the text.
static SHORTCUT_REF_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"\[([^\[\]]+)\]"#).unwrap());
42
/// Scratch vectors reused across `check_line` calls so that each line does not allocate
/// fresh collections; `check_line` clears each vector before refilling it.
#[derive(Default)]
struct LineCheckBuffers {
    /// `(start, end)` byte offsets, relative to the current line, of link-like constructs
    /// (inline links, empty links/refs, bracketed text, angle-bracket autolinks).
    markdown_link_ranges: Vec<(usize, usize)>,
    /// `(start, end)` byte offsets, relative to the current line, of image constructs.
    image_ranges: Vec<(usize, usize)>,
    /// `(start, end, matched text)` for every URL-like candidate found on the current line.
    urls_found: Vec<(usize, usize, String)>,
}
50
/// Lint rule MD034: reports URLs and email addresses that appear without angle brackets
/// or link formatting, and offers a fix that wraps them in `<...>`.
#[derive(Default, Clone)]
pub struct MD034NoBareUrls;
53
54impl MD034NoBareUrls {
55 #[inline]
56 pub fn should_skip_content(&self, content: &str) -> bool {
57 let bytes = content.as_bytes();
60 let has_colon = bytes.contains(&b':');
61 let has_at = bytes.contains(&b'@');
62 let has_www = content.contains("www.");
63 !has_colon && !has_at && !has_www
64 }
65
66 fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
68 let mut trimmed = url;
69
70 let open_parens = url.chars().filter(|&c| c == '(').count();
72 let close_parens = url.chars().filter(|&c| c == ')').count();
73
74 if close_parens > open_parens {
75 let mut balance = 0;
77 let mut last_balanced_pos = url.len();
78
79 for (byte_idx, c) in url.char_indices() {
80 if c == '(' {
81 balance += 1;
82 } else if c == ')' {
83 balance -= 1;
84 if balance < 0 {
85 last_balanced_pos = byte_idx;
87 break;
88 }
89 }
90 }
91
92 trimmed = &trimmed[..last_balanced_pos];
93 }
94
95 while let Some(last_char) = trimmed.chars().last() {
97 if matches!(last_char, '.' | ',' | ';' | ':' | '!' | '?') {
98 if last_char == ':' && trimmed.len() > 1 {
101 break;
103 }
104 trimmed = &trimmed[..trimmed.len() - 1];
105 } else {
106 break;
107 }
108 }
109
110 trimmed
111 }
112
    /// Returns `true` when `line` matches `REFERENCE_DEF_REGEX`, i.e. it begins (after
    /// optional whitespace) with `[label]:` followed by `<` or an http(s)/ftp(s) scheme.
    fn is_reference_definition(&self, line: &str) -> bool {
        REFERENCE_DEF_REGEX.is_match(line)
    }
117
    /// Scans a single line for bare URLs, `www.` hosts, XMPP URIs, and email addresses and
    /// returns one warning (with a `<...>`-wrapping fix) per finding.
    ///
    /// * `line` - text of the line being checked.
    /// * `ctx` - lint context, queried for HTML block/tag/comment and shortcode membership.
    /// * `line_number` - line identifier passed through to `ctx.line_info`,
    ///   `line_index.get_line_start_byte`, and `calculate_url_range`.
    /// * `code_spans` - code spans; only consulted for email matches in this function.
    /// * `buffers` - reusable scratch vectors; cleared and refilled here.
    /// * `line_index` - used to turn line-relative offsets into document byte offsets.
    ///
    /// Returns an empty vector when the line is skipped by one of the early exits.
    fn check_line(
        &self,
        line: &str,
        ctx: &LintContext,
        line_number: usize,
        code_spans: &[crate::lint_context::CodeSpan],
        buffers: &mut LineCheckBuffers,
        line_index: &LineIndex,
    ) -> Vec<LintWarning> {
        let mut warnings = Vec::new();

        // Reference definitions (`[label]: https://...`) legitimately contain a raw URL.
        if self.is_reference_definition(line) {
            return warnings;
        }

        // Lines flagged as part of an HTML block are not linted.
        if ctx.line_info(line_number).is_some_and(|info| info.in_html_block) {
            return warnings;
        }

        // A line shaped like `...](...)` with no earlier `[` is treated as the tail of a
        // link that started on a previous line.
        if MULTILINE_LINK_CONTINUATION_REGEX.is_match(line) {
            return warnings;
        }

        // Cheap per-line pre-filter before running the heavier patterns.
        let has_quick_check = URL_QUICK_CHECK_REGEX.is_match(line);
        let has_www = line.contains("www.");
        let has_at = line.contains('@');

        if !has_quick_check && !has_at && !has_www {
            return warnings;
        }

        // Reset the scratch ranges left over from the previous line.
        buffers.markdown_link_ranges.clear();
        buffers.image_ranges.clear();

        // Only run a family of patterns when its leading character occurs on the line.
        let has_bracket = line.contains('[');
        let has_angle = line.contains('<');
        let has_bang = line.contains('!');

        if has_bracket {
            // Inline links: `[text](dest)`.
            for mat in MARKDOWN_LINK_REGEX.find_iter(line) {
                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
            }

            // Links with an empty destination: `[text]()`.
            for mat in MARKDOWN_EMPTY_LINK_REGEX.find_iter(line) {
                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
            }

            // Collapsed references: `[text][]`.
            for mat in MARKDOWN_EMPTY_REF_REGEX.find_iter(line) {
                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
            }

            // Bracketed text `[text]` counts as a link range unless the next
            // non-whitespace byte is `(` or `[`; in that case it is not recorded here.
            for mat in SHORTCUT_REF_REGEX.find_iter(line) {
                let end = mat.end();
                let next_non_ws = line[end..].bytes().find(|b| !b.is_ascii_whitespace());
                if next_non_ws == Some(b'(') || next_non_ws == Some(b'[') {
                    continue;
                }
                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
            }

            // A line that is nothing but a badge (`[![alt](img)](target)`) is skipped entirely.
            if has_bang && BADGE_LINK_LINE_REGEX.is_match(line) {
                return warnings;
            }
        }

        if has_angle {
            // Angle-bracket autolinks are already correctly formatted.
            for mat in ANGLE_LINK_REGEX.find_iter(line) {
                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
            }
        }

        if has_bang && has_bracket {
            for mat in MARKDOWN_IMAGE_REGEX.find_iter(line) {
                buffers.image_ranges.push((mat.start(), mat.end()));
            }
        }

        // Collect every URL-like candidate on the line before filtering.
        buffers.urls_found.clear();

        for mat in URL_IPV6_REGEX.find_iter(line) {
            let url_str = mat.as_str();
            buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
        }

        for mat in URL_STANDARD_REGEX.find_iter(line) {
            let url_str = mat.as_str();

            // Bracketed hosts are left to the URL_IPV6_REGEX pass above.
            if url_str.contains("://[") {
                continue;
            }

            // A match with more than one ':' after the scheme that is immediately followed
            // by ']' is dropped — presumably a fragment of a bracketed IPv6 URL; TODO confirm.
            if let Some(host_start) = url_str.find("://") {
                let after_protocol = &url_str[host_start + 3..];
                if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
                    if line.as_bytes().get(mat.end()) == Some(&b']') {
                        continue;
                    }
                }
            }

            buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
        }

        for mat in URL_WWW_REGEX.find_iter(line) {
            let url_str = mat.as_str();
            let start_pos = mat.start();
            let end_pos = mat.end();

            // Preceded by '/' or '@': treated as part of a larger URL or an email, not a
            // standalone `www.` host.
            if start_pos > 0 {
                let prev_char = line.as_bytes().get(start_pos - 1).copied();
                if prev_char == Some(b'/') || prev_char == Some(b'@') {
                    continue;
                }
            }

            // Already wrapped as `<www...>`.
            if start_pos > 0 && end_pos < line.len() {
                let prev_char = line.as_bytes().get(start_pos - 1).copied();
                let next_char = line.as_bytes().get(end_pos).copied();
                if prev_char == Some(b'<') && next_char == Some(b'>') {
                    continue;
                }
            }

            buffers.urls_found.push((start_pos, end_pos, url_str.to_string()));
        }

        for mat in XMPP_URI_REGEX.find_iter(line) {
            let uri_str = mat.as_str();
            let start_pos = mat.start();
            let end_pos = mat.end();

            // Already wrapped as `<xmpp:...>`.
            if start_pos > 0 && end_pos < line.len() {
                let prev_char = line.as_bytes().get(start_pos - 1).copied();
                let next_char = line.as_bytes().get(end_pos).copied();
                if prev_char == Some(b'<') && next_char == Some(b'>') {
                    continue;
                }
            }

            buffers.urls_found.push((start_pos, end_pos, uri_str.to_string()));
        }

        // Filter the candidates and turn the survivors into warnings.
        for &(start, _end, ref url_str) in buffers.urls_found.iter() {
            // Candidates containing one of the listed custom scheme prefixes are not reported.
            if CUSTOM_PROTOCOL_REGEX.is_match(url_str) {
                continue;
            }

            // Only the candidate's start offset is tested against link and image ranges.
            let is_inside_construct = buffers
                .markdown_link_ranges
                .iter()
                .any(|&(s, e)| start >= s && start < e)
                || buffers.image_ranges.iter().any(|&(s, e)| start >= s && start < e);

            if is_inside_construct {
                continue;
            }

            // Convert the line-relative offset into a document byte offset; falls back to 0
            // when the line index has no entry for this line.
            let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
            let absolute_pos = line_start_byte + start;

            if ctx.is_in_html_tag(absolute_pos) {
                continue;
            }

            if ctx.is_in_html_comment(absolute_pos) || ctx.is_in_mdx_comment(absolute_pos) {
                continue;
            }

            if ctx.is_in_shortcode(absolute_pos) {
                continue;
            }

            // Drop trailing prose punctuation / unmatched ')' so it is not reported or wrapped.
            let trimmed_url = self.trim_trailing_punctuation(url_str);

            if !trimmed_url.is_empty() && trimmed_url != "//" {
                let trimmed_len = trimmed_url.len();
                let (start_line, start_col, end_line, end_col) =
                    calculate_url_range(line_number, line, start, trimmed_len);

                // `www.` hosts have no scheme, so the fix supplies `https://`.
                let replacement = if trimmed_url.starts_with("www.") {
                    format!("<https://{trimmed_url}>")
                } else {
                    format!("<{trimmed_url}>")
                };

                warnings.push(LintWarning {
                    rule_name: Some("MD034".to_string()),
                    line: start_line,
                    column: start_col,
                    end_line,
                    end_column: end_col,
                    message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
                    severity: Severity::Warning,
                    fix: Some(Fix {
                        // Document byte range of the trimmed URL only.
                        range: {
                            let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
                            (line_start_byte + start)..(line_start_byte + start + trimmed_len)
                        },
                        replacement,
                    }),
                });
            }
        }

        // Email addresses are handled separately from URL candidates.
        // NOTE(review): unlike the URL loop, this loop does not consult image ranges,
        // HTML/MDX comments, or shortcodes — confirm whether that is intentional.
        for cap in EMAIL_PATTERN.captures_iter(line) {
            if let Some(mat) = cap.get(0) {
                let email = mat.as_str();
                let start = mat.start();
                let end = mat.end();

                // Skip the `user@host` part of an `xmpp:user@host` URI. The boundary check
                // keeps the slice from panicking when multi-byte text precedes the match.
                if start >= 5 && line.is_char_boundary(start - 5) && &line[start - 5..start] == "xmpp:" {
                    continue;
                }

                // Here the whole match must lie inside a recorded link range.
                let mut is_inside_construct = false;
                for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
                    if start >= link_start && end <= link_end {
                        is_inside_construct = true;
                        break;
                    }
                }

                if !is_inside_construct {
                    let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
                    let absolute_pos = line_start_byte + start;

                    if ctx.is_in_html_tag(absolute_pos) {
                        continue;
                    }

                    let is_in_code_span = code_spans
                        .iter()
                        .any(|span| absolute_pos >= span.byte_offset && absolute_pos < span.byte_end);

                    if !is_in_code_span {
                        let email_len = end - start;
                        let (start_line, start_col, end_line, end_col) =
                            calculate_url_range(line_number, line, start, email_len);

                        warnings.push(LintWarning {
                            rule_name: Some("MD034".to_string()),
                            line: start_line,
                            column: start_col,
                            end_line,
                            end_column: end_col,
                            message: format!("Email address without angle brackets or link formatting: '{email}'"),
                            severity: Severity::Warning,
                            fix: Some(Fix {
                                range: (line_start_byte + start)..(line_start_byte + end),
                                replacement: format!("<{email}>"),
                            }),
                        });
                    }
                }
            }
        }

        warnings
    }
422}
423
424impl Rule for MD034NoBareUrls {
    /// Returns the rule identifier, `"MD034"`.
    #[inline]
    fn name(&self) -> &'static str {
        "MD034"
    }
429
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
433
434 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
435 where
436 Self: Sized,
437 {
438 Box::new(MD034NoBareUrls)
439 }
440
    /// Returns the category this rule is grouped under: `RuleCategory::Link`.
    #[inline]
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
445
446 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
447 !ctx.likely_has_links_or_images() && self.should_skip_content(ctx.content)
448 }
449
    /// Returns the short human-readable description of the rule.
    #[inline]
    fn description(&self) -> &'static str {
        "No bare URLs - wrap URLs in angle brackets"
    }
454
455 fn check(&self, ctx: &LintContext) -> LintResult {
456 let mut warnings = Vec::new();
457 let content = ctx.content;
458
459 if self.should_skip_content(content) {
461 return Ok(warnings);
462 }
463
464 let line_index = &ctx.line_index;
466
467 let code_spans = ctx.code_spans();
469
470 let mut buffers = LineCheckBuffers::default();
472
473 for line in ctx
477 .filtered_lines()
478 .skip_front_matter()
479 .skip_code_blocks()
480 .skip_jsx_expressions()
481 .skip_mdx_comments()
482 .skip_obsidian_comments()
483 {
484 let mut line_warnings =
485 self.check_line(line.content, ctx, line.line_num, &code_spans, &mut buffers, line_index);
486
487 line_warnings.retain(|warning| {
489 !code_spans.iter().any(|span| {
490 if let Some(fix) = &warning.fix {
491 fix.range.start >= span.byte_offset && fix.range.start < span.byte_end
493 } else {
494 span.line == warning.line
495 && span.end_line == warning.line
496 && warning.column > 0
497 && (warning.column - 1) >= span.start_col
498 && (warning.column - 1) < span.end_col
499 }
500 })
501 });
502
503 line_warnings.retain(|warning| {
507 if let Some(fix) = &warning.fix {
508 !ctx.links
510 .iter()
511 .any(|link| fix.range.start >= link.byte_offset && fix.range.end <= link.byte_end)
512 } else {
513 true
514 }
515 });
516
517 line_warnings.retain(|warning| !ctx.is_position_in_obsidian_comment(warning.line, warning.column));
520
521 warnings.extend(line_warnings);
522 }
523
524 Ok(warnings)
525 }
526
527 fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
528 let mut content = ctx.content.to_string();
529 let warnings = self.check(ctx)?;
530 let mut warnings =
531 crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
532
533 warnings.sort_by_key(|w| w.fix.as_ref().map(|f| f.range.start).unwrap_or(0));
535
536 for warning in warnings.iter().rev() {
538 if let Some(fix) = &warning.fix {
539 let start = fix.range.start;
540 let end = fix.range.end;
541 content.replace_range(start..end, &fix.replacement);
542 }
543 }
544
545 Ok(content)
546 }
547}
548
#[cfg(test)]
mod tests {
    use super::*;

    /// Lints `content` with the rule under the Standard flavor and returns its warnings.
    fn lint_standard(content: &str) -> Vec<LintWarning> {
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        MD034NoBareUrls.check(&ctx).unwrap()
    }

    /// A bracketed URL that ends the line is expected to produce no warning.
    #[test]
    fn test_shortcut_ref_at_end_of_line_no_trailing_chars() {
        let result = lint_standard("See [https://example.com]");
        assert!(
            result.is_empty(),
            "[URL] at end of line should be treated as shortcut ref: {result:?}"
        );
    }

    /// Whitespace between `]` and `(`: the outcome is not asserted; the test only checks
    /// that linting completes without panicking.
    #[test]
    fn test_shortcut_ref_multiple_spaces_before_paren() {
        let _ = lint_standard("[text] (https://example.com)");
    }

    /// A bracketed URL followed by a tab and another `[...]` is expected to be reported once.
    #[test]
    fn test_shortcut_ref_tab_before_bracket() {
        let result = lint_standard("[https://example.com]\t[other]");
        assert_eq!(
            result.len(),
            1,
            "Bare URL inside shortcut ref should be detected: {result:?}"
        );
    }

    /// A bracketed URL followed by a comma is expected to produce no warning.
    #[test]
    fn test_shortcut_ref_followed_by_punctuation() {
        let result = lint_standard("[https://example.com], see also other things.");
        assert!(
            result.is_empty(),
            "[URL] followed by comma should be treated as shortcut ref: {result:?}"
        );
    }
}