1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::{LineIndex, calculate_url_range};
6use crate::utils::regex_cache::{
7 EMAIL_PATTERN, URL_IPV6_STR, URL_QUICK_CHECK_STR, URL_STANDARD_STR, URL_WWW_STR, XMPP_URI_STR,
8 get_cached_fancy_regex, get_cached_regex,
9};
10
11use crate::filtered_lines::FilteredLinesExt;
12use crate::lint_context::LintContext;
13
/// Matches a `scheme://` prefix for a fixed list of non-web schemes
/// (grpc, ws, ssh, git, file, ...). `check_line` skips any detected URL
/// whose text matches this pattern.
const CUSTOM_PROTOCOL_PATTERN_STR: &str = r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#;
/// Matches an inline link `[text](destination "optional title")`.
/// The text may contain one level of nested `[...]`; group 1 captures the
/// destination, which may not contain whitespace or `)`.
const MARKDOWN_LINK_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;
/// Matches an inline link with an empty destination: `[text]()`.
const MARKDOWN_EMPTY_LINK_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(\)"#;
/// Matches a reference with an empty label: `[text][]`.
const MARKDOWN_EMPTY_REF_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\[\]"#;
/// Matches text already wrapped in angle brackets: an `http(s)`/`ftp(s)` URL
/// (including a bracketed hex/colon host with an optional `%zone` suffix),
/// an `xmpp:` URI, or an `name@host.tld`-shaped address.
const ANGLE_LINK_PATTERN_STR: &str =
    r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|xmpp:[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#;
/// Matches a line that consists solely of a linked image,
/// `[![alt](image)](target)`, with optional surrounding whitespace.
const BADGE_LINK_LINE_STR: &str = r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#;
/// Matches an image `![alt](source "optional title")`, tolerating whitespace
/// after `!` and between `]` and `(`. Group 1 is the alt text, group 2 the source.
const MARKDOWN_IMAGE_PATTERN_STR: &str = r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;
/// Matches the start of a reference definition line: optional indentation,
/// `[label]:`, then either `<` or an `http(s)`/`ftp(s)` URL.
const REFERENCE_DEF_RE_STR: &str = r"^\s*\[[^\]]+\]:\s*(?:<|(?:https?|ftps?)://)";
/// Matches a line whose leading text contains no `[` and is followed by
/// `](...)`, i.e. a closing `](destination)` with no opening bracket on the
/// same line. `check_line` skips such lines entirely.
const MULTILINE_LINK_CONTINUATION_STR: &str = r#"^[^\[]*\]\(.*\)"#;
/// Matches `[text]` that is not followed (after optional whitespace) by `[`
/// or `(`. Uses a negative look-ahead, so it is compiled through
/// `get_cached_fancy_regex` rather than `get_cached_regex`.
const SHORTCUT_REF_PATTERN_STR: &str = r#"\[([^\[\]]+)\](?!\s*[\[(])"#;
30
/// Scratch vectors handed to `check_line` and cleared/refilled there on each
/// call, so that one set of allocations is reused for every line of a document.
#[derive(Default)]
struct LineCheckBuffers {
    /// `(start, end)` byte offsets within the current line of link-like
    /// constructs (inline links, empty links/references, shortcut references,
    /// angle-bracket links).
    markdown_link_ranges: Vec<(usize, usize)>,
    /// `(start, end)` byte offsets within the current line of image constructs.
    image_ranges: Vec<(usize, usize)>,
    /// `(start, end, matched text)` for every URL candidate found in the current line.
    urls_found: Vec<(usize, usize, String)>,
}
38
/// Lint rule MD034: reports URLs and email addresses that appear without
/// angle brackets or link formatting. The rule carries no state.
#[derive(Default, Clone)]
pub struct MD034NoBareUrls;
41
42impl MD034NoBareUrls {
43 #[inline]
44 pub fn should_skip_content(&self, content: &str) -> bool {
45 let bytes = content.as_bytes();
48 let has_colon = bytes.contains(&b':');
49 let has_at = bytes.contains(&b'@');
50 let has_www = content.contains("www.");
51 !has_colon && !has_at && !has_www
52 }
53
54 fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
56 let mut trimmed = url;
57
58 let open_parens = url.chars().filter(|&c| c == '(').count();
60 let close_parens = url.chars().filter(|&c| c == ')').count();
61
62 if close_parens > open_parens {
63 let mut balance = 0;
65 let mut last_balanced_pos = url.len();
66
67 for (byte_idx, c) in url.char_indices() {
68 if c == '(' {
69 balance += 1;
70 } else if c == ')' {
71 balance -= 1;
72 if balance < 0 {
73 last_balanced_pos = byte_idx;
75 break;
76 }
77 }
78 }
79
80 trimmed = &trimmed[..last_balanced_pos];
81 }
82
83 while let Some(last_char) = trimmed.chars().last() {
85 if matches!(last_char, '.' | ',' | ';' | ':' | '!' | '?') {
86 if last_char == ':' && trimmed.len() > 1 {
89 break;
91 }
92 trimmed = &trimmed[..trimmed.len() - 1];
93 } else {
94 break;
95 }
96 }
97
98 trimmed
99 }
100
101 fn is_reference_definition(&self, line: &str) -> bool {
103 get_cached_regex(REFERENCE_DEF_RE_STR)
104 .map(|re| re.is_match(line))
105 .unwrap_or(false)
106 }
107
108 fn check_line(
109 &self,
110 line: &str,
111 ctx: &LintContext,
112 line_number: usize,
113 code_spans: &[crate::lint_context::CodeSpan],
114 buffers: &mut LineCheckBuffers,
115 line_index: &LineIndex,
116 ) -> Vec<LintWarning> {
117 let mut warnings = Vec::new();
118
119 if self.is_reference_definition(line) {
121 return warnings;
122 }
123
124 if ctx.line_info(line_number).is_some_and(|info| info.in_html_block) {
126 return warnings;
127 }
128
129 if let Ok(re) = get_cached_regex(MULTILINE_LINK_CONTINUATION_STR)
132 && re.is_match(line)
133 {
134 return warnings;
135 }
136
137 let has_quick_check = get_cached_regex(URL_QUICK_CHECK_STR)
139 .map(|re| re.is_match(line))
140 .unwrap_or(false);
141 let has_www = line.contains("www.");
142 let has_at = line.contains('@');
143
144 if !has_quick_check && !has_at && !has_www {
145 return warnings;
146 }
147
148 buffers.markdown_link_ranges.clear();
150 if let Ok(re) = get_cached_regex(MARKDOWN_LINK_PATTERN_STR) {
151 for cap in re.captures_iter(line) {
152 if let Some(mat) = cap.get(0) {
153 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
154 }
155 }
156 }
157
158 if let Ok(re) = get_cached_regex(MARKDOWN_EMPTY_LINK_PATTERN_STR) {
160 for mat in re.find_iter(line) {
161 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
162 }
163 }
164
165 if let Ok(re) = get_cached_regex(MARKDOWN_EMPTY_REF_PATTERN_STR) {
166 for mat in re.find_iter(line) {
167 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
168 }
169 }
170
171 if let Ok(re) = get_cached_fancy_regex(SHORTCUT_REF_PATTERN_STR) {
175 for mat in re.find_iter(line).flatten() {
176 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
177 }
178 }
179
180 if let Ok(re) = get_cached_regex(ANGLE_LINK_PATTERN_STR) {
181 for cap in re.captures_iter(line) {
182 if let Some(mat) = cap.get(0) {
183 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
184 }
185 }
186 }
187
188 buffers.image_ranges.clear();
190 if let Ok(re) = get_cached_regex(MARKDOWN_IMAGE_PATTERN_STR) {
191 for cap in re.captures_iter(line) {
192 if let Some(mat) = cap.get(0) {
193 buffers.image_ranges.push((mat.start(), mat.end()));
194 }
195 }
196 }
197
198 let is_badge_line = get_cached_regex(BADGE_LINK_LINE_STR)
200 .map(|re| re.is_match(line))
201 .unwrap_or(false);
202
203 if is_badge_line {
204 return warnings;
205 }
206
207 buffers.urls_found.clear();
209
210 if let Ok(re) = get_cached_regex(URL_IPV6_STR) {
212 for mat in re.find_iter(line) {
213 let url_str = mat.as_str();
214 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
215 }
216 }
217
218 if let Ok(re) = get_cached_regex(URL_STANDARD_STR) {
220 for mat in re.find_iter(line) {
221 let url_str = mat.as_str();
222
223 if url_str.contains("://[") {
225 continue;
226 }
227
228 if let Some(host_start) = url_str.find("://") {
231 let after_protocol = &url_str[host_start + 3..];
232 if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
234 if let Some(char_after) = line.chars().nth(mat.end())
236 && char_after == ']'
237 {
238 continue;
240 }
241 }
242 }
243
244 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
245 }
246 }
247
248 if let Ok(re) = get_cached_regex(URL_WWW_STR) {
250 for mat in re.find_iter(line) {
251 let url_str = mat.as_str();
252 let start_pos = mat.start();
253 let end_pos = mat.end();
254
255 if start_pos > 0 {
257 let prev_char = line.as_bytes().get(start_pos - 1).copied();
258 if prev_char == Some(b'/') || prev_char == Some(b'@') {
259 continue;
260 }
261 }
262
263 if start_pos > 0 && end_pos < line.len() {
265 let prev_char = line.as_bytes().get(start_pos - 1).copied();
266 let next_char = line.as_bytes().get(end_pos).copied();
267 if prev_char == Some(b'<') && next_char == Some(b'>') {
268 continue;
269 }
270 }
271
272 buffers.urls_found.push((start_pos, end_pos, url_str.to_string()));
273 }
274 }
275
276 if let Ok(re) = get_cached_regex(XMPP_URI_STR) {
278 for mat in re.find_iter(line) {
279 let uri_str = mat.as_str();
280 let start_pos = mat.start();
281 let end_pos = mat.end();
282
283 if start_pos > 0 && end_pos < line.len() {
285 let prev_char = line.as_bytes().get(start_pos - 1).copied();
286 let next_char = line.as_bytes().get(end_pos).copied();
287 if prev_char == Some(b'<') && next_char == Some(b'>') {
288 continue;
289 }
290 }
291
292 buffers.urls_found.push((start_pos, end_pos, uri_str.to_string()));
293 }
294 }
295
296 for &(start, _end, ref url_str) in buffers.urls_found.iter() {
298 if get_cached_regex(CUSTOM_PROTOCOL_PATTERN_STR)
300 .map(|re| re.is_match(url_str))
301 .unwrap_or(false)
302 {
303 continue;
304 }
305
306 let mut is_inside_construct = false;
311 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
312 if start >= link_start && start < link_end {
313 is_inside_construct = true;
314 break;
315 }
316 }
317
318 for &(img_start, img_end) in buffers.image_ranges.iter() {
319 if start >= img_start && start < img_end {
320 is_inside_construct = true;
321 break;
322 }
323 }
324
325 if is_inside_construct {
326 continue;
327 }
328
329 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
331 let absolute_pos = line_start_byte + start;
332
333 if ctx.is_in_html_tag(absolute_pos) {
335 continue;
336 }
337
338 if ctx.is_in_html_comment(absolute_pos) {
340 continue;
341 }
342
343 if ctx.is_in_shortcode(absolute_pos) {
345 continue;
346 }
347
348 let trimmed_url = self.trim_trailing_punctuation(url_str);
350
351 if !trimmed_url.is_empty() && trimmed_url != "//" {
353 let trimmed_len = trimmed_url.len();
354 let (start_line, start_col, end_line, end_col) =
355 calculate_url_range(line_number, line, start, trimmed_len);
356
357 let replacement = if trimmed_url.starts_with("www.") {
359 format!("<https://{trimmed_url}>")
360 } else {
361 format!("<{trimmed_url}>")
362 };
363
364 warnings.push(LintWarning {
365 rule_name: Some("MD034".to_string()),
366 line: start_line,
367 column: start_col,
368 end_line,
369 end_column: end_col,
370 message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
371 severity: Severity::Warning,
372 fix: Some(Fix {
373 range: {
374 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
375 (line_start_byte + start)..(line_start_byte + start + trimmed_len)
376 },
377 replacement,
378 }),
379 });
380 }
381 }
382
383 for cap in EMAIL_PATTERN.captures_iter(line) {
385 if let Some(mat) = cap.get(0) {
386 let email = mat.as_str();
387 let start = mat.start();
388 let end = mat.end();
389
390 if start >= 5 && line.is_char_boundary(start - 5) && &line[start - 5..start] == "xmpp:" {
393 continue;
394 }
395
396 let mut is_inside_construct = false;
398 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
399 if start >= link_start && end <= link_end {
400 is_inside_construct = true;
401 break;
402 }
403 }
404
405 if !is_inside_construct {
406 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
408 let absolute_pos = line_start_byte + start;
409
410 if ctx.is_in_html_tag(absolute_pos) {
412 continue;
413 }
414
415 let is_in_code_span = code_spans
417 .iter()
418 .any(|span| span.line == line_number && start >= span.start_col && start < span.end_col);
419
420 if !is_in_code_span {
421 let email_len = end - start;
422 let (start_line, start_col, end_line, end_col) =
423 calculate_url_range(line_number, line, start, email_len);
424
425 warnings.push(LintWarning {
426 rule_name: Some("MD034".to_string()),
427 line: start_line,
428 column: start_col,
429 end_line,
430 end_column: end_col,
431 message: format!("Email address without angle brackets or link formatting: '{email}'"),
432 severity: Severity::Warning,
433 fix: Some(Fix {
434 range: (line_start_byte + start)..(line_start_byte + end),
435 replacement: format!("<{email}>"),
436 }),
437 });
438 }
439 }
440 }
441 }
442
443 warnings
444 }
445}
446
447impl Rule for MD034NoBareUrls {
    /// Returns the rule identifier, `"MD034"`.
    #[inline]
    fn name(&self) -> &'static str {
        "MD034"
    }
452
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
456
    /// Builds the rule as a boxed trait object. The configuration is not
    /// consulted.
    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
    where
        Self: Sized,
    {
        Box::new(MD034NoBareUrls)
    }
463
    /// Returns the category this rule is filed under: `RuleCategory::Link`.
    #[inline]
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
468
    /// Returns `true` only when `ctx.likely_has_links_or_images()` is false
    /// and `should_skip_content` finds no `:`, `@` or `www.` in the content.
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        !ctx.likely_has_links_or_images() && self.should_skip_content(ctx.content)
    }
472
    /// Returns the one-line human-readable description of the rule.
    #[inline]
    fn description(&self) -> &'static str {
        "No bare URLs - wrap URLs in angle brackets"
    }
477
478 fn check(&self, ctx: &LintContext) -> LintResult {
479 let mut warnings = Vec::new();
480 let content = ctx.content;
481
482 if self.should_skip_content(content) {
484 return Ok(warnings);
485 }
486
487 let line_index = &ctx.line_index;
489
490 let code_spans = ctx.code_spans();
492
493 let mut buffers = LineCheckBuffers::default();
495
496 for line in ctx.filtered_lines().skip_front_matter().skip_code_blocks() {
499 let mut line_warnings =
500 self.check_line(line.content, ctx, line.line_num, &code_spans, &mut buffers, line_index);
501
502 line_warnings.retain(|warning| {
504 !code_spans.iter().any(|span| {
506 span.line == warning.line &&
507 warning.column > 0 && (warning.column - 1) >= span.start_col &&
509 (warning.column - 1) < span.end_col
510 })
511 });
512
513 line_warnings.retain(|warning| {
517 if let Some(fix) = &warning.fix {
518 !ctx.links
520 .iter()
521 .any(|link| fix.range.start >= link.byte_offset && fix.range.end <= link.byte_end)
522 } else {
523 true
524 }
525 });
526
527 warnings.extend(line_warnings);
528 }
529
530 Ok(warnings)
531 }
532
533 fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
534 let mut content = ctx.content.to_string();
535 let mut warnings = self.check(ctx)?;
536
537 warnings.sort_by_key(|w| w.fix.as_ref().map(|f| f.range.start).unwrap_or(0));
539
540 for warning in warnings.iter().rev() {
542 if let Some(fix) = &warning.fix {
543 let start = fix.range.start;
544 let end = fix.range.end;
545 content.replace_range(start..end, &fix.replacement);
546 }
547 }
548
549 Ok(content)
550 }
551}