1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::{LineIndex, calculate_url_range};
6use crate::utils::regex_cache::{
7 EMAIL_PATTERN, URL_IPV6_STR, URL_QUICK_CHECK_STR, URL_STANDARD_STR, URL_WWW_STR, get_cached_fancy_regex,
8 get_cached_regex,
9};
10
11use crate::filtered_lines::FilteredLinesExt;
12use crate::lint_context::LintContext;
13
/// Matches one of the listed non-web schemes followed by `://`.
/// URLs containing such a scheme are skipped (not reported) by `check_line`.
const CUSTOM_PROTOCOL_PATTERN_STR: &str = r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#;
/// Inline link `[text](destination "title")`. The text may contain one level of
/// nested brackets, capture group 1 is the destination (no whitespace or `)`),
/// and the single- or double-quoted title is optional.
const MARKDOWN_LINK_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;
/// Inline link with an empty destination: `[text]()`.
const MARKDOWN_EMPTY_LINK_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(\)"#;
/// Reference-style link with an empty label: `[text][]`.
const MARKDOWN_EMPTY_REF_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\[\]"#;
/// Angle-bracketed autolink: `<...>` wrapping either an `http(s)`/`ftp(s)` URL
/// (the host may be a bracketed IPv6 literal with an optional `%zone` suffix)
/// or an email-like `local@domain.tld` token.
const ANGLE_LINK_PATTERN_STR: &str =
    r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#;
/// A whole line consisting only of a linked image, `[![alt](image)](target)`,
/// with optional surrounding whitespace.
const BADGE_LINK_LINE_STR: &str = r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#;
/// Image `![alt](source "title")`. Whitespace is tolerated after `!` and between
/// `]` and `(`; capture group 1 is the alt text, group 2 the source.
const MARKDOWN_IMAGE_PATTERN_STR: &str = r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;
/// Start of a reference definition: optional indentation, `[label]:`, then
/// either `<` or an `http(s)`/`ftp(s)` scheme.
const REFERENCE_DEF_RE_STR: &str = r"^\s*\[[^\]]+\]:\s*(?:<|(?:https?|ftps?)://)";
/// A line in which `](...)` appears before any `[`, i.e. the closing part of a
/// link whose opening bracket is not on this line.
const MULTILINE_LINK_CONTINUATION_STR: &str = r#"^[^\[]*\]\(.*\)"#;
/// Bracketed text `[text]` that is NOT followed by (optional whitespace and)
/// `[` or `(`. Uses a negative lookahead, so it is compiled through
/// `get_cached_fancy_regex` rather than `get_cached_regex`.
const SHORTCUT_REF_PATTERN_STR: &str = r#"\[([^\[\]]+)\](?!\s*[\[(])"#;
29
/// Scratch storage handed to `check_line` on every call.
///
/// `check` creates a single instance and passes it by mutable reference for each
/// line; `check_line` clears each vector before refilling it, so the allocations
/// are reused instead of being recreated per line.
#[derive(Default)]
struct LineCheckBuffers {
    /// `(start, end)` regex match offsets within the current line for inline links,
    /// empty links, empty references, shortcut references and angle-bracket autolinks.
    markdown_link_ranges: Vec<(usize, usize)>,
    /// `(start, end)` regex match offsets within the current line for images.
    image_ranges: Vec<(usize, usize)>,
    /// `(start, end, matched text)` for every URL candidate found on the current line.
    urls_found: Vec<(usize, usize, String)>,
}
37
/// Rule MD034: reports URLs and email addresses that appear without angle
/// brackets or link formatting. The rule carries no configuration or state.
#[derive(Default, Clone)]
pub struct MD034NoBareUrls;
40
41impl MD034NoBareUrls {
42 #[inline]
43 pub fn should_skip_content(&self, content: &str) -> bool {
44 let bytes = content.as_bytes();
47 let has_colon = bytes.contains(&b':');
48 let has_at = bytes.contains(&b'@');
49 let has_www = content.contains("www.");
50 !has_colon && !has_at && !has_www
51 }
52
53 fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
55 let mut trimmed = url;
56
57 let open_parens = url.chars().filter(|&c| c == '(').count();
59 let close_parens = url.chars().filter(|&c| c == ')').count();
60
61 if close_parens > open_parens {
62 let mut balance = 0;
64 let mut last_balanced_pos = url.len();
65
66 for (byte_idx, c) in url.char_indices() {
67 if c == '(' {
68 balance += 1;
69 } else if c == ')' {
70 balance -= 1;
71 if balance < 0 {
72 last_balanced_pos = byte_idx;
74 break;
75 }
76 }
77 }
78
79 trimmed = &trimmed[..last_balanced_pos];
80 }
81
82 while let Some(last_char) = trimmed.chars().last() {
84 if matches!(last_char, '.' | ',' | ';' | ':' | '!' | '?') {
85 if last_char == ':' && trimmed.len() > 1 {
88 break;
90 }
91 trimmed = &trimmed[..trimmed.len() - 1];
92 } else {
93 break;
94 }
95 }
96
97 trimmed
98 }
99
100 fn is_reference_definition(&self, line: &str) -> bool {
102 get_cached_regex(REFERENCE_DEF_RE_STR)
103 .map(|re| re.is_match(line))
104 .unwrap_or(false)
105 }
106
107 fn check_line(
108 &self,
109 line: &str,
110 ctx: &LintContext,
111 line_number: usize,
112 code_spans: &[crate::lint_context::CodeSpan],
113 buffers: &mut LineCheckBuffers,
114 line_index: &LineIndex,
115 ) -> Vec<LintWarning> {
116 let mut warnings = Vec::new();
117
118 if self.is_reference_definition(line) {
120 return warnings;
121 }
122
123 if ctx.line_info(line_number).is_some_and(|info| info.in_html_block) {
125 return warnings;
126 }
127
128 if let Ok(re) = get_cached_regex(MULTILINE_LINK_CONTINUATION_STR)
131 && re.is_match(line)
132 {
133 return warnings;
134 }
135
136 let has_quick_check = get_cached_regex(URL_QUICK_CHECK_STR)
138 .map(|re| re.is_match(line))
139 .unwrap_or(false);
140 let has_www = line.contains("www.");
141 let has_at = line.contains('@');
142
143 if !has_quick_check && !has_at && !has_www {
144 return warnings;
145 }
146
147 buffers.markdown_link_ranges.clear();
149 if let Ok(re) = get_cached_regex(MARKDOWN_LINK_PATTERN_STR) {
150 for cap in re.captures_iter(line) {
151 if let Some(mat) = cap.get(0) {
152 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
153 }
154 }
155 }
156
157 if let Ok(re) = get_cached_regex(MARKDOWN_EMPTY_LINK_PATTERN_STR) {
159 for mat in re.find_iter(line) {
160 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
161 }
162 }
163
164 if let Ok(re) = get_cached_regex(MARKDOWN_EMPTY_REF_PATTERN_STR) {
165 for mat in re.find_iter(line) {
166 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
167 }
168 }
169
170 if let Ok(re) = get_cached_fancy_regex(SHORTCUT_REF_PATTERN_STR) {
174 for mat in re.find_iter(line).flatten() {
175 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
176 }
177 }
178
179 if let Ok(re) = get_cached_regex(ANGLE_LINK_PATTERN_STR) {
180 for cap in re.captures_iter(line) {
181 if let Some(mat) = cap.get(0) {
182 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
183 }
184 }
185 }
186
187 buffers.image_ranges.clear();
189 if let Ok(re) = get_cached_regex(MARKDOWN_IMAGE_PATTERN_STR) {
190 for cap in re.captures_iter(line) {
191 if let Some(mat) = cap.get(0) {
192 buffers.image_ranges.push((mat.start(), mat.end()));
193 }
194 }
195 }
196
197 let is_badge_line = get_cached_regex(BADGE_LINK_LINE_STR)
199 .map(|re| re.is_match(line))
200 .unwrap_or(false);
201
202 if is_badge_line {
203 return warnings;
204 }
205
206 buffers.urls_found.clear();
208
209 if let Ok(re) = get_cached_regex(URL_IPV6_STR) {
211 for mat in re.find_iter(line) {
212 let url_str = mat.as_str();
213 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
214 }
215 }
216
217 if let Ok(re) = get_cached_regex(URL_STANDARD_STR) {
219 for mat in re.find_iter(line) {
220 let url_str = mat.as_str();
221
222 if url_str.contains("://[") {
224 continue;
225 }
226
227 if let Some(host_start) = url_str.find("://") {
230 let after_protocol = &url_str[host_start + 3..];
231 if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
233 if let Some(char_after) = line.chars().nth(mat.end())
235 && char_after == ']'
236 {
237 continue;
239 }
240 }
241 }
242
243 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
244 }
245 }
246
247 if let Ok(re) = get_cached_regex(URL_WWW_STR) {
249 for mat in re.find_iter(line) {
250 let url_str = mat.as_str();
251 let start_pos = mat.start();
252 let end_pos = mat.end();
253
254 if start_pos > 0 {
256 let prev_char = line.as_bytes().get(start_pos - 1).copied();
257 if prev_char == Some(b'/') || prev_char == Some(b'@') {
258 continue;
259 }
260 }
261
262 if start_pos > 0 && end_pos < line.len() {
264 let prev_char = line.as_bytes().get(start_pos - 1).copied();
265 let next_char = line.as_bytes().get(end_pos).copied();
266 if prev_char == Some(b'<') && next_char == Some(b'>') {
267 continue;
268 }
269 }
270
271 buffers.urls_found.push((start_pos, end_pos, url_str.to_string()));
272 }
273 }
274
275 for &(start, _end, ref url_str) in buffers.urls_found.iter() {
277 if get_cached_regex(CUSTOM_PROTOCOL_PATTERN_STR)
279 .map(|re| re.is_match(url_str))
280 .unwrap_or(false)
281 {
282 continue;
283 }
284
285 let mut is_inside_construct = false;
290 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
291 if start >= link_start && start < link_end {
292 is_inside_construct = true;
293 break;
294 }
295 }
296
297 for &(img_start, img_end) in buffers.image_ranges.iter() {
298 if start >= img_start && start < img_end {
299 is_inside_construct = true;
300 break;
301 }
302 }
303
304 if is_inside_construct {
305 continue;
306 }
307
308 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
310 let absolute_pos = line_start_byte + start;
311
312 if ctx.is_in_html_tag(absolute_pos) {
314 continue;
315 }
316
317 if ctx.is_in_html_comment(absolute_pos) {
319 continue;
320 }
321
322 let trimmed_url = self.trim_trailing_punctuation(url_str);
324
325 if !trimmed_url.is_empty() && trimmed_url != "//" {
327 let trimmed_len = trimmed_url.len();
328 let (start_line, start_col, end_line, end_col) =
329 calculate_url_range(line_number, line, start, trimmed_len);
330
331 warnings.push(LintWarning {
332 rule_name: Some("MD034".to_string()),
333 line: start_line,
334 column: start_col,
335 end_line,
336 end_column: end_col,
337 message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
338 severity: Severity::Warning,
339 fix: Some(Fix {
340 range: {
341 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
342 (line_start_byte + start)..(line_start_byte + start + trimmed_len)
343 },
344 replacement: format!("<{trimmed_url}>"),
345 }),
346 });
347 }
348 }
349
350 for cap in EMAIL_PATTERN.captures_iter(line) {
352 if let Some(mat) = cap.get(0) {
353 let email = mat.as_str();
354 let start = mat.start();
355 let end = mat.end();
356
357 let mut is_inside_construct = false;
359 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
360 if start >= link_start && end <= link_end {
361 is_inside_construct = true;
362 break;
363 }
364 }
365
366 if !is_inside_construct {
367 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
369 let absolute_pos = line_start_byte + start;
370
371 if ctx.is_in_html_tag(absolute_pos) {
373 continue;
374 }
375
376 let is_in_code_span = code_spans
378 .iter()
379 .any(|span| span.line == line_number && start >= span.start_col && start < span.end_col);
380
381 if !is_in_code_span {
382 let email_len = end - start;
383 let (start_line, start_col, end_line, end_col) =
384 calculate_url_range(line_number, line, start, email_len);
385
386 warnings.push(LintWarning {
387 rule_name: Some("MD034".to_string()),
388 line: start_line,
389 column: start_col,
390 end_line,
391 end_column: end_col,
392 message: format!("Email address without angle brackets or link formatting: '{email}'"),
393 severity: Severity::Warning,
394 fix: Some(Fix {
395 range: (line_start_byte + start)..(line_start_byte + end),
396 replacement: format!("<{email}>"),
397 }),
398 });
399 }
400 }
401 }
402 }
403
404 warnings
405 }
406}
407
408impl Rule for MD034NoBareUrls {
    /// Returns the rule identifier, `"MD034"`.
    #[inline]
    fn name(&self) -> &'static str {
        "MD034"
    }
413
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
417
    /// Builds the rule as a boxed trait object. The configuration is ignored:
    /// this rule has no options.
    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
    where
        Self: Sized,
    {
        Box::new(MD034NoBareUrls)
    }
424
    /// Classifies this rule under `RuleCategory::Link`.
    #[inline]
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
429
    /// Returns `true` when the rule need not run at all: the context reports that
    /// links or images are unlikely AND the raw content has none of the characters
    /// `should_skip_content` looks for (`:`, `@`, `www.`).
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        !ctx.likely_has_links_or_images() && self.should_skip_content(ctx.content)
    }
433
    /// Returns the one-line, human-readable summary of the rule.
    #[inline]
    fn description(&self) -> &'static str {
        "No bare URLs - wrap URLs in angle brackets"
    }
438
439 fn check(&self, ctx: &LintContext) -> LintResult {
440 let mut warnings = Vec::new();
441 let content = ctx.content;
442
443 if self.should_skip_content(content) {
445 return Ok(warnings);
446 }
447
448 let line_index = &ctx.line_index;
450
451 let code_spans = ctx.code_spans();
453
454 let mut buffers = LineCheckBuffers::default();
456
457 for line in ctx.filtered_lines().skip_front_matter().skip_code_blocks() {
460 let mut line_warnings =
461 self.check_line(line.content, ctx, line.line_num, &code_spans, &mut buffers, line_index);
462
463 line_warnings.retain(|warning| {
465 !code_spans.iter().any(|span| {
467 span.line == warning.line &&
468 warning.column > 0 && (warning.column - 1) >= span.start_col &&
470 (warning.column - 1) < span.end_col
471 })
472 });
473
474 line_warnings.retain(|warning| {
478 if let Some(fix) = &warning.fix {
479 !ctx.links
481 .iter()
482 .any(|link| fix.range.start >= link.byte_offset && fix.range.end <= link.byte_end)
483 } else {
484 true
485 }
486 });
487
488 warnings.extend(line_warnings);
489 }
490
491 Ok(warnings)
492 }
493
494 fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
495 let mut content = ctx.content.to_string();
496 let mut warnings = self.check(ctx)?;
497
498 warnings.sort_by_key(|w| w.fix.as_ref().map(|f| f.range.start).unwrap_or(0));
500
501 for warning in warnings.iter().rev() {
503 if let Some(fix) = &warning.fix {
504 let start = fix.range.start;
505 let end = fix.range.end;
506 content.replace_range(start..end, &fix.replacement);
507 }
508 }
509
510 Ok(content)
511 }
512}