1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::{LineIndex, calculate_url_range};
6use crate::utils::regex_cache::{EMAIL_PATTERN, get_cached_fancy_regex, get_cached_regex};
7
8use crate::filtered_lines::FilteredLinesExt;
9use crate::lint_context::LintContext;
10
/// Cheap pre-filter: an `http(s)`/`ftp(s)` scheme prefix, an `@`, or a `www.` prefix.
const URL_QUICK_CHECK_STR: &str = r#"(?:https?|ftps?)://|@|www\."#;

/// Scheme prefixes that cause a candidate URL to be skipped when found in its text.
const CUSTOM_PROTOCOL_PATTERN_STR: &str = r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#;

/// Inline link `[text](target "optional title")`; the text may hold one level of nested brackets.
const MARKDOWN_LINK_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;

/// Inline link with an empty target: `[text]()`.
const MARKDOWN_EMPTY_LINK_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(\)"#;

/// Collapsed reference link: `[text][]`.
const MARKDOWN_EMPTY_REF_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\[\]"#;

/// Autolink in angle brackets: `<scheme://...>` (including bracketed IPv6 hosts) or `<user@host.tld>`.
const ANGLE_LINK_PATTERN_STR: &str = r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#;

/// A whole line consisting of a single linked image: `[![alt](image)](target)`.
const BADGE_LINK_LINE_STR: &str = r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#;

/// Image `![alt](target "optional title")`, tolerating whitespace after `!` and before `(`.
const MARKDOWN_IMAGE_PATTERN_STR: &str = r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;

/// `http(s)`/`ftp(s)` URL with optional path, query and fragment parts.
const SIMPLE_URL_REGEX_STR: &str = r#"(https?|ftps?)://(?:\[[0-9a-fA-F:%.]+\](?::\d+)?|[^\s<>\[\]()\\'\"`\]]+)(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#;

/// Scheme-less URL that starts with `www.` and ends in an alphabetic TLD, with optional path.
const WWW_URL_REGEX_STR: &str = r#"www\.(?:[a-zA-Z0-9][-a-zA-Z0-9]*\.)+[a-zA-Z]{2,}(?:/[^\s<>\[\]()\\'\"`]*)?"#;

/// `http(s)`/`ftp(s)` URL whose host is a bracketed IPv6 literal, with optional port, path,
/// query and fragment.
const IPV6_URL_REGEX_STR: &str = r#"(https?|ftps?)://\[[0-9a-fA-F:%.\-a-zA-Z]+\](?::\d+)?(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#;

/// Start of a reference definition line: `[label]: <...` or `[label]: scheme://...`.
const REFERENCE_DEF_RE_STR: &str = r"^\s*\[[^\]]+\]:\s*(?:<|(?:https?|ftps?)://)";

/// Line that closes a link opened on a previous line: no `[` before the first `](...)`.
const MULTILINE_LINK_CONTINUATION_STR: &str = r#"^[^\[]*\]\(.*\)"#;

/// Shortcut reference `[text]` not followed by `[` or `(`; uses look-ahead, so it needs the
/// fancy-regex engine.
const SHORTCUT_REF_PATTERN_STR: &str = r#"\[([^\[\]]+)\](?!\s*[\[(])"#;
34
/// Scratch vectors that `check_line` clears and refills for every line, so their
/// allocations are reused across the whole document instead of being recreated per line.
///
/// All offsets are byte offsets into the line currently being checked.
#[derive(Debug, Default)]
struct LineCheckBuffers {
    /// `(start, end)` spans of link-like constructs (inline links, empty links, reference
    /// links, angle-bracket autolinks) inside which URLs must not be reported.
    markdown_link_ranges: Vec<(usize, usize)>,
    /// `(start, end)` spans of image constructs.
    image_ranges: Vec<(usize, usize)>,
    /// `(start, end, matched text)` for every URL candidate found on the line.
    urls_found: Vec<(usize, usize, String)>,
}
42
/// Lint rule MD034: reports URLs and email addresses that appear in text without angle
/// brackets or link formatting, and offers a fix that wraps them in `<...>`.
///
/// The rule carries no state or configuration.
#[derive(Debug, Default, Clone)]
pub struct MD034NoBareUrls;
45
46impl MD034NoBareUrls {
47 #[inline]
48 pub fn should_skip_content(&self, content: &str) -> bool {
49 let bytes = content.as_bytes();
52 let has_colon = bytes.contains(&b':');
53 let has_at = bytes.contains(&b'@');
54 let has_www = content.contains("www.");
55 !has_colon && !has_at && !has_www
56 }
57
58 fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
60 let mut trimmed = url;
61
62 let open_parens = url.chars().filter(|&c| c == '(').count();
64 let close_parens = url.chars().filter(|&c| c == ')').count();
65
66 if close_parens > open_parens {
67 let mut balance = 0;
69 let mut last_balanced_pos = url.len();
70
71 for (i, c) in url.chars().enumerate() {
72 if c == '(' {
73 balance += 1;
74 } else if c == ')' {
75 balance -= 1;
76 if balance < 0 {
77 last_balanced_pos = i;
79 break;
80 }
81 }
82 }
83
84 trimmed = &trimmed[..last_balanced_pos];
85 }
86
87 while let Some(last_char) = trimmed.chars().last() {
89 if matches!(last_char, '.' | ',' | ';' | ':' | '!' | '?') {
90 if last_char == ':' && trimmed.len() > 1 {
93 break;
95 }
96 trimmed = &trimmed[..trimmed.len() - 1];
97 } else {
98 break;
99 }
100 }
101
102 trimmed
103 }
104
105 fn is_reference_definition(&self, line: &str) -> bool {
107 get_cached_regex(REFERENCE_DEF_RE_STR)
108 .map(|re| re.is_match(line))
109 .unwrap_or(false)
110 }
111
112 fn check_line(
113 &self,
114 line: &str,
115 ctx: &LintContext,
116 line_number: usize,
117 code_spans: &[crate::lint_context::CodeSpan],
118 buffers: &mut LineCheckBuffers,
119 line_index: &LineIndex,
120 ) -> Vec<LintWarning> {
121 let mut warnings = Vec::new();
122
123 if self.is_reference_definition(line) {
125 return warnings;
126 }
127
128 if ctx.line_info(line_number).is_some_and(|info| info.in_html_block) {
130 return warnings;
131 }
132
133 if let Ok(re) = get_cached_regex(MULTILINE_LINK_CONTINUATION_STR)
136 && re.is_match(line)
137 {
138 return warnings;
139 }
140
141 let has_quick_check = get_cached_regex(URL_QUICK_CHECK_STR)
143 .map(|re| re.is_match(line))
144 .unwrap_or(false);
145 let has_www = line.contains("www.");
146 let has_at = line.contains('@');
147
148 if !has_quick_check && !has_at && !has_www {
149 return warnings;
150 }
151
152 buffers.markdown_link_ranges.clear();
154 if let Ok(re) = get_cached_regex(MARKDOWN_LINK_PATTERN_STR) {
155 for cap in re.captures_iter(line) {
156 if let Some(mat) = cap.get(0) {
157 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
158 }
159 }
160 }
161
162 if let Ok(re) = get_cached_regex(MARKDOWN_EMPTY_LINK_PATTERN_STR) {
164 for mat in re.find_iter(line) {
165 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
166 }
167 }
168
169 if let Ok(re) = get_cached_regex(MARKDOWN_EMPTY_REF_PATTERN_STR) {
170 for mat in re.find_iter(line) {
171 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
172 }
173 }
174
175 if let Ok(re) = get_cached_fancy_regex(SHORTCUT_REF_PATTERN_STR) {
179 for mat in re.find_iter(line).flatten() {
180 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
181 }
182 }
183
184 if let Ok(re) = get_cached_regex(ANGLE_LINK_PATTERN_STR) {
185 for cap in re.captures_iter(line) {
186 if let Some(mat) = cap.get(0) {
187 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
188 }
189 }
190 }
191
192 buffers.image_ranges.clear();
194 if let Ok(re) = get_cached_regex(MARKDOWN_IMAGE_PATTERN_STR) {
195 for cap in re.captures_iter(line) {
196 if let Some(mat) = cap.get(0) {
197 buffers.image_ranges.push((mat.start(), mat.end()));
198 }
199 }
200 }
201
202 let is_badge_line = get_cached_regex(BADGE_LINK_LINE_STR)
204 .map(|re| re.is_match(line))
205 .unwrap_or(false);
206
207 if is_badge_line {
208 return warnings;
209 }
210
211 buffers.urls_found.clear();
213
214 if let Ok(re) = get_cached_regex(IPV6_URL_REGEX_STR) {
216 for mat in re.find_iter(line) {
217 let url_str = mat.as_str();
218 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
219 }
220 }
221
222 if let Ok(re) = get_cached_regex(SIMPLE_URL_REGEX_STR) {
224 for mat in re.find_iter(line) {
225 let url_str = mat.as_str();
226
227 if url_str.contains("://[") {
229 continue;
230 }
231
232 if let Some(host_start) = url_str.find("://") {
235 let after_protocol = &url_str[host_start + 3..];
236 if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
238 if let Some(char_after) = line.chars().nth(mat.end())
240 && char_after == ']'
241 {
242 continue;
244 }
245 }
246 }
247
248 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
249 }
250 }
251
252 if let Ok(re) = get_cached_regex(WWW_URL_REGEX_STR) {
254 for mat in re.find_iter(line) {
255 let url_str = mat.as_str();
256 let start_pos = mat.start();
257 let end_pos = mat.end();
258
259 if start_pos > 0 {
261 let prev_char = line.as_bytes().get(start_pos - 1).copied();
262 if prev_char == Some(b'/') || prev_char == Some(b'@') {
263 continue;
264 }
265 }
266
267 if start_pos > 0 && end_pos < line.len() {
269 let prev_char = line.as_bytes().get(start_pos - 1).copied();
270 let next_char = line.as_bytes().get(end_pos).copied();
271 if prev_char == Some(b'<') && next_char == Some(b'>') {
272 continue;
273 }
274 }
275
276 buffers.urls_found.push((start_pos, end_pos, url_str.to_string()));
277 }
278 }
279
280 for &(start, end, ref url_str) in buffers.urls_found.iter() {
282 if get_cached_regex(CUSTOM_PROTOCOL_PATTERN_STR)
284 .map(|re| re.is_match(url_str))
285 .unwrap_or(false)
286 {
287 continue;
288 }
289
290 let mut is_inside_construct = false;
292 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
293 if start >= link_start && end <= link_end {
294 is_inside_construct = true;
295 break;
296 }
297 }
298
299 for &(img_start, img_end) in buffers.image_ranges.iter() {
300 if start >= img_start && end <= img_end {
301 is_inside_construct = true;
302 break;
303 }
304 }
305
306 if is_inside_construct {
307 continue;
308 }
309
310 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
312 let absolute_pos = line_start_byte + start;
313
314 if ctx.is_in_html_tag(absolute_pos) {
316 continue;
317 }
318
319 if ctx.is_in_html_comment(absolute_pos) {
321 continue;
322 }
323
324 let trimmed_url = self.trim_trailing_punctuation(url_str);
326
327 if !trimmed_url.is_empty() && trimmed_url != "//" {
329 let trimmed_len = trimmed_url.len();
330 let (start_line, start_col, end_line, end_col) =
331 calculate_url_range(line_number, line, start, trimmed_len);
332
333 warnings.push(LintWarning {
334 rule_name: Some("MD034".to_string()),
335 line: start_line,
336 column: start_col,
337 end_line,
338 end_column: end_col,
339 message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
340 severity: Severity::Warning,
341 fix: Some(Fix {
342 range: {
343 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
344 (line_start_byte + start)..(line_start_byte + start + trimmed_len)
345 },
346 replacement: format!("<{trimmed_url}>"),
347 }),
348 });
349 }
350 }
351
352 for cap in EMAIL_PATTERN.captures_iter(line) {
354 if let Some(mat) = cap.get(0) {
355 let email = mat.as_str();
356 let start = mat.start();
357 let end = mat.end();
358
359 let mut is_inside_construct = false;
361 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
362 if start >= link_start && end <= link_end {
363 is_inside_construct = true;
364 break;
365 }
366 }
367
368 if !is_inside_construct {
369 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
371 let absolute_pos = line_start_byte + start;
372
373 if ctx.is_in_html_tag(absolute_pos) {
375 continue;
376 }
377
378 let is_in_code_span = code_spans
380 .iter()
381 .any(|span| span.line == line_number && start >= span.start_col && start < span.end_col);
382
383 if !is_in_code_span {
384 let email_len = end - start;
385 let (start_line, start_col, end_line, end_col) =
386 calculate_url_range(line_number, line, start, email_len);
387
388 warnings.push(LintWarning {
389 rule_name: Some("MD034".to_string()),
390 line: start_line,
391 column: start_col,
392 end_line,
393 end_column: end_col,
394 message: format!("Email address without angle brackets or link formatting: '{email}'"),
395 severity: Severity::Warning,
396 fix: Some(Fix {
397 range: (line_start_byte + start)..(line_start_byte + end),
398 replacement: format!("<{email}>"),
399 }),
400 });
401 }
402 }
403 }
404 }
405
406 warnings
407 }
408}
409
410impl Rule for MD034NoBareUrls {
411 #[inline]
412 fn name(&self) -> &'static str {
413 "MD034"
414 }
415
416 fn as_any(&self) -> &dyn std::any::Any {
417 self
418 }
419
420 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
421 where
422 Self: Sized,
423 {
424 Box::new(MD034NoBareUrls)
425 }
426
427 #[inline]
428 fn category(&self) -> RuleCategory {
429 RuleCategory::Link
430 }
431
432 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
433 !ctx.likely_has_links_or_images() && self.should_skip_content(ctx.content)
434 }
435
436 #[inline]
437 fn description(&self) -> &'static str {
438 "No bare URLs - wrap URLs in angle brackets"
439 }
440
441 fn check(&self, ctx: &LintContext) -> LintResult {
442 let mut warnings = Vec::new();
443 let content = ctx.content;
444
445 if self.should_skip_content(content) {
447 return Ok(warnings);
448 }
449
450 let line_index = &ctx.line_index;
452
453 let code_spans = ctx.code_spans();
455
456 let mut buffers = LineCheckBuffers::default();
458
459 for line in ctx.filtered_lines().skip_front_matter().skip_code_blocks() {
462 let mut line_warnings =
463 self.check_line(line.content, ctx, line.line_num, &code_spans, &mut buffers, line_index);
464
465 line_warnings.retain(|warning| {
467 !code_spans.iter().any(|span| {
469 span.line == warning.line &&
470 warning.column > 0 && (warning.column - 1) >= span.start_col &&
472 (warning.column - 1) < span.end_col
473 })
474 });
475
476 line_warnings.retain(|warning| {
480 if let Some(fix) = &warning.fix {
481 !ctx.links
483 .iter()
484 .any(|link| fix.range.start >= link.byte_offset && fix.range.end <= link.byte_end)
485 } else {
486 true
487 }
488 });
489
490 warnings.extend(line_warnings);
491 }
492
493 Ok(warnings)
494 }
495
496 fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
497 let mut content = ctx.content.to_string();
498 let mut warnings = self.check(ctx)?;
499
500 warnings.sort_by_key(|w| w.fix.as_ref().map(|f| f.range.start).unwrap_or(0));
502
503 for warning in warnings.iter().rev() {
505 if let Some(fix) = &warning.fix {
506 let start = fix.range.start;
507 let end = fix.range.end;
508 content.replace_range(start..end, &fix.replacement);
509 }
510 }
511
512 Ok(content)
513 }
514}