1use std::sync::LazyLock;
5
6use fancy_regex::Regex as FancyRegex;
7use regex::Regex;
8
9use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
10use crate::utils::range_utils::{LineIndex, calculate_url_range};
11use crate::utils::regex_cache::{
12 EMAIL_PATTERN, URL_IPV6_REGEX, URL_QUICK_CHECK_REGEX, URL_STANDARD_REGEX, URL_WWW_REGEX, XMPP_URI_REGEX,
13};
14
15use crate::filtered_lines::FilteredLinesExt;
16use crate::lint_context::LintContext;
17
18static CUSTOM_PROTOCOL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
20 Regex::new(r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#).unwrap()
21});
22static MARKDOWN_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
23 Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap()
24});
25static MARKDOWN_EMPTY_LINK_REGEX: LazyLock<Regex> =
26 LazyLock::new(|| Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(\)"#).unwrap());
27static MARKDOWN_EMPTY_REF_REGEX: LazyLock<Regex> =
28 LazyLock::new(|| Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\[\]"#).unwrap());
29static ANGLE_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
30 Regex::new(
31 r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|xmpp:[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#,
32 )
33 .unwrap()
34});
35static BADGE_LINK_LINE_REGEX: LazyLock<Regex> =
36 LazyLock::new(|| Regex::new(r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#).unwrap());
37static MARKDOWN_IMAGE_REGEX: LazyLock<Regex> =
38 LazyLock::new(|| Regex::new(r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap());
39static REFERENCE_DEF_REGEX: LazyLock<Regex> =
40 LazyLock::new(|| Regex::new(r"^\s*\[[^\]]+\]:\s*(?:<|(?:https?|ftps?)://)").unwrap());
41static MULTILINE_LINK_CONTINUATION_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"^[^\[]*\]\(.*\)"#).unwrap());
42static SHORTCUT_REF_FANCY_REGEX: LazyLock<FancyRegex> =
44 LazyLock::new(|| FancyRegex::new(r#"\[([^\[\]]+)\](?!\s*[\[(])"#).unwrap());
45
/// Scratch vectors reused from one `check_line` call to the next so that each
/// line does not allocate fresh vectors.
///
/// All offsets are byte offsets into the line currently being checked.
#[derive(Default)]
struct LineCheckBuffers {
    /// `(start, end)` spans of link-like constructs found on the line.
    markdown_link_ranges: Vec<(usize, usize)>,
    /// `(start, end)` spans of inline images found on the line.
    image_ranges: Vec<(usize, usize)>,
    /// `(start, end, matched text)` for every candidate URL found on the line.
    urls_found: Vec<(usize, usize, String)>,
}
53
/// Lint rule MD034: reports URLs and email addresses that appear in text without
/// angle brackets or link formatting.
///
/// The rule carries no configuration or state.
#[derive(Clone, Default)]
pub struct MD034NoBareUrls;
56
57impl MD034NoBareUrls {
58 #[inline]
59 pub fn should_skip_content(&self, content: &str) -> bool {
60 let bytes = content.as_bytes();
63 let has_colon = bytes.contains(&b':');
64 let has_at = bytes.contains(&b'@');
65 let has_www = content.contains("www.");
66 !has_colon && !has_at && !has_www
67 }
68
69 fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
71 let mut trimmed = url;
72
73 let open_parens = url.chars().filter(|&c| c == '(').count();
75 let close_parens = url.chars().filter(|&c| c == ')').count();
76
77 if close_parens > open_parens {
78 let mut balance = 0;
80 let mut last_balanced_pos = url.len();
81
82 for (byte_idx, c) in url.char_indices() {
83 if c == '(' {
84 balance += 1;
85 } else if c == ')' {
86 balance -= 1;
87 if balance < 0 {
88 last_balanced_pos = byte_idx;
90 break;
91 }
92 }
93 }
94
95 trimmed = &trimmed[..last_balanced_pos];
96 }
97
98 while let Some(last_char) = trimmed.chars().last() {
100 if matches!(last_char, '.' | ',' | ';' | ':' | '!' | '?') {
101 if last_char == ':' && trimmed.len() > 1 {
104 break;
106 }
107 trimmed = &trimmed[..trimmed.len() - 1];
108 } else {
109 break;
110 }
111 }
112
113 trimmed
114 }
115
116 fn is_reference_definition(&self, line: &str) -> bool {
118 REFERENCE_DEF_REGEX.is_match(line)
119 }
120
121 fn check_line(
122 &self,
123 line: &str,
124 ctx: &LintContext,
125 line_number: usize,
126 code_spans: &[crate::lint_context::CodeSpan],
127 buffers: &mut LineCheckBuffers,
128 line_index: &LineIndex,
129 ) -> Vec<LintWarning> {
130 let mut warnings = Vec::new();
131
132 if self.is_reference_definition(line) {
134 return warnings;
135 }
136
137 if ctx.line_info(line_number).is_some_and(|info| info.in_html_block) {
139 return warnings;
140 }
141
142 if MULTILINE_LINK_CONTINUATION_REGEX.is_match(line) {
145 return warnings;
146 }
147
148 let has_quick_check = URL_QUICK_CHECK_REGEX.is_match(line);
150 let has_www = line.contains("www.");
151 let has_at = line.contains('@');
152
153 if !has_quick_check && !has_at && !has_www {
154 return warnings;
155 }
156
157 buffers.markdown_link_ranges.clear();
159 for cap in MARKDOWN_LINK_REGEX.captures_iter(line) {
160 if let Some(mat) = cap.get(0) {
161 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
162 }
163 }
164
165 for mat in MARKDOWN_EMPTY_LINK_REGEX.find_iter(line) {
167 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
168 }
169
170 for mat in MARKDOWN_EMPTY_REF_REGEX.find_iter(line) {
171 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
172 }
173
174 for mat in SHORTCUT_REF_FANCY_REGEX.find_iter(line).flatten() {
178 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
179 }
180
181 for cap in ANGLE_LINK_REGEX.captures_iter(line) {
182 if let Some(mat) = cap.get(0) {
183 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
184 }
185 }
186
187 buffers.image_ranges.clear();
189 for cap in MARKDOWN_IMAGE_REGEX.captures_iter(line) {
190 if let Some(mat) = cap.get(0) {
191 buffers.image_ranges.push((mat.start(), mat.end()));
192 }
193 }
194
195 if BADGE_LINK_LINE_REGEX.is_match(line) {
197 return warnings;
198 }
199
200 buffers.urls_found.clear();
202
203 for mat in URL_IPV6_REGEX.find_iter(line) {
205 let url_str = mat.as_str();
206 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
207 }
208
209 for mat in URL_STANDARD_REGEX.find_iter(line) {
211 let url_str = mat.as_str();
212
213 if url_str.contains("://[") {
215 continue;
216 }
217
218 if let Some(host_start) = url_str.find("://") {
221 let after_protocol = &url_str[host_start + 3..];
222 if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
224 if line.as_bytes().get(mat.end()) == Some(&b']') {
226 continue;
228 }
229 }
230 }
231
232 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
233 }
234
235 for mat in URL_WWW_REGEX.find_iter(line) {
237 let url_str = mat.as_str();
238 let start_pos = mat.start();
239 let end_pos = mat.end();
240
241 if start_pos > 0 {
243 let prev_char = line.as_bytes().get(start_pos - 1).copied();
244 if prev_char == Some(b'/') || prev_char == Some(b'@') {
245 continue;
246 }
247 }
248
249 if start_pos > 0 && end_pos < line.len() {
251 let prev_char = line.as_bytes().get(start_pos - 1).copied();
252 let next_char = line.as_bytes().get(end_pos).copied();
253 if prev_char == Some(b'<') && next_char == Some(b'>') {
254 continue;
255 }
256 }
257
258 buffers.urls_found.push((start_pos, end_pos, url_str.to_string()));
259 }
260
261 for mat in XMPP_URI_REGEX.find_iter(line) {
263 let uri_str = mat.as_str();
264 let start_pos = mat.start();
265 let end_pos = mat.end();
266
267 if start_pos > 0 && end_pos < line.len() {
269 let prev_char = line.as_bytes().get(start_pos - 1).copied();
270 let next_char = line.as_bytes().get(end_pos).copied();
271 if prev_char == Some(b'<') && next_char == Some(b'>') {
272 continue;
273 }
274 }
275
276 buffers.urls_found.push((start_pos, end_pos, uri_str.to_string()));
277 }
278
279 for &(start, _end, ref url_str) in buffers.urls_found.iter() {
281 if CUSTOM_PROTOCOL_REGEX.is_match(url_str) {
283 continue;
284 }
285
286 let mut is_inside_construct = false;
291 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
292 if start >= link_start && start < link_end {
293 is_inside_construct = true;
294 break;
295 }
296 }
297
298 for &(img_start, img_end) in buffers.image_ranges.iter() {
299 if start >= img_start && start < img_end {
300 is_inside_construct = true;
301 break;
302 }
303 }
304
305 if is_inside_construct {
306 continue;
307 }
308
309 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
311 let absolute_pos = line_start_byte + start;
312
313 if ctx.is_in_html_tag(absolute_pos) {
315 continue;
316 }
317
318 if ctx.is_in_html_comment(absolute_pos) {
320 continue;
321 }
322
323 if ctx.is_in_shortcode(absolute_pos) {
325 continue;
326 }
327
328 let trimmed_url = self.trim_trailing_punctuation(url_str);
330
331 if !trimmed_url.is_empty() && trimmed_url != "//" {
333 let trimmed_len = trimmed_url.len();
334 let (start_line, start_col, end_line, end_col) =
335 calculate_url_range(line_number, line, start, trimmed_len);
336
337 let replacement = if trimmed_url.starts_with("www.") {
339 format!("<https://{trimmed_url}>")
340 } else {
341 format!("<{trimmed_url}>")
342 };
343
344 warnings.push(LintWarning {
345 rule_name: Some("MD034".to_string()),
346 line: start_line,
347 column: start_col,
348 end_line,
349 end_column: end_col,
350 message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
351 severity: Severity::Warning,
352 fix: Some(Fix {
353 range: {
354 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
355 (line_start_byte + start)..(line_start_byte + start + trimmed_len)
356 },
357 replacement,
358 }),
359 });
360 }
361 }
362
363 for cap in EMAIL_PATTERN.captures_iter(line) {
365 if let Some(mat) = cap.get(0) {
366 let email = mat.as_str();
367 let start = mat.start();
368 let end = mat.end();
369
370 if start >= 5 && line.is_char_boundary(start - 5) && &line[start - 5..start] == "xmpp:" {
373 continue;
374 }
375
376 let mut is_inside_construct = false;
378 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
379 if start >= link_start && end <= link_end {
380 is_inside_construct = true;
381 break;
382 }
383 }
384
385 if !is_inside_construct {
386 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
388 let absolute_pos = line_start_byte + start;
389
390 if ctx.is_in_html_tag(absolute_pos) {
392 continue;
393 }
394
395 let is_in_code_span = code_spans
397 .iter()
398 .any(|span| span.line == line_number && start >= span.start_col && start < span.end_col);
399
400 if !is_in_code_span {
401 let email_len = end - start;
402 let (start_line, start_col, end_line, end_col) =
403 calculate_url_range(line_number, line, start, email_len);
404
405 warnings.push(LintWarning {
406 rule_name: Some("MD034".to_string()),
407 line: start_line,
408 column: start_col,
409 end_line,
410 end_column: end_col,
411 message: format!("Email address without angle brackets or link formatting: '{email}'"),
412 severity: Severity::Warning,
413 fix: Some(Fix {
414 range: (line_start_byte + start)..(line_start_byte + end),
415 replacement: format!("<{email}>"),
416 }),
417 });
418 }
419 }
420 }
421 }
422
423 warnings
424 }
425}
426
427impl Rule for MD034NoBareUrls {
428 #[inline]
429 fn name(&self) -> &'static str {
430 "MD034"
431 }
432
433 fn as_any(&self) -> &dyn std::any::Any {
434 self
435 }
436
437 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
438 where
439 Self: Sized,
440 {
441 Box::new(MD034NoBareUrls)
442 }
443
444 #[inline]
445 fn category(&self) -> RuleCategory {
446 RuleCategory::Link
447 }
448
449 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
450 !ctx.likely_has_links_or_images() && self.should_skip_content(ctx.content)
451 }
452
453 #[inline]
454 fn description(&self) -> &'static str {
455 "No bare URLs - wrap URLs in angle brackets"
456 }
457
458 fn check(&self, ctx: &LintContext) -> LintResult {
459 let mut warnings = Vec::new();
460 let content = ctx.content;
461
462 if self.should_skip_content(content) {
464 return Ok(warnings);
465 }
466
467 let line_index = &ctx.line_index;
469
470 let code_spans = ctx.code_spans();
472
473 let mut buffers = LineCheckBuffers::default();
475
476 for line in ctx
480 .filtered_lines()
481 .skip_front_matter()
482 .skip_code_blocks()
483 .skip_obsidian_comments()
484 {
485 let mut line_warnings =
486 self.check_line(line.content, ctx, line.line_num, &code_spans, &mut buffers, line_index);
487
488 line_warnings.retain(|warning| {
490 !code_spans.iter().any(|span| {
492 span.line == warning.line &&
493 warning.column > 0 && (warning.column - 1) >= span.start_col &&
495 (warning.column - 1) < span.end_col
496 })
497 });
498
499 line_warnings.retain(|warning| {
503 if let Some(fix) = &warning.fix {
504 !ctx.links
506 .iter()
507 .any(|link| fix.range.start >= link.byte_offset && fix.range.end <= link.byte_end)
508 } else {
509 true
510 }
511 });
512
513 line_warnings.retain(|warning| !ctx.is_position_in_obsidian_comment(warning.line, warning.column));
516
517 warnings.extend(line_warnings);
518 }
519
520 Ok(warnings)
521 }
522
523 fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
524 let mut content = ctx.content.to_string();
525 let mut warnings = self.check(ctx)?;
526
527 warnings.sort_by_key(|w| w.fix.as_ref().map(|f| f.range.start).unwrap_or(0));
529
530 for warning in warnings.iter().rev() {
532 if let Some(fix) = &warning.fix {
533 let start = fix.range.start;
534 let end = fix.range.end;
535 content.replace_range(start..end, &fix.replacement);
536 }
537 }
538
539 Ok(content)
540 }
541}