1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::calculate_url_range;
6use crate::utils::regex_cache::{EMAIL_PATTERN, get_cached_regex};
7
8use crate::lint_context::LintContext;
9
// Regex sources used by MD034. They are kept as strings and compiled on demand
// through `get_cached_regex`.

/// Cheap pre-filter: a line can only contain a bare URL or email if it has an
/// `http(s)://` / `ftp(s)://` prefix or an `@`.
const URL_QUICK_CHECK_STR: &str = r#"(?:https?|ftps?)://|@"#;
/// Scheme prefixes treated as "custom protocols"; a candidate URL whose text
/// matches this pattern is not reported.
const CUSTOM_PROTOCOL_PATTERN_STR: &str = r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#;
/// Inline link `[text](target "optional title")`; the link text may contain one
/// level of nested brackets.
const MARKDOWN_LINK_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;
/// Inline link with an empty target: `[text]()`.
const MARKDOWN_EMPTY_LINK_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(\)"#;
/// Reference-style link with an empty label: `[text][]`.
const MARKDOWN_EMPTY_REF_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\[\]"#;
/// Angle-bracket autolink: `<http(s)/ftp(s) URL>` (including bracketed IPv6
/// hosts) or `<user@host.tld>`.
const ANGLE_LINK_PATTERN_STR: &str =
    r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#;
/// A whole line consisting of a single linked image ("badge"):
/// `[![alt](image)](target)`.
const BADGE_LINK_LINE_STR: &str = r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#;
/// Inline image `![alt](target "optional title")`.
const MARKDOWN_IMAGE_PATTERN_STR: &str = r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;
/// General `http(s)://` / `ftp(s)://` URL with optional path, query and fragment.
/// Whitespace, angle brackets, square brackets, parentheses, backslash, quotes
/// and backticks terminate the match.
const SIMPLE_URL_REGEX_STR: &str = r#"(https?|ftps?)://(?:\[[0-9a-fA-F:%.]+\](?::\d+)?|[^\s<>\[\]()\\'\"`\]]+)(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#;
/// URL whose host is a bracketed IPv6 literal, with optional port, path, query
/// and fragment.
const IPV6_URL_REGEX_STR: &str = r#"(https?|ftps?)://\[[0-9a-fA-F:%.\-a-zA-Z]+\](?::\d+)?(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#;
/// Link reference definition whose destination is an `http(s)`/`ftp(s)` URL that
/// runs to the end of the line: `[label]: https://example.com`.
const REFERENCE_DEF_RE_STR: &str = r"^\s*\[[^\]]+\]:\s*(?:https?|ftps?)://\S+$";
/// HTML comment, possibly spanning several lines (non-greedy).
const HTML_COMMENT_PATTERN_STR: &str = r#"<!--[\s\S]*?-->"#;
/// Anything between `<` and the next `>`.
const HTML_TAG_PATTERN_STR: &str = r#"<[^>]*>"#;
/// Line that contains `](...)` with no `[` before it, i.e. it looks like the
/// tail of a link whose text started on an earlier line.
const MULTILINE_LINK_CONTINUATION_STR: &str = r#"^[^\[]*\]\(.*\)"#;
26
/// Scratch buffers reused across `check_line` calls so that the per-line
/// vectors do not have to be reallocated. Each buffer is cleared by
/// `check_line` before it is filled.
#[derive(Default)]
struct LineCheckBuffers {
    /// Byte ranges `(start, end)` within the current line covered by inline
    /// links, empty links, empty references and angle-bracket autolinks.
    markdown_link_ranges: Vec<(usize, usize)>,
    /// Byte ranges `(start, end)` within the current line covered by inline images.
    image_ranges: Vec<(usize, usize)>,
    /// Candidate URLs in the current line as `(start byte, end byte, matched text)`.
    urls_found: Vec<(usize, usize, String)>,
}
34
/// Rule MD034: reports URLs and email addresses that appear in the text without
/// angle brackets or link formatting, and offers a fix that wraps them in `<...>`.
#[derive(Default, Clone)]
pub struct MD034NoBareUrls;
37
38impl MD034NoBareUrls {
39 #[inline]
40 pub fn should_skip_content(&self, content: &str) -> bool {
41 let bytes = content.as_bytes();
44 !bytes.contains(&b':') && !bytes.contains(&b'@')
45 }
46
47 fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
49 let mut trimmed = url;
50
51 let open_parens = url.chars().filter(|&c| c == '(').count();
53 let close_parens = url.chars().filter(|&c| c == ')').count();
54
55 if close_parens > open_parens {
56 let mut balance = 0;
58 let mut last_balanced_pos = url.len();
59
60 for (i, c) in url.chars().enumerate() {
61 if c == '(' {
62 balance += 1;
63 } else if c == ')' {
64 balance -= 1;
65 if balance < 0 {
66 last_balanced_pos = i;
68 break;
69 }
70 }
71 }
72
73 trimmed = &trimmed[..last_balanced_pos];
74 }
75
76 while let Some(last_char) = trimmed.chars().last() {
78 if matches!(last_char, '.' | ',' | ';' | ':' | '!' | '?') {
79 if last_char == ':' && trimmed.len() > 1 {
82 break;
84 }
85 trimmed = &trimmed[..trimmed.len() - 1];
86 } else {
87 break;
88 }
89 }
90
91 trimmed
92 }
93
94 fn is_reference_definition(&self, line: &str) -> bool {
96 get_cached_regex(REFERENCE_DEF_RE_STR)
97 .map(|re| re.is_match(line))
98 .unwrap_or(false)
99 }
100
101 fn is_in_html_comment(&self, content: &str, pos: usize) -> bool {
103 if let Ok(re) = get_cached_regex(HTML_COMMENT_PATTERN_STR) {
105 for mat in re.find_iter(content) {
106 if pos >= mat.start() && pos < mat.end() {
107 return true;
108 }
109 }
110 }
111 false
112 }
113
114 fn is_in_html_tag(&self, line: &str, pos: usize) -> bool {
116 if let Ok(re) = get_cached_regex(HTML_TAG_PATTERN_STR) {
118 for mat in re.find_iter(line) {
119 if pos >= mat.start() && pos < mat.end() {
120 return true;
121 }
122 }
123 }
124 false
125 }
126
127 fn check_line(
128 &self,
129 line: &str,
130 content: &str,
131 line_number: usize,
132 code_spans: &[crate::lint_context::CodeSpan],
133 buffers: &mut LineCheckBuffers,
134 ) -> Vec<LintWarning> {
135 let mut warnings = Vec::new();
136
137 if self.is_reference_definition(line) {
139 return warnings;
140 }
141
142 if let Ok(re) = get_cached_regex(MULTILINE_LINK_CONTINUATION_STR)
145 && re.is_match(line)
146 {
147 return warnings;
148 }
149
150 if let Ok(re) = get_cached_regex(URL_QUICK_CHECK_STR)
152 && !re.is_match(line)
153 && !line.contains('@')
154 {
155 return warnings;
156 }
157
158 buffers.markdown_link_ranges.clear();
160 if let Ok(re) = get_cached_regex(MARKDOWN_LINK_PATTERN_STR) {
161 for cap in re.captures_iter(line) {
162 if let Some(mat) = cap.get(0) {
163 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
164 }
165 }
166 }
167
168 if let Ok(re) = get_cached_regex(MARKDOWN_EMPTY_LINK_PATTERN_STR) {
170 for mat in re.find_iter(line) {
171 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
172 }
173 }
174
175 if let Ok(re) = get_cached_regex(MARKDOWN_EMPTY_REF_PATTERN_STR) {
176 for mat in re.find_iter(line) {
177 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
178 }
179 }
180
181 if let Ok(re) = get_cached_regex(ANGLE_LINK_PATTERN_STR) {
182 for cap in re.captures_iter(line) {
183 if let Some(mat) = cap.get(0) {
184 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
185 }
186 }
187 }
188
189 buffers.image_ranges.clear();
191 if let Ok(re) = get_cached_regex(MARKDOWN_IMAGE_PATTERN_STR) {
192 for cap in re.captures_iter(line) {
193 if let Some(mat) = cap.get(0) {
194 buffers.image_ranges.push((mat.start(), mat.end()));
195 }
196 }
197 }
198
199 let is_badge_line = get_cached_regex(BADGE_LINK_LINE_STR)
201 .map(|re| re.is_match(line))
202 .unwrap_or(false);
203
204 if is_badge_line {
205 return warnings;
206 }
207
208 buffers.urls_found.clear();
210
211 if let Ok(re) = get_cached_regex(IPV6_URL_REGEX_STR) {
213 for mat in re.find_iter(line) {
214 let url_str = mat.as_str();
215 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
216 }
217 }
218
219 if let Ok(re) = get_cached_regex(SIMPLE_URL_REGEX_STR) {
221 for mat in re.find_iter(line) {
222 let url_str = mat.as_str();
223
224 if url_str.contains("://[") {
226 continue;
227 }
228
229 if let Some(host_start) = url_str.find("://") {
232 let after_protocol = &url_str[host_start + 3..];
233 if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
235 if let Some(char_after) = line.chars().nth(mat.end())
237 && char_after == ']'
238 {
239 continue;
241 }
242 }
243 }
244
245 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
246 }
247 }
248
249 for &(start, end, ref url_str) in buffers.urls_found.iter() {
251 if get_cached_regex(CUSTOM_PROTOCOL_PATTERN_STR)
253 .map(|re| re.is_match(url_str))
254 .unwrap_or(false)
255 {
256 continue;
257 }
258
259 let mut is_inside_construct = false;
261 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
262 if start >= link_start && end <= link_end {
263 is_inside_construct = true;
264 break;
265 }
266 }
267
268 for &(img_start, img_end) in buffers.image_ranges.iter() {
269 if start >= img_start && end <= img_end {
270 is_inside_construct = true;
271 break;
272 }
273 }
274
275 if is_inside_construct {
276 continue;
277 }
278
279 if self.is_in_html_tag(line, start) {
281 continue;
282 }
283
284 let absolute_pos = content
286 .lines()
287 .take(line_number - 1)
288 .map(|l| l.len() + 1)
289 .sum::<usize>()
290 + start;
291 if self.is_in_html_comment(content, absolute_pos) {
292 continue;
293 }
294
295 let trimmed_url = self.trim_trailing_punctuation(url_str);
297
298 if !trimmed_url.is_empty() && trimmed_url != "//" {
300 let trimmed_len = trimmed_url.len();
301 let (start_line, start_col, end_line, end_col) =
302 calculate_url_range(line_number, line, start, trimmed_len);
303
304 warnings.push(LintWarning {
305 rule_name: Some("MD034"),
306 line: start_line,
307 column: start_col,
308 end_line,
309 end_column: end_col,
310 message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
311 severity: Severity::Warning,
312 fix: Some(Fix {
313 range: {
314 let line_start_byte = content
315 .lines()
316 .take(line_number - 1)
317 .map(|l| l.len() + 1)
318 .sum::<usize>();
319 (line_start_byte + start)..(line_start_byte + start + trimmed_len)
320 },
321 replacement: format!("<{trimmed_url}>"),
322 }),
323 });
324 }
325 }
326
327 for cap in EMAIL_PATTERN.captures_iter(line) {
329 if let Some(mat) = cap.get(0) {
330 let email = mat.as_str();
331 let start = mat.start();
332 let end = mat.end();
333
334 let mut is_inside_construct = false;
336 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
337 if start >= link_start && end <= link_end {
338 is_inside_construct = true;
339 break;
340 }
341 }
342
343 if !is_inside_construct {
344 if self.is_in_html_tag(line, start) {
346 continue;
347 }
348
349 let is_in_code_span = code_spans
351 .iter()
352 .any(|span| span.line == line_number && start >= span.start_col && start < span.end_col);
353
354 if !is_in_code_span {
355 let email_len = end - start;
356 let (start_line, start_col, end_line, end_col) =
357 calculate_url_range(line_number, line, start, email_len);
358
359 warnings.push(LintWarning {
360 rule_name: Some("MD034"),
361 line: start_line,
362 column: start_col,
363 end_line,
364 end_column: end_col,
365 message: format!("Email address without angle brackets or link formatting: '{email}'"),
366 severity: Severity::Warning,
367 fix: Some(Fix {
368 range: {
369 let line_start_byte = content
370 .lines()
371 .take(line_number - 1)
372 .map(|l| l.len() + 1)
373 .sum::<usize>();
374 (line_start_byte + start)..(line_start_byte + end)
375 },
376 replacement: format!("<{email}>"),
377 }),
378 });
379 }
380 }
381 }
382 }
383
384 warnings
385 }
386}
387
388impl Rule for MD034NoBareUrls {
389 #[inline]
390 fn name(&self) -> &'static str {
391 "MD034"
392 }
393
394 fn as_any(&self) -> &dyn std::any::Any {
395 self
396 }
397
398 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
399 where
400 Self: Sized,
401 {
402 Box::new(MD034NoBareUrls)
403 }
404
405 #[inline]
406 fn category(&self) -> RuleCategory {
407 RuleCategory::Link
408 }
409
410 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
411 !ctx.likely_has_links_or_images() && self.should_skip_content(ctx.content)
412 }
413
414 #[inline]
415 fn description(&self) -> &'static str {
416 "No bare URLs - wrap URLs in angle brackets"
417 }
418
419 fn check(&self, ctx: &LintContext) -> LintResult {
420 let mut warnings = Vec::new();
421 let content = ctx.content;
422
423 if self.should_skip_content(content) {
425 return Ok(warnings);
426 }
427
428 let code_spans = ctx.code_spans();
430
431 let mut buffers = LineCheckBuffers::default();
433
434 for (line_num, line) in content.lines().enumerate() {
436 if ctx.is_in_code_block(line_num + 1) {
438 continue;
439 }
440
441 let mut line_warnings = self.check_line(line, content, line_num + 1, &code_spans, &mut buffers);
442
443 line_warnings.retain(|warning| {
445 !code_spans.iter().any(|span| {
447 span.line == warning.line &&
448 warning.column > 0 && (warning.column - 1) >= span.start_col &&
450 (warning.column - 1) < span.end_col
451 })
452 });
453
454 warnings.extend(line_warnings);
455 }
456
457 Ok(warnings)
458 }
459
460 fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
461 let mut content = ctx.content.to_string();
462 let mut warnings = self.check(ctx)?;
463
464 warnings.sort_by_key(|w| w.fix.as_ref().map(|f| f.range.start).unwrap_or(0));
466
467 for warning in warnings.iter().rev() {
469 if let Some(fix) = &warning.fix {
470 let start = fix.range.start;
471 let end = fix.range.end;
472 content.replace_range(start..end, &fix.replacement);
473 }
474 }
475
476 Ok(content)
477 }
478}