1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::calculate_url_range;
6use crate::utils::regex_cache::{EMAIL_PATTERN, get_cached_regex};
7
8use crate::lint_context::LintContext;
9
/// Cheap pre-filter: matches an `http(s)://` / `ftp(s)://` scheme prefix or an `@`.
/// `check_line` returns early for lines this pattern does not match.
const URL_QUICK_CHECK_STR: &str = r#"(?:https?|ftps?)://|@"#;

/// Scheme prefixes that exempt a candidate URL from being reported when they
/// occur anywhere in the matched URL text.
const CUSTOM_PROTOCOL_PATTERN_STR: &str = r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#;

/// Inline Markdown link `[text](target "optional title")`; one level of nested
/// brackets is allowed in the link text.
const MARKDOWN_LINK_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;

/// Autolink in angle brackets: `<scheme://...>` (including bracketed IPv6 hosts)
/// or `<user@host.tld>`.
const ANGLE_LINK_PATTERN_STR: &str =
    r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#;

/// A whole line consisting only of a linked image (badge): `[![alt](img)](target)`.
const BADGE_LINK_LINE_STR: &str = r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#;

/// Inline Markdown image `![alt](src "optional title")`.
const MARKDOWN_IMAGE_PATTERN_STR: &str = r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;

/// General `http(s)`/`ftp(s)` URL with optional path, query and fragment parts.
const SIMPLE_URL_REGEX_STR: &str = r#"(https?|ftps?)://(?:\[[0-9a-fA-F:%.]+\](?::\d+)?|[^\s<>\[\]()\\'\"`\]]+)(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#;

/// URL whose host is a bracketed IPv6 literal, with optional port, path, query
/// and fragment parts.
const IPV6_URL_REGEX_STR: &str = r#"(https?|ftps?)://\[[0-9a-fA-F:%.\-a-zA-Z]+\](?::\d+)?(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#;

/// Link reference definition line: `[label]: scheme://target`.
const REFERENCE_DEF_RE_STR: &str = r"^\s*\[[^\]]+\]:\s*(?:https?|ftps?)://\S+$";

/// HTML comment, possibly spanning several lines (non-greedy).
const HTML_COMMENT_PATTERN_STR: &str = r#"<!--[\s\S]*?-->"#;

/// Anything between `<` and the next `>`.
const HTML_TAG_PATTERN_STR: &str = r#"<[^>]*>"#;

/// Line that closes a link's text and carries its target (`...](target)`)
/// without a `[` before the closing bracket.
const MULTILINE_LINK_CONTINUATION_STR: &str = r#"^[^\[]*\]\(.*\)"#;
24
/// Scratch vectors handed to `check_line` so that their allocations can be
/// reused from one line to the next; `check_line` clears each one before
/// filling it.
#[derive(Default)]
struct LineCheckBuffers {
    /// `(start, end)` byte ranges, within the current line, of inline links
    /// and angle-bracket autolinks.
    markdown_link_ranges: Vec<(usize, usize)>,
    /// `(start, end)` byte ranges, within the current line, of inline images.
    image_ranges: Vec<(usize, usize)>,
    /// Candidate URLs on the current line as `(start, end, matched text)`.
    urls_found: Vec<(usize, usize, String)>,
}
32
/// Lint rule MD034: reports URLs and email addresses that are written bare,
/// i.e. without angle brackets or link formatting. The rule holds no state.
#[derive(Clone, Default)]
pub struct MD034NoBareUrls;
35
36impl MD034NoBareUrls {
37 #[inline]
38 pub fn should_skip_content(&self, content: &str) -> bool {
39 let bytes = content.as_bytes();
42 !bytes.contains(&b':') && !bytes.contains(&b'@')
43 }
44
45 fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
47 let mut trimmed = url;
48
49 let open_parens = url.chars().filter(|&c| c == '(').count();
51 let close_parens = url.chars().filter(|&c| c == ')').count();
52
53 if close_parens > open_parens {
54 let mut balance = 0;
56 let mut last_balanced_pos = url.len();
57
58 for (i, c) in url.chars().enumerate() {
59 if c == '(' {
60 balance += 1;
61 } else if c == ')' {
62 balance -= 1;
63 if balance < 0 {
64 last_balanced_pos = i;
66 break;
67 }
68 }
69 }
70
71 trimmed = &trimmed[..last_balanced_pos];
72 }
73
74 while let Some(last_char) = trimmed.chars().last() {
76 if matches!(last_char, '.' | ',' | ';' | ':' | '!' | '?') {
77 if last_char == ':' && trimmed.len() > 1 {
80 break;
82 }
83 trimmed = &trimmed[..trimmed.len() - 1];
84 } else {
85 break;
86 }
87 }
88
89 trimmed
90 }
91
92 fn is_reference_definition(&self, line: &str) -> bool {
94 get_cached_regex(REFERENCE_DEF_RE_STR)
95 .map(|re| re.is_match(line))
96 .unwrap_or(false)
97 }
98
99 fn is_in_html_comment(&self, content: &str, pos: usize) -> bool {
101 if let Ok(re) = get_cached_regex(HTML_COMMENT_PATTERN_STR) {
103 for mat in re.find_iter(content) {
104 if pos >= mat.start() && pos < mat.end() {
105 return true;
106 }
107 }
108 }
109 false
110 }
111
112 fn is_in_html_tag(&self, line: &str, pos: usize) -> bool {
114 if let Ok(re) = get_cached_regex(HTML_TAG_PATTERN_STR) {
116 for mat in re.find_iter(line) {
117 if pos >= mat.start() && pos < mat.end() {
118 return true;
119 }
120 }
121 }
122 false
123 }
124
125 fn check_line(
126 &self,
127 line: &str,
128 content: &str,
129 line_number: usize,
130 code_spans: &[crate::lint_context::CodeSpan],
131 buffers: &mut LineCheckBuffers,
132 ) -> Vec<LintWarning> {
133 let mut warnings = Vec::new();
134
135 if self.is_reference_definition(line) {
137 return warnings;
138 }
139
140 if let Ok(re) = get_cached_regex(MULTILINE_LINK_CONTINUATION_STR)
143 && re.is_match(line)
144 {
145 return warnings;
146 }
147
148 if let Ok(re) = get_cached_regex(URL_QUICK_CHECK_STR)
150 && !re.is_match(line)
151 && !line.contains('@')
152 {
153 return warnings;
154 }
155
156 buffers.markdown_link_ranges.clear();
158 if let Ok(re) = get_cached_regex(MARKDOWN_LINK_PATTERN_STR) {
159 for cap in re.captures_iter(line) {
160 if let Some(mat) = cap.get(0) {
161 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
162 }
163 }
164 }
165
166 if let Ok(re) = get_cached_regex(ANGLE_LINK_PATTERN_STR) {
167 for cap in re.captures_iter(line) {
168 if let Some(mat) = cap.get(0) {
169 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
170 }
171 }
172 }
173
174 buffers.image_ranges.clear();
176 if let Ok(re) = get_cached_regex(MARKDOWN_IMAGE_PATTERN_STR) {
177 for cap in re.captures_iter(line) {
178 if let Some(mat) = cap.get(0) {
179 buffers.image_ranges.push((mat.start(), mat.end()));
180 }
181 }
182 }
183
184 let is_badge_line = get_cached_regex(BADGE_LINK_LINE_STR)
186 .map(|re| re.is_match(line))
187 .unwrap_or(false);
188
189 if is_badge_line {
190 return warnings;
191 }
192
193 buffers.urls_found.clear();
195
196 if let Ok(re) = get_cached_regex(IPV6_URL_REGEX_STR) {
198 for mat in re.find_iter(line) {
199 let url_str = mat.as_str();
200 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
201 }
202 }
203
204 if let Ok(re) = get_cached_regex(SIMPLE_URL_REGEX_STR) {
206 for mat in re.find_iter(line) {
207 let url_str = mat.as_str();
208
209 if url_str.contains("://[") {
211 continue;
212 }
213
214 if let Some(host_start) = url_str.find("://") {
217 let after_protocol = &url_str[host_start + 3..];
218 if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
220 if let Some(char_after) = line.chars().nth(mat.end())
222 && char_after == ']'
223 {
224 continue;
226 }
227 }
228 }
229
230 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
231 }
232 }
233
234 for &(start, end, ref url_str) in buffers.urls_found.iter() {
236 if get_cached_regex(CUSTOM_PROTOCOL_PATTERN_STR)
238 .map(|re| re.is_match(url_str))
239 .unwrap_or(false)
240 {
241 continue;
242 }
243
244 let mut is_inside_construct = false;
246 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
247 if start >= link_start && end <= link_end {
248 is_inside_construct = true;
249 break;
250 }
251 }
252
253 for &(img_start, img_end) in buffers.image_ranges.iter() {
254 if start >= img_start && end <= img_end {
255 is_inside_construct = true;
256 break;
257 }
258 }
259
260 if is_inside_construct {
261 continue;
262 }
263
264 if self.is_in_html_tag(line, start) {
266 continue;
267 }
268
269 let absolute_pos = content
271 .lines()
272 .take(line_number - 1)
273 .map(|l| l.len() + 1)
274 .sum::<usize>()
275 + start;
276 if self.is_in_html_comment(content, absolute_pos) {
277 continue;
278 }
279
280 let trimmed_url = self.trim_trailing_punctuation(url_str);
282
283 if !trimmed_url.is_empty() && trimmed_url != "//" {
285 let trimmed_len = trimmed_url.len();
286 let (start_line, start_col, end_line, end_col) =
287 calculate_url_range(line_number, line, start, trimmed_len);
288
289 warnings.push(LintWarning {
290 rule_name: Some("MD034"),
291 line: start_line,
292 column: start_col,
293 end_line,
294 end_column: end_col,
295 message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
296 severity: Severity::Warning,
297 fix: Some(Fix {
298 range: {
299 let line_start_byte = content
300 .lines()
301 .take(line_number - 1)
302 .map(|l| l.len() + 1)
303 .sum::<usize>();
304 (line_start_byte + start)..(line_start_byte + start + trimmed_len)
305 },
306 replacement: format!("<{trimmed_url}>"),
307 }),
308 });
309 }
310 }
311
312 for cap in EMAIL_PATTERN.captures_iter(line) {
314 if let Some(mat) = cap.get(0) {
315 let email = mat.as_str();
316 let start = mat.start();
317 let end = mat.end();
318
319 let mut is_inside_construct = false;
321 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
322 if start >= link_start && end <= link_end {
323 is_inside_construct = true;
324 break;
325 }
326 }
327
328 if !is_inside_construct {
329 if self.is_in_html_tag(line, start) {
331 continue;
332 }
333
334 let is_in_code_span = code_spans
336 .iter()
337 .any(|span| span.line == line_number && start >= span.start_col && start < span.end_col);
338
339 if !is_in_code_span {
340 let email_len = end - start;
341 let (start_line, start_col, end_line, end_col) =
342 calculate_url_range(line_number, line, start, email_len);
343
344 warnings.push(LintWarning {
345 rule_name: Some("MD034"),
346 line: start_line,
347 column: start_col,
348 end_line,
349 end_column: end_col,
350 message: format!("Email address without angle brackets or link formatting: '{email}'"),
351 severity: Severity::Warning,
352 fix: Some(Fix {
353 range: {
354 let line_start_byte = content
355 .lines()
356 .take(line_number - 1)
357 .map(|l| l.len() + 1)
358 .sum::<usize>();
359 (line_start_byte + start)..(line_start_byte + end)
360 },
361 replacement: format!("<{email}>"),
362 }),
363 });
364 }
365 }
366 }
367 }
368
369 warnings
370 }
371}
372
373impl Rule for MD034NoBareUrls {
374 #[inline]
375 fn name(&self) -> &'static str {
376 "MD034"
377 }
378
379 fn as_any(&self) -> &dyn std::any::Any {
380 self
381 }
382
383 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
384 where
385 Self: Sized,
386 {
387 Box::new(MD034NoBareUrls)
388 }
389
390 #[inline]
391 fn category(&self) -> RuleCategory {
392 RuleCategory::Link
393 }
394
395 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
396 !ctx.likely_has_links_or_images() && self.should_skip_content(ctx.content)
397 }
398
399 #[inline]
400 fn description(&self) -> &'static str {
401 "No bare URLs - wrap URLs in angle brackets"
402 }
403
404 fn check(&self, ctx: &LintContext) -> LintResult {
405 let mut warnings = Vec::new();
406 let content = ctx.content;
407
408 if self.should_skip_content(content) {
410 return Ok(warnings);
411 }
412
413 let code_spans = ctx.code_spans();
415
416 let mut buffers = LineCheckBuffers::default();
418
419 for (line_num, line) in content.lines().enumerate() {
421 if ctx.is_in_code_block(line_num + 1) {
423 continue;
424 }
425
426 let mut line_warnings = self.check_line(line, content, line_num + 1, &code_spans, &mut buffers);
427
428 line_warnings.retain(|warning| {
430 !code_spans.iter().any(|span| {
432 span.line == warning.line &&
433 warning.column > 0 && (warning.column - 1) >= span.start_col &&
435 (warning.column - 1) < span.end_col
436 })
437 });
438
439 warnings.extend(line_warnings);
440 }
441
442 Ok(warnings)
443 }
444
445 fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
446 let mut content = ctx.content.to_string();
447 let mut warnings = self.check(ctx)?;
448
449 warnings.sort_by_key(|w| w.fix.as_ref().map(|f| f.range.start).unwrap_or(0));
451
452 for warning in warnings.iter().rev() {
454 if let Some(fix) = &warning.fix {
455 let start = fix.range.start;
456 let end = fix.range.end;
457 content.replace_range(start..end, &fix.replacement);
458 }
459 }
460
461 Ok(content)
462 }
463}