1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::{LineIndex, calculate_url_range};
6use crate::utils::regex_cache::{EMAIL_PATTERN, get_cached_regex};
7
8use crate::filtered_lines::FilteredLinesExt;
9use crate::lint_context::LintContext;
10
// Regex sources used by the MD034 (bare URL) rule. They are kept as pattern
// strings and compiled on demand through `get_cached_regex`.

/// Cheap per-line pre-filter: an `http(s)://` / `ftp(s)://` scheme or an `@`.
const URL_QUICK_CHECK_STR: &str = r#"(?:https?|ftps?)://|@"#;
/// Non-web schemes; URL candidates matching this pattern are not reported.
const CUSTOM_PROTOCOL_PATTERN_STR: &str = r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#;
/// Inline link `[text](target "optional title")`; the text may contain one level of nested brackets.
const MARKDOWN_LINK_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;
/// Inline link with an empty target: `[text]()`.
const MARKDOWN_EMPTY_LINK_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(\)"#;
/// Collapsed reference link: `[text][]`.
const MARKDOWN_EMPTY_REF_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\[\]"#;
/// Angle-bracket autolink wrapping either a web URL (including bracketed IPv6 hosts) or an e-mail address.
const ANGLE_LINK_PATTERN_STR: &str =
    r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#;
/// A whole line consisting only of a linked image (badge): `[![alt](img)](target)`.
const BADGE_LINK_LINE_STR: &str = r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#;
/// Image `![alt](src "optional title")`.
const MARKDOWN_IMAGE_PATTERN_STR: &str = r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;
/// Web URL with optional path, query and fragment parts.
const SIMPLE_URL_REGEX_STR: &str = r#"(https?|ftps?)://(?:\[[0-9a-fA-F:%.]+\](?::\d+)?|[^\s<>\[\]()\\'\"`\]]+)(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#;
/// Web URL whose host is a bracketed IPv6 literal, with optional port, path, query and fragment.
const IPV6_URL_REGEX_STR: &str = r#"(https?|ftps?)://\[[0-9a-fA-F:%.\-a-zA-Z]+\](?::\d+)?(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#;
/// A whole line that is a reference definition pointing at a web URL: `[label]: https://…`.
const REFERENCE_DEF_RE_STR: &str = r"^\s*\[[^\]]+\]:\s*(?:https?|ftps?)://\S+$";
/// Anything between `<` and the next `>`.
const HTML_TAG_PATTERN_STR: &str = r#"<[^>]*>"#;
/// A line that closes a link opened on an earlier line: `…](target)` with no `[` before the `]`.
const MULTILINE_LINK_CONTINUATION_STR: &str = r#"^[^\[]*\]\(.*\)"#;
26
/// Scratch vectors handed to `check_line` so their allocations can be reused
/// from one line to the next; `check_line` clears each vector before filling it.
#[derive(Debug, Default)]
struct LineCheckBuffers {
    /// Byte ranges `(start, end)` within the current line covered by inline
    /// links, empty links, collapsed reference links and angle-bracket autolinks.
    markdown_link_ranges: Vec<(usize, usize)>,
    /// Byte ranges `(start, end)` within the current line covered by images.
    image_ranges: Vec<(usize, usize)>,
    /// URL candidates found on the current line: `(start byte, end byte, matched text)`.
    urls_found: Vec<(usize, usize, String)>,
}
34
/// Lint rule MD034: reports URLs and e-mail addresses that appear without
/// angle brackets or link formatting. The rule carries no state.
#[derive(Debug, Default, Clone)]
pub struct MD034NoBareUrls;
37
38impl MD034NoBareUrls {
39 #[inline]
40 pub fn should_skip_content(&self, content: &str) -> bool {
41 let bytes = content.as_bytes();
44 !bytes.contains(&b':') && !bytes.contains(&b'@')
45 }
46
47 fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
49 let mut trimmed = url;
50
51 let open_parens = url.chars().filter(|&c| c == '(').count();
53 let close_parens = url.chars().filter(|&c| c == ')').count();
54
55 if close_parens > open_parens {
56 let mut balance = 0;
58 let mut last_balanced_pos = url.len();
59
60 for (i, c) in url.chars().enumerate() {
61 if c == '(' {
62 balance += 1;
63 } else if c == ')' {
64 balance -= 1;
65 if balance < 0 {
66 last_balanced_pos = i;
68 break;
69 }
70 }
71 }
72
73 trimmed = &trimmed[..last_balanced_pos];
74 }
75
76 while let Some(last_char) = trimmed.chars().last() {
78 if matches!(last_char, '.' | ',' | ';' | ':' | '!' | '?') {
79 if last_char == ':' && trimmed.len() > 1 {
82 break;
84 }
85 trimmed = &trimmed[..trimmed.len() - 1];
86 } else {
87 break;
88 }
89 }
90
91 trimmed
92 }
93
94 fn is_reference_definition(&self, line: &str) -> bool {
96 get_cached_regex(REFERENCE_DEF_RE_STR)
97 .map(|re| re.is_match(line))
98 .unwrap_or(false)
99 }
100
101 fn is_in_html_tag(&self, line: &str, pos: usize) -> bool {
103 if let Ok(re) = get_cached_regex(HTML_TAG_PATTERN_STR) {
105 for mat in re.find_iter(line) {
106 if pos >= mat.start() && pos < mat.end() {
107 return true;
108 }
109 }
110 }
111 false
112 }
113
114 fn check_line(
115 &self,
116 line: &str,
117 ctx: &LintContext,
118 line_number: usize,
119 code_spans: &[crate::lint_context::CodeSpan],
120 buffers: &mut LineCheckBuffers,
121 line_index: &LineIndex,
122 ) -> Vec<LintWarning> {
123 let mut warnings = Vec::new();
124
125 if self.is_reference_definition(line) {
127 return warnings;
128 }
129
130 if ctx.line_info(line_number).is_some_and(|info| info.in_html_block) {
132 return warnings;
133 }
134
135 if let Ok(re) = get_cached_regex(MULTILINE_LINK_CONTINUATION_STR)
138 && re.is_match(line)
139 {
140 return warnings;
141 }
142
143 if let Ok(re) = get_cached_regex(URL_QUICK_CHECK_STR)
145 && !re.is_match(line)
146 && !line.contains('@')
147 {
148 return warnings;
149 }
150
151 buffers.markdown_link_ranges.clear();
153 if let Ok(re) = get_cached_regex(MARKDOWN_LINK_PATTERN_STR) {
154 for cap in re.captures_iter(line) {
155 if let Some(mat) = cap.get(0) {
156 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
157 }
158 }
159 }
160
161 if let Ok(re) = get_cached_regex(MARKDOWN_EMPTY_LINK_PATTERN_STR) {
163 for mat in re.find_iter(line) {
164 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
165 }
166 }
167
168 if let Ok(re) = get_cached_regex(MARKDOWN_EMPTY_REF_PATTERN_STR) {
169 for mat in re.find_iter(line) {
170 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
171 }
172 }
173
174 if let Ok(re) = get_cached_regex(ANGLE_LINK_PATTERN_STR) {
175 for cap in re.captures_iter(line) {
176 if let Some(mat) = cap.get(0) {
177 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
178 }
179 }
180 }
181
182 buffers.image_ranges.clear();
184 if let Ok(re) = get_cached_regex(MARKDOWN_IMAGE_PATTERN_STR) {
185 for cap in re.captures_iter(line) {
186 if let Some(mat) = cap.get(0) {
187 buffers.image_ranges.push((mat.start(), mat.end()));
188 }
189 }
190 }
191
192 let is_badge_line = get_cached_regex(BADGE_LINK_LINE_STR)
194 .map(|re| re.is_match(line))
195 .unwrap_or(false);
196
197 if is_badge_line {
198 return warnings;
199 }
200
201 buffers.urls_found.clear();
203
204 if let Ok(re) = get_cached_regex(IPV6_URL_REGEX_STR) {
206 for mat in re.find_iter(line) {
207 let url_str = mat.as_str();
208 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
209 }
210 }
211
212 if let Ok(re) = get_cached_regex(SIMPLE_URL_REGEX_STR) {
214 for mat in re.find_iter(line) {
215 let url_str = mat.as_str();
216
217 if url_str.contains("://[") {
219 continue;
220 }
221
222 if let Some(host_start) = url_str.find("://") {
225 let after_protocol = &url_str[host_start + 3..];
226 if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
228 if let Some(char_after) = line.chars().nth(mat.end())
230 && char_after == ']'
231 {
232 continue;
234 }
235 }
236 }
237
238 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
239 }
240 }
241
242 for &(start, end, ref url_str) in buffers.urls_found.iter() {
244 if get_cached_regex(CUSTOM_PROTOCOL_PATTERN_STR)
246 .map(|re| re.is_match(url_str))
247 .unwrap_or(false)
248 {
249 continue;
250 }
251
252 let mut is_inside_construct = false;
254 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
255 if start >= link_start && end <= link_end {
256 is_inside_construct = true;
257 break;
258 }
259 }
260
261 for &(img_start, img_end) in buffers.image_ranges.iter() {
262 if start >= img_start && end <= img_end {
263 is_inside_construct = true;
264 break;
265 }
266 }
267
268 if is_inside_construct {
269 continue;
270 }
271
272 if self.is_in_html_tag(line, start) {
274 continue;
275 }
276
277 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
279 let absolute_pos = line_start_byte + start;
280 if ctx.is_in_html_comment(absolute_pos) {
281 continue;
282 }
283
284 let trimmed_url = self.trim_trailing_punctuation(url_str);
286
287 if !trimmed_url.is_empty() && trimmed_url != "//" {
289 let trimmed_len = trimmed_url.len();
290 let (start_line, start_col, end_line, end_col) =
291 calculate_url_range(line_number, line, start, trimmed_len);
292
293 warnings.push(LintWarning {
294 rule_name: Some("MD034".to_string()),
295 line: start_line,
296 column: start_col,
297 end_line,
298 end_column: end_col,
299 message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
300 severity: Severity::Warning,
301 fix: Some(Fix {
302 range: {
303 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
304 (line_start_byte + start)..(line_start_byte + start + trimmed_len)
305 },
306 replacement: format!("<{trimmed_url}>"),
307 }),
308 });
309 }
310 }
311
312 for cap in EMAIL_PATTERN.captures_iter(line) {
314 if let Some(mat) = cap.get(0) {
315 let email = mat.as_str();
316 let start = mat.start();
317 let end = mat.end();
318
319 let mut is_inside_construct = false;
321 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
322 if start >= link_start && end <= link_end {
323 is_inside_construct = true;
324 break;
325 }
326 }
327
328 if !is_inside_construct {
329 if self.is_in_html_tag(line, start) {
331 continue;
332 }
333
334 let is_in_code_span = code_spans
336 .iter()
337 .any(|span| span.line == line_number && start >= span.start_col && start < span.end_col);
338
339 if !is_in_code_span {
340 let email_len = end - start;
341 let (start_line, start_col, end_line, end_col) =
342 calculate_url_range(line_number, line, start, email_len);
343
344 warnings.push(LintWarning {
345 rule_name: Some("MD034".to_string()),
346 line: start_line,
347 column: start_col,
348 end_line,
349 end_column: end_col,
350 message: format!("Email address without angle brackets or link formatting: '{email}'"),
351 severity: Severity::Warning,
352 fix: Some(Fix {
353 range: {
354 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
355 (line_start_byte + start)..(line_start_byte + end)
356 },
357 replacement: format!("<{email}>"),
358 }),
359 });
360 }
361 }
362 }
363 }
364
365 warnings
366 }
367}
368
369impl Rule for MD034NoBareUrls {
370 #[inline]
371 fn name(&self) -> &'static str {
372 "MD034"
373 }
374
375 fn as_any(&self) -> &dyn std::any::Any {
376 self
377 }
378
379 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
380 where
381 Self: Sized,
382 {
383 Box::new(MD034NoBareUrls)
384 }
385
386 #[inline]
387 fn category(&self) -> RuleCategory {
388 RuleCategory::Link
389 }
390
391 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
392 !ctx.likely_has_links_or_images() && self.should_skip_content(ctx.content)
393 }
394
395 #[inline]
396 fn description(&self) -> &'static str {
397 "No bare URLs - wrap URLs in angle brackets"
398 }
399
400 fn check(&self, ctx: &LintContext) -> LintResult {
401 let mut warnings = Vec::new();
402 let content = ctx.content;
403
404 if self.should_skip_content(content) {
406 return Ok(warnings);
407 }
408
409 let line_index = &ctx.line_index;
411
412 let code_spans = ctx.code_spans();
414
415 let mut buffers = LineCheckBuffers::default();
417
418 for line in ctx.filtered_lines().skip_front_matter().skip_code_blocks() {
421 let mut line_warnings =
422 self.check_line(line.content, ctx, line.line_num, &code_spans, &mut buffers, line_index);
423
424 line_warnings.retain(|warning| {
426 !code_spans.iter().any(|span| {
428 span.line == warning.line &&
429 warning.column > 0 && (warning.column - 1) >= span.start_col &&
431 (warning.column - 1) < span.end_col
432 })
433 });
434
435 line_warnings.retain(|warning| {
439 if let Some(fix) = &warning.fix {
440 !ctx.links
442 .iter()
443 .any(|link| fix.range.start >= link.byte_offset && fix.range.end <= link.byte_end)
444 } else {
445 true
446 }
447 });
448
449 warnings.extend(line_warnings);
450 }
451
452 Ok(warnings)
453 }
454
455 fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
456 let mut content = ctx.content.to_string();
457 let mut warnings = self.check(ctx)?;
458
459 warnings.sort_by_key(|w| w.fix.as_ref().map(|f| f.range.start).unwrap_or(0));
461
462 for warning in warnings.iter().rev() {
464 if let Some(fix) = &warning.fix {
465 let start = fix.range.start;
466 let end = fix.range.end;
467 content.replace_range(start..end, &fix.replacement);
468 }
469 }
470
471 Ok(content)
472 }
473}