1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::{LineIndex, calculate_url_range};
6use crate::utils::regex_cache::{EMAIL_PATTERN, get_cached_regex};
7
8use crate::filtered_lines::FilteredLinesExt;
9use crate::lint_context::LintContext;
10
/// Cheap pre-filter: matches an `http(s)://` / `ftp(s)://` scheme prefix or a literal `@`.
/// `check_line` bails out early when a line does not match this.
const URL_QUICK_CHECK_STR: &str = r"(?:https?|ftps?)://|@";

/// Scheme prefixes (`grpc://`, `ssh://`, `redis://`, ...) that cause a candidate URL
/// containing one of them to be ignored by `check_line`.
const CUSTOM_PROTOCOL_PATTERN_STR: &str = r"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://";

/// Inline link `[text](target "optional title")`; the text may contain one level of
/// nested square brackets. Capture group 1 is the target.
const MARKDOWN_LINK_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;

/// Inline link with an empty target: `[text]()`.
const MARKDOWN_EMPTY_LINK_PATTERN_STR: &str = r"\[(?:[^\[\]]|\[[^\]]*\])*\]\(\)";

/// Collapsed reference link with an empty label: `[text][]`.
const MARKDOWN_EMPTY_REF_PATTERN_STR: &str = r"\[(?:[^\[\]]|\[[^\]]*\])*\]\[\]";

/// Angle-bracket autolink: `<http(s)/ftp(s) URL>` (including bracketed IPv6 hosts)
/// or `<user@host.tld>`.
const ANGLE_LINK_PATTERN_STR: &str =
    r"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|[^@\s]+@[^@\s]+\.[^@\s>]+)>";

/// A whole line consisting of a linked image ("badge"): `[![alt](img)](target)`.
const BADGE_LINK_LINE_STR: &str = r"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$";

/// Inline image `![alt](src "optional title")`. Capture group 1 is the alt text,
/// capture group 2 the source.
const MARKDOWN_IMAGE_PATTERN_STR: &str = r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;

/// General `http(s)://` / `ftp(s)://` URL with optional path, query and fragment.
/// The host part is either a bracketed IPv6 literal or a run of characters that
/// excludes whitespace, angle/square brackets, parentheses, backslash, quotes and backticks.
const SIMPLE_URL_REGEX_STR: &str = r#"(https?|ftps?)://(?:\[[0-9a-fA-F:%.]+\](?::\d+)?|[^\s<>\[\]()\\'\"`\]]+)(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#;

/// URL whose host is a bracketed IPv6 literal (optionally with a port), followed by
/// optional path, query and fragment.
const IPV6_URL_REGEX_STR: &str = r#"(https?|ftps?)://\[[0-9a-fA-F:%.\-a-zA-Z]+\](?::\d+)?(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#;

/// Reference definition line whose destination is an `http(s)`/`ftp(s)` URL: `[label]: https://...`.
const REFERENCE_DEF_RE_STR: &str = r"^\s*\[[^\]]+\]:\s*(?:https?|ftps?)://\S+$";

/// HTML comment `<!-- ... -->`, non-greedy and allowed to span newlines.
const HTML_COMMENT_PATTERN_STR: &str = r"<!--[\s\S]*?-->";

/// Anything between `<` and the next `>`.
const HTML_TAG_PATTERN_STR: &str = r"<[^>]*>";

/// Line that closes a link opened on a previous line: no `[` before the first `](...)`.
const MULTILINE_LINK_CONTINUATION_STR: &str = r"^[^\[]*\]\(.*\)";
27
/// Scratch vectors that `check_line` clears and refills on every call, so a single
/// instance can be reused across all lines of a document.
#[derive(Default)]
struct LineCheckBuffers {
    /// `(start, end)` byte ranges, within the current line, of inline links,
    /// empty links/references and angle-bracket autolinks.
    markdown_link_ranges: Vec<(usize, usize)>,
    /// `(start, end)` byte ranges, within the current line, of inline images.
    image_ranges: Vec<(usize, usize)>,
    /// Candidate URLs found in the current line: `(start, end, matched text)`.
    urls_found: Vec<(usize, usize, String)>,
}
35
/// Rule MD034: reports URLs and email addresses that appear in text without
/// angle brackets or link formatting. The rule carries no configuration state.
#[derive(Clone, Default)]
pub struct MD034NoBareUrls;
38
39impl MD034NoBareUrls {
40 #[inline]
41 pub fn should_skip_content(&self, content: &str) -> bool {
42 let bytes = content.as_bytes();
45 !bytes.contains(&b':') && !bytes.contains(&b'@')
46 }
47
48 fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
50 let mut trimmed = url;
51
52 let open_parens = url.chars().filter(|&c| c == '(').count();
54 let close_parens = url.chars().filter(|&c| c == ')').count();
55
56 if close_parens > open_parens {
57 let mut balance = 0;
59 let mut last_balanced_pos = url.len();
60
61 for (i, c) in url.chars().enumerate() {
62 if c == '(' {
63 balance += 1;
64 } else if c == ')' {
65 balance -= 1;
66 if balance < 0 {
67 last_balanced_pos = i;
69 break;
70 }
71 }
72 }
73
74 trimmed = &trimmed[..last_balanced_pos];
75 }
76
77 while let Some(last_char) = trimmed.chars().last() {
79 if matches!(last_char, '.' | ',' | ';' | ':' | '!' | '?') {
80 if last_char == ':' && trimmed.len() > 1 {
83 break;
85 }
86 trimmed = &trimmed[..trimmed.len() - 1];
87 } else {
88 break;
89 }
90 }
91
92 trimmed
93 }
94
95 fn is_reference_definition(&self, line: &str) -> bool {
97 get_cached_regex(REFERENCE_DEF_RE_STR)
98 .map(|re| re.is_match(line))
99 .unwrap_or(false)
100 }
101
102 fn is_in_html_comment(&self, content: &str, pos: usize) -> bool {
104 if let Ok(re) = get_cached_regex(HTML_COMMENT_PATTERN_STR) {
106 for mat in re.find_iter(content) {
107 if pos >= mat.start() && pos < mat.end() {
108 return true;
109 }
110 }
111 }
112 false
113 }
114
115 fn is_in_html_tag(&self, line: &str, pos: usize) -> bool {
117 if let Ok(re) = get_cached_regex(HTML_TAG_PATTERN_STR) {
119 for mat in re.find_iter(line) {
120 if pos >= mat.start() && pos < mat.end() {
121 return true;
122 }
123 }
124 }
125 false
126 }
127
128 fn check_line(
129 &self,
130 line: &str,
131 content: &str,
132 line_number: usize,
133 code_spans: &[crate::lint_context::CodeSpan],
134 buffers: &mut LineCheckBuffers,
135 line_index: &LineIndex,
136 ) -> Vec<LintWarning> {
137 let mut warnings = Vec::new();
138
139 if self.is_reference_definition(line) {
141 return warnings;
142 }
143
144 if let Ok(re) = get_cached_regex(MULTILINE_LINK_CONTINUATION_STR)
147 && re.is_match(line)
148 {
149 return warnings;
150 }
151
152 if let Ok(re) = get_cached_regex(URL_QUICK_CHECK_STR)
154 && !re.is_match(line)
155 && !line.contains('@')
156 {
157 return warnings;
158 }
159
160 buffers.markdown_link_ranges.clear();
162 if let Ok(re) = get_cached_regex(MARKDOWN_LINK_PATTERN_STR) {
163 for cap in re.captures_iter(line) {
164 if let Some(mat) = cap.get(0) {
165 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
166 }
167 }
168 }
169
170 if let Ok(re) = get_cached_regex(MARKDOWN_EMPTY_LINK_PATTERN_STR) {
172 for mat in re.find_iter(line) {
173 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
174 }
175 }
176
177 if let Ok(re) = get_cached_regex(MARKDOWN_EMPTY_REF_PATTERN_STR) {
178 for mat in re.find_iter(line) {
179 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
180 }
181 }
182
183 if let Ok(re) = get_cached_regex(ANGLE_LINK_PATTERN_STR) {
184 for cap in re.captures_iter(line) {
185 if let Some(mat) = cap.get(0) {
186 buffers.markdown_link_ranges.push((mat.start(), mat.end()));
187 }
188 }
189 }
190
191 buffers.image_ranges.clear();
193 if let Ok(re) = get_cached_regex(MARKDOWN_IMAGE_PATTERN_STR) {
194 for cap in re.captures_iter(line) {
195 if let Some(mat) = cap.get(0) {
196 buffers.image_ranges.push((mat.start(), mat.end()));
197 }
198 }
199 }
200
201 let is_badge_line = get_cached_regex(BADGE_LINK_LINE_STR)
203 .map(|re| re.is_match(line))
204 .unwrap_or(false);
205
206 if is_badge_line {
207 return warnings;
208 }
209
210 buffers.urls_found.clear();
212
213 if let Ok(re) = get_cached_regex(IPV6_URL_REGEX_STR) {
215 for mat in re.find_iter(line) {
216 let url_str = mat.as_str();
217 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
218 }
219 }
220
221 if let Ok(re) = get_cached_regex(SIMPLE_URL_REGEX_STR) {
223 for mat in re.find_iter(line) {
224 let url_str = mat.as_str();
225
226 if url_str.contains("://[") {
228 continue;
229 }
230
231 if let Some(host_start) = url_str.find("://") {
234 let after_protocol = &url_str[host_start + 3..];
235 if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
237 if let Some(char_after) = line.chars().nth(mat.end())
239 && char_after == ']'
240 {
241 continue;
243 }
244 }
245 }
246
247 buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
248 }
249 }
250
251 for &(start, end, ref url_str) in buffers.urls_found.iter() {
253 if get_cached_regex(CUSTOM_PROTOCOL_PATTERN_STR)
255 .map(|re| re.is_match(url_str))
256 .unwrap_or(false)
257 {
258 continue;
259 }
260
261 let mut is_inside_construct = false;
263 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
264 if start >= link_start && end <= link_end {
265 is_inside_construct = true;
266 break;
267 }
268 }
269
270 for &(img_start, img_end) in buffers.image_ranges.iter() {
271 if start >= img_start && end <= img_end {
272 is_inside_construct = true;
273 break;
274 }
275 }
276
277 if is_inside_construct {
278 continue;
279 }
280
281 if self.is_in_html_tag(line, start) {
283 continue;
284 }
285
286 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
288 let absolute_pos = line_start_byte + start;
289 if self.is_in_html_comment(content, absolute_pos) {
290 continue;
291 }
292
293 let trimmed_url = self.trim_trailing_punctuation(url_str);
295
296 if !trimmed_url.is_empty() && trimmed_url != "//" {
298 let trimmed_len = trimmed_url.len();
299 let (start_line, start_col, end_line, end_col) =
300 calculate_url_range(line_number, line, start, trimmed_len);
301
302 warnings.push(LintWarning {
303 rule_name: Some("MD034".to_string()),
304 line: start_line,
305 column: start_col,
306 end_line,
307 end_column: end_col,
308 message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
309 severity: Severity::Warning,
310 fix: Some(Fix {
311 range: {
312 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
313 (line_start_byte + start)..(line_start_byte + start + trimmed_len)
314 },
315 replacement: format!("<{trimmed_url}>"),
316 }),
317 });
318 }
319 }
320
321 for cap in EMAIL_PATTERN.captures_iter(line) {
323 if let Some(mat) = cap.get(0) {
324 let email = mat.as_str();
325 let start = mat.start();
326 let end = mat.end();
327
328 let mut is_inside_construct = false;
330 for &(link_start, link_end) in buffers.markdown_link_ranges.iter() {
331 if start >= link_start && end <= link_end {
332 is_inside_construct = true;
333 break;
334 }
335 }
336
337 if !is_inside_construct {
338 if self.is_in_html_tag(line, start) {
340 continue;
341 }
342
343 let is_in_code_span = code_spans
345 .iter()
346 .any(|span| span.line == line_number && start >= span.start_col && start < span.end_col);
347
348 if !is_in_code_span {
349 let email_len = end - start;
350 let (start_line, start_col, end_line, end_col) =
351 calculate_url_range(line_number, line, start, email_len);
352
353 warnings.push(LintWarning {
354 rule_name: Some("MD034".to_string()),
355 line: start_line,
356 column: start_col,
357 end_line,
358 end_column: end_col,
359 message: format!("Email address without angle brackets or link formatting: '{email}'"),
360 severity: Severity::Warning,
361 fix: Some(Fix {
362 range: {
363 let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
364 (line_start_byte + start)..(line_start_byte + end)
365 },
366 replacement: format!("<{email}>"),
367 }),
368 });
369 }
370 }
371 }
372 }
373
374 warnings
375 }
376}
377
378impl Rule for MD034NoBareUrls {
379 #[inline]
380 fn name(&self) -> &'static str {
381 "MD034"
382 }
383
384 fn as_any(&self) -> &dyn std::any::Any {
385 self
386 }
387
388 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
389 where
390 Self: Sized,
391 {
392 Box::new(MD034NoBareUrls)
393 }
394
395 #[inline]
396 fn category(&self) -> RuleCategory {
397 RuleCategory::Link
398 }
399
400 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
401 !ctx.likely_has_links_or_images() && self.should_skip_content(ctx.content)
402 }
403
404 #[inline]
405 fn description(&self) -> &'static str {
406 "No bare URLs - wrap URLs in angle brackets"
407 }
408
409 fn check(&self, ctx: &LintContext) -> LintResult {
410 let mut warnings = Vec::new();
411 let content = ctx.content;
412
413 if self.should_skip_content(content) {
415 return Ok(warnings);
416 }
417
418 let line_index = LineIndex::new(content.to_string());
420
421 let code_spans = ctx.code_spans();
423
424 let mut buffers = LineCheckBuffers::default();
426
427 for line in ctx.filtered_lines().skip_front_matter().skip_code_blocks() {
430 let mut line_warnings = self.check_line(
431 line.content,
432 content,
433 line.line_num,
434 &code_spans,
435 &mut buffers,
436 &line_index,
437 );
438
439 line_warnings.retain(|warning| {
441 !code_spans.iter().any(|span| {
443 span.line == warning.line &&
444 warning.column > 0 && (warning.column - 1) >= span.start_col &&
446 (warning.column - 1) < span.end_col
447 })
448 });
449
450 warnings.extend(line_warnings);
451 }
452
453 Ok(warnings)
454 }
455
456 fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
457 let mut content = ctx.content.to_string();
458 let mut warnings = self.check(ctx)?;
459
460 warnings.sort_by_key(|w| w.fix.as_ref().map(|f| f.range.start).unwrap_or(0));
462
463 for warning in warnings.iter().rev() {
465 if let Some(fix) = &warning.fix {
466 let start = fix.range.start;
467 let end = fix.range.end;
468 content.replace_range(start..end, &fix.replacement);
469 }
470 }
471
472 Ok(content)
473 }
474}