rumdl_lib/rules/
md034_no_bare_urls.rs1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::calculate_url_range;
6use crate::utils::regex_cache::{EMAIL_PATTERN, get_cached_regex};
7
8use crate::lint_context::LintContext;
9
/// Cheap per-line pre-filter: a URL scheme handled by this rule, or an `@`
/// that could belong to an email address.
const URL_QUICK_CHECK_STR: &str = r#"(?:https?|ftps?)://|@"#;
/// Schemes other than http(s)/ftp(s); a candidate whose text contains one of
/// these is not reported.
const CUSTOM_PROTOCOL_PATTERN_STR: &str = r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#;
/// Inline markdown link `[text](dest "optional title")`; the text may contain
/// one level of nested brackets.
const MARKDOWN_LINK_PATTERN_STR: &str = r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;
/// Autolink in angle brackets: `<scheme://...>` (including bracketed IPv6
/// hosts) or `<user@host.tld>`.
const ANGLE_LINK_PATTERN_STR: &str =
    r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#;
/// A line consisting solely of a linked image, i.e. `[![alt](img)](target)`.
const BADGE_LINK_LINE_STR: &str = r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#;
/// Inline markdown image `![alt](src "optional title")`.
const MARKDOWN_IMAGE_PATTERN_STR: &str = r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#;
/// General http(s)/ftp(s) URL with optional path, query and fragment.
const SIMPLE_URL_REGEX_STR: &str = r#"(https?|ftps?)://(?:\[[0-9a-fA-F:%.]+\](?::\d+)?|[^\s<>\[\]()\\'\"`\]]+)(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#;
/// http(s)/ftp(s) URL whose host is a bracketed IPv6 literal, with optional
/// port, path, query and fragment.
const IPV6_URL_REGEX_STR: &str = r#"(https?|ftps?)://\[[0-9a-fA-F:%.\-a-zA-Z]+\](?::\d+)?(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#;
/// Link reference definition `[label]: scheme://dest`, optionally followed by
/// a title in double quotes, single quotes or parentheses and by trailing
/// whitespace. Without the optional tail, a definition that carries a title
/// would not be recognised and its destination would be reported as bare.
const REFERENCE_DEF_RE_STR: &str =
    r#"^\s*\[[^\]]+\]:\s*(?:https?|ftps?)://\S+(?:\s+(?:\"[^\"]*\"|\'[^\']*\'|\([^)]*\)))?\s*$"#;
/// HTML comment, possibly spanning several lines (non-greedy).
const HTML_COMMENT_PATTERN_STR: &str = r#"<!--[\s\S]*?-->"#;
/// Anything between `<` and the next `>`.
const HTML_TAG_PATTERN_STR: &str = r#"<[^>]*>"#;
/// A line that closes a link whose `[` was opened on an earlier line:
/// no `[` before the first `](...)`.
const MULTILINE_LINK_CONTINUATION_STR: &str = r#"^[^\[]*\]\(.*\)"#;
24
/// Lint rule MD034: reports URLs and email addresses that appear in the text
/// without angle brackets or link formatting.
///
/// The rule carries no configuration or state.
#[derive(Debug, Default, Clone)]
pub struct MD034NoBareUrls;
27
28impl MD034NoBareUrls {
29 #[inline]
30 pub fn should_skip(&self, content: &str) -> bool {
31 let bytes = content.as_bytes();
34 !bytes.contains(&b':') && !bytes.contains(&b'@')
35 }
36
37 fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
39 let mut trimmed = url;
40
41 let open_parens = url.chars().filter(|&c| c == '(').count();
43 let close_parens = url.chars().filter(|&c| c == ')').count();
44
45 if close_parens > open_parens {
46 let mut balance = 0;
48 let mut last_balanced_pos = url.len();
49
50 for (i, c) in url.chars().enumerate() {
51 if c == '(' {
52 balance += 1;
53 } else if c == ')' {
54 balance -= 1;
55 if balance < 0 {
56 last_balanced_pos = i;
58 break;
59 }
60 }
61 }
62
63 trimmed = &trimmed[..last_balanced_pos];
64 }
65
66 while let Some(last_char) = trimmed.chars().last() {
68 if matches!(last_char, '.' | ',' | ';' | ':' | '!' | '?') {
69 if last_char == ':' && trimmed.len() > 1 {
72 break;
74 }
75 trimmed = &trimmed[..trimmed.len() - 1];
76 } else {
77 break;
78 }
79 }
80
81 trimmed
82 }
83
84 fn is_reference_definition(&self, line: &str) -> bool {
86 get_cached_regex(REFERENCE_DEF_RE_STR)
87 .map(|re| re.is_match(line))
88 .unwrap_or(false)
89 }
90
91 fn is_in_html_comment(&self, content: &str, pos: usize) -> bool {
93 if let Ok(re) = get_cached_regex(HTML_COMMENT_PATTERN_STR) {
95 for mat in re.find_iter(content) {
96 if pos >= mat.start() && pos < mat.end() {
97 return true;
98 }
99 }
100 }
101 false
102 }
103
104 fn is_in_html_tag(&self, line: &str, pos: usize) -> bool {
106 if let Ok(re) = get_cached_regex(HTML_TAG_PATTERN_STR) {
108 for mat in re.find_iter(line) {
109 if pos >= mat.start() && pos < mat.end() {
110 return true;
111 }
112 }
113 }
114 false
115 }
116
117 fn check_line(
118 &self,
119 line: &str,
120 content: &str,
121 line_number: usize,
122 code_spans: &[crate::lint_context::CodeSpan],
123 ) -> Vec<LintWarning> {
124 let mut warnings = Vec::new();
125
126 if self.is_reference_definition(line) {
128 return warnings;
129 }
130
131 if let Ok(re) = get_cached_regex(MULTILINE_LINK_CONTINUATION_STR)
134 && re.is_match(line)
135 {
136 return warnings;
137 }
138
139 if let Ok(re) = get_cached_regex(URL_QUICK_CHECK_STR)
141 && !re.is_match(line)
142 && !line.contains('@')
143 {
144 return warnings;
145 }
146
147 let mut markdown_link_ranges = Vec::new();
149 if let Ok(re) = get_cached_regex(MARKDOWN_LINK_PATTERN_STR) {
150 for cap in re.captures_iter(line) {
151 if let Some(mat) = cap.get(0) {
152 markdown_link_ranges.push((mat.start(), mat.end()));
153 }
154 }
155 }
156
157 if let Ok(re) = get_cached_regex(ANGLE_LINK_PATTERN_STR) {
158 for cap in re.captures_iter(line) {
159 if let Some(mat) = cap.get(0) {
160 markdown_link_ranges.push((mat.start(), mat.end()));
161 }
162 }
163 }
164
165 let mut image_ranges = Vec::new();
167 if let Ok(re) = get_cached_regex(MARKDOWN_IMAGE_PATTERN_STR) {
168 for cap in re.captures_iter(line) {
169 if let Some(mat) = cap.get(0) {
170 image_ranges.push((mat.start(), mat.end()));
171 }
172 }
173 }
174
175 let is_badge_line = get_cached_regex(BADGE_LINK_LINE_STR)
177 .map(|re| re.is_match(line))
178 .unwrap_or(false);
179
180 if is_badge_line {
181 return warnings;
182 }
183
184 let mut urls_found = Vec::new();
186
187 if let Ok(re) = get_cached_regex(IPV6_URL_REGEX_STR) {
189 for mat in re.find_iter(line) {
190 let url_str = mat.as_str();
191 urls_found.push((mat.start(), mat.end(), url_str.to_string()));
192 }
193 }
194
195 if let Ok(re) = get_cached_regex(SIMPLE_URL_REGEX_STR) {
197 for mat in re.find_iter(line) {
198 let url_str = mat.as_str();
199
200 if url_str.contains("://[") {
202 continue;
203 }
204
205 if let Some(host_start) = url_str.find("://") {
208 let after_protocol = &url_str[host_start + 3..];
209 if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
211 if let Some(char_after) = line.chars().nth(mat.end())
213 && char_after == ']'
214 {
215 continue;
217 }
218 }
219 }
220
221 urls_found.push((mat.start(), mat.end(), url_str.to_string()));
222 }
223 }
224
225 for (start, end, url_str) in urls_found {
227 if get_cached_regex(CUSTOM_PROTOCOL_PATTERN_STR)
229 .map(|re| re.is_match(&url_str))
230 .unwrap_or(false)
231 {
232 continue;
233 }
234
235 let mut is_inside_construct = false;
237 for &(link_start, link_end) in &markdown_link_ranges {
238 if start >= link_start && end <= link_end {
239 is_inside_construct = true;
240 break;
241 }
242 }
243
244 for &(img_start, img_end) in &image_ranges {
245 if start >= img_start && end <= img_end {
246 is_inside_construct = true;
247 break;
248 }
249 }
250
251 if is_inside_construct {
252 continue;
253 }
254
255 if self.is_in_html_tag(line, start) {
257 continue;
258 }
259
260 let absolute_pos = content
262 .lines()
263 .take(line_number - 1)
264 .map(|l| l.len() + 1)
265 .sum::<usize>()
266 + start;
267 if self.is_in_html_comment(content, absolute_pos) {
268 continue;
269 }
270
271 let trimmed_url = self.trim_trailing_punctuation(&url_str);
273
274 if !trimmed_url.is_empty() && trimmed_url != "//" {
276 let trimmed_len = trimmed_url.len();
277 let (start_line, start_col, end_line, end_col) =
278 calculate_url_range(line_number, line, start, trimmed_len);
279
280 warnings.push(LintWarning {
281 rule_name: Some("MD034"),
282 line: start_line,
283 column: start_col,
284 end_line,
285 end_column: end_col,
286 message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
287 severity: Severity::Warning,
288 fix: Some(Fix {
289 range: {
290 let line_start_byte = content
291 .lines()
292 .take(line_number - 1)
293 .map(|l| l.len() + 1)
294 .sum::<usize>();
295 (line_start_byte + start)..(line_start_byte + start + trimmed_len)
296 },
297 replacement: format!("<{trimmed_url}>"),
298 }),
299 });
300 }
301 }
302
303 for cap in EMAIL_PATTERN.captures_iter(line) {
305 if let Some(mat) = cap.get(0) {
306 let email = mat.as_str();
307 let start = mat.start();
308 let end = mat.end();
309
310 let mut is_inside_construct = false;
312 for &(link_start, link_end) in &markdown_link_ranges {
313 if start >= link_start && end <= link_end {
314 is_inside_construct = true;
315 break;
316 }
317 }
318
319 if !is_inside_construct {
320 if self.is_in_html_tag(line, start) {
322 continue;
323 }
324
325 let is_in_code_span = code_spans
327 .iter()
328 .any(|span| span.line == line_number && start >= span.start_col && start < span.end_col);
329
330 if !is_in_code_span {
331 let email_len = end - start;
332 let (start_line, start_col, end_line, end_col) =
333 calculate_url_range(line_number, line, start, email_len);
334
335 warnings.push(LintWarning {
336 rule_name: Some("MD034"),
337 line: start_line,
338 column: start_col,
339 end_line,
340 end_column: end_col,
341 message: format!("Email address without angle brackets or link formatting: '{email}'"),
342 severity: Severity::Warning,
343 fix: Some(Fix {
344 range: {
345 let line_start_byte = content
346 .lines()
347 .take(line_number - 1)
348 .map(|l| l.len() + 1)
349 .sum::<usize>();
350 (line_start_byte + start)..(line_start_byte + end)
351 },
352 replacement: format!("<{email}>"),
353 }),
354 });
355 }
356 }
357 }
358 }
359
360 warnings
361 }
362}
363
364impl Rule for MD034NoBareUrls {
365 #[inline]
366 fn name(&self) -> &'static str {
367 "MD034"
368 }
369
370 fn as_any(&self) -> &dyn std::any::Any {
371 self
372 }
373
374 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
375 where
376 Self: Sized,
377 {
378 Box::new(MD034NoBareUrls)
379 }
380
381 #[inline]
382 fn category(&self) -> RuleCategory {
383 RuleCategory::Link
384 }
385
386 #[inline]
387 fn description(&self) -> &'static str {
388 "No bare URLs - wrap URLs in angle brackets"
389 }
390
391 fn check(&self, ctx: &LintContext) -> LintResult {
392 let mut warnings = Vec::new();
393 let content = ctx.content;
394
395 if self.should_skip(content) {
397 return Ok(warnings);
398 }
399
400 let code_spans = ctx.code_spans();
402
403 for (line_num, line) in content.lines().enumerate() {
405 if ctx.is_in_code_block(line_num + 1) {
407 continue;
408 }
409
410 let mut line_warnings = self.check_line(line, content, line_num + 1, &code_spans);
411
412 line_warnings.retain(|warning| {
414 !code_spans.iter().any(|span| {
416 span.line == warning.line &&
417 warning.column > 0 && (warning.column - 1) >= span.start_col &&
419 (warning.column - 1) < span.end_col
420 })
421 });
422
423 warnings.extend(line_warnings);
424 }
425
426 Ok(warnings)
427 }
428
429 fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
430 let mut content = ctx.content.to_string();
431 let mut warnings = self.check(ctx)?;
432
433 warnings.sort_by_key(|w| w.fix.as_ref().map(|f| f.range.start).unwrap_or(0));
435
436 for warning in warnings.iter().rev() {
438 if let Some(fix) = &warning.fix {
439 let start = fix.range.start;
440 let end = fix.range.end;
441 content.replace_range(start..end, &fix.replacement);
442 }
443 }
444
445 Ok(content)
446 }
447}