rumdl_lib/rules/
md034_no_bare_urls.rs

//! Rule MD034: No unformatted URLs
//!
//! See [docs/md034.md](../../docs/md034.md) for full documentation, configuration, and examples.
4use crate::rule::{
5    AstExtensions, Fix, LintError, LintResult, LintWarning, MarkdownAst, MaybeAst, Rule, RuleCategory, Severity,
6};
7use crate::utils::range_utils::calculate_url_range;
8use crate::utils::regex_cache::EMAIL_PATTERN;
9
10use crate::lint_context::LintContext;
11use fancy_regex::Regex as FancyRegex;
12use lazy_static::lazy_static;
13use markdown::mdast::Node;
14use regex::Regex;
15
lazy_static! {
    // Cheap pre-check: matches an http(s)/ftp(s) scheme prefix or any `@`.
    // Within this file it is referenced only by the unit tests.
    static ref URL_QUICK_CHECK: Regex = Regex::new(r#"(?:https?|ftps?)://|@"#).unwrap();

    // Use fancy-regex for look-behind/look-ahead.
    // The look-behind rejects a URL that directly follows a word character, `[`, `(` or `<`.
    // The host part may be an IPv6 address in square brackets.
    static ref URL_REGEX: FancyRegex = FancyRegex::new(r#"(?<![\w\[\(\<])((?:https?|ftps?)://(?:\[[0-9a-fA-F:%]+\]|[^\s<>\[\]()\\'\"]+)(?::\d+)?(?:/[^\s<>\[\]()\\'\"]*)?(?:\?[^\s<>\[\]()\\'\"]*)?(?:#[^\s<>\[\]()\\'\"]*)?)"#).unwrap();
    // Same pattern text as URL_REGEX.
    static ref URL_FIX_REGEX: FancyRegex = FancyRegex::new(r#"(?<![\w\[\(\<])((?:https?|ftps?)://(?:\[[0-9a-fA-F:%]+\]|[^\s<>\[\]()\\'\"]+)(?::\d+)?(?:/[^\s<>\[\]()\\'\"]*)?(?:\?[^\s<>\[\]()\\'\"]*)?(?:#[^\s<>\[\]()\\'\"]*)?)"#).unwrap();

    // Pattern to detect custom protocol patterns that shouldn't be flagged.
    // These are commonly used in documentation but aren't actual browsable URLs.
    // `check` tests it against the text that precedes a candidate URL on the same line.
    static ref CUSTOM_PROTOCOL_PATTERN: Regex = Regex::new(r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#).unwrap();

    // Pattern to match markdown link format `[text](dest "title")` - destination is Group 1.
    // The link text may contain one level of nested brackets, as in badge links
    // like [![badge](img)](link).
    static ref MARKDOWN_LINK_PATTERN: Regex = Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap();

    // Pattern to match angle bracket link format `<...>` (URLs and emails) - content is Group 1.
    // The URL alternative accepts a bracketed IPv6 host with an optional `%zone` suffix.
    static ref ANGLE_LINK_PATTERN: Regex = Regex::new(r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#).unwrap();

    // Matches a line that consists of exactly one badge link: [![alt](img)](link)
    static ref BADGE_LINK_LINE: Regex = Regex::new(r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#).unwrap();

    // Matches link text that is *only* an image: ![alt](src)
    static ref IMAGE_ONLY_LINK_TEXT_PATTERN: Regex = Regex::new(r#"^!\s*\[[^\]]*\]\s*\([^)]*\)$"#).unwrap();

    // Captures full image in 0, alt text in 1, src in 2
    static ref MARKDOWN_IMAGE_PATTERN: Regex = Regex::new(r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap();

    // Candidate-URL pattern without look-behind/look-ahead; callers do their own
    // boundary checks and strip trailing sentence punctuation afterwards.
    // Matches markdownlint's behavior: URLs can have domains without dots.
    // Shape: scheme://host[:port][/path][?query][#fragment]
    // The host is either a bracketed IPv6 literal or a run of characters that excludes
    // whitespace, brackets, quotes, backticks and `:`; a stray `]` in the host is therefore
    // never consumed (e.g. "https://::1]" without an opening bracket).
    static ref SIMPLE_URL_REGEX: Regex = Regex::new(r#"(https?|ftps?)://(?:\[[0-9a-fA-F:%.]+\](?::\d+)?|[^\s<>\[\]()\\'\"`:\]]+(?::\d+)?)(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#).unwrap();

    // Special pattern just for IPv6 URLs to handle them separately.
    // Note: This is permissive to match markdownlint behavior: the bracketed part accepts
    // any ASCII letter, `.`, `-`, `%` and `:`, so technically invalid IPv6 examples still match.
    static ref IPV6_URL_REGEX: Regex = Regex::new(r#"(https?|ftps?)://\[[0-9a-fA-F:%.\-a-zA-Z]+\](?::\d+)?(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#).unwrap();

    // Matches a whole line that is a reference definition with an http(s)/ftp(s)
    // destination: `[label]: https://...` (the destination is any run of non-whitespace).
    static ref REFERENCE_DEF_RE: Regex = Regex::new(r"^\s*\[[^\]]+\]:\s*(?:https?|ftps?)://\S+$").unwrap();

    // Pattern to match HTML comments, including ones spanning several lines (non-greedy)
    static ref HTML_COMMENT_PATTERN: Regex = Regex::new(r#"<!--[\s\S]*?-->"#).unwrap();
}
66
/// Lint rule MD034: reports URLs and email addresses that appear in text without
/// angle brackets or link formatting.
///
/// The rule carries no state or configuration.
#[derive(Debug, Default, Clone)]
pub struct MD034NoBareUrls;
69
70impl MD034NoBareUrls {
71    #[inline]
72    pub fn should_skip(&self, content: &str) -> bool {
73        // Skip if content has no URLs and no email addresses
74        // Fast byte scanning for common URL/email indicators
75        let bytes = content.as_bytes();
76        !bytes.contains(&b':') && !bytes.contains(&b'@')
77    }
78
79    /// Remove trailing punctuation that is likely sentence punctuation, not part of the URL
80    fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
81        let trailing_punct = ['.', ',', ';', ':', '!', '?'];
82        let mut end = url.len();
83
84        // Remove trailing punctuation characters
85        while end > 0 {
86            // Get the last character of the current substring safely
87            let current_url = &url[..end];
88            if let Some((last_char_pos, last_char)) = current_url.char_indices().next_back() {
89                if trailing_punct.contains(&last_char) {
90                    end = last_char_pos;
91                } else {
92                    break;
93                }
94            } else {
95                break;
96            }
97        }
98
99        &url[..end]
100    }
101
102    /// AST-based bare URL detection: only flag URLs in text nodes not inside links/images/code/html
103    fn find_bare_urls_in_ast(
104        &self,
105        node: &Node,
106        parent_is_link_or_image: bool,
107        _content: &str,
108        warnings: &mut Vec<LintWarning>,
109        ctx: &LintContext,
110    ) {
111        use markdown::mdast::Node::*;
112        match node {
113            Text(text) if !parent_is_link_or_image => {
114                let text_str = &text.value;
115
116                // Check for URLs
117                for url_match in SIMPLE_URL_REGEX.find_iter(text_str) {
118                    let url_start = url_match.start();
119                    let mut url_end = url_match.end();
120
121                    // Trim trailing punctuation that's likely sentence punctuation
122                    let raw_url = &text_str[url_start..url_end];
123                    let trimmed_url = self.trim_trailing_punctuation(raw_url);
124                    url_end = url_start + trimmed_url.len();
125
126                    // Skip if URL became empty after trimming
127                    if url_end <= url_start {
128                        continue;
129                    }
130
131                    let before = if url_start == 0 {
132                        None
133                    } else {
134                        text_str.get(url_start - 1..url_start)
135                    };
136                    let after = text_str.get(url_end..url_end + 1);
137                    let is_valid_boundary = before
138                        .is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_")
139                        && after.is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_");
140                    if !is_valid_boundary {
141                        continue;
142                    }
143                    if let Some(pos) = &text.position {
144                        let offset = pos.start.offset + url_start;
145                        let (line, column) = ctx.offset_to_line_col(offset);
146                        let url_text = &text_str[url_start..url_end];
147                        let (start_line, start_col, end_line, end_col) =
148                            (line, column, line, column + url_text.chars().count());
149                        warnings.push(LintWarning {
150                            rule_name: Some(self.name()),
151                            line: start_line,
152                            column: start_col,
153                            end_line,
154                            end_column: end_col,
155                            message: "URL without angle brackets or link formatting".to_string(),
156                            severity: Severity::Warning,
157                            fix: Some(Fix {
158                                range: offset..(offset + url_text.len()),
159                                replacement: format!("<{url_text}>"),
160                            }),
161                        });
162                    }
163                }
164
165                // Check for email addresses
166                for email_match in EMAIL_PATTERN.find_iter(text_str) {
167                    let email_start = email_match.start();
168                    let email_end = email_match.end();
169                    let before = if email_start == 0 {
170                        None
171                    } else {
172                        text_str.get(email_start - 1..email_start)
173                    };
174                    let after = text_str.get(email_end..email_end + 1);
175                    let is_valid_boundary = before
176                        .is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_" && c != ".")
177                        && after.is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_" && c != ".");
178                    if !is_valid_boundary {
179                        continue;
180                    }
181                    if let Some(pos) = &text.position {
182                        let offset = pos.start.offset + email_start;
183                        let (line, column) = ctx.offset_to_line_col(offset);
184                        let email_text = &text_str[email_start..email_end];
185                        let (start_line, start_col, end_line, end_col) =
186                            (line, column, line, column + email_text.chars().count());
187                        warnings.push(LintWarning {
188                            rule_name: Some(self.name()),
189                            line: start_line,
190                            column: start_col,
191                            end_line,
192                            end_column: end_col,
193                            message: "Email address without angle brackets or link formatting (wrap like: <email>)"
194                                .to_string(),
195                            severity: Severity::Warning,
196                            fix: Some(Fix {
197                                range: offset..(offset + email_text.len()),
198                                replacement: format!("<{email_text}>"),
199                            }),
200                        });
201                    }
202                }
203            }
204            Link(link) => {
205                for child in &link.children {
206                    self.find_bare_urls_in_ast(child, true, _content, warnings, ctx);
207                }
208            }
209            Image(image) => {
210                // Only check alt text for bare URLs (rare, but possible)
211                let alt_str = &image.alt;
212                for url_match in SIMPLE_URL_REGEX.find_iter(alt_str) {
213                    let url_start = url_match.start();
214                    let mut url_end = url_match.end();
215
216                    // Trim trailing punctuation that's likely sentence punctuation
217                    let raw_url = &alt_str[url_start..url_end];
218                    let trimmed_url = self.trim_trailing_punctuation(raw_url);
219                    url_end = url_start + trimmed_url.len();
220
221                    // Skip if URL became empty after trimming
222                    if url_end <= url_start {
223                        continue;
224                    }
225
226                    let before = if url_start == 0 {
227                        None
228                    } else {
229                        alt_str.get(url_start - 1..url_start)
230                    };
231                    let after = alt_str.get(url_end..url_end + 1);
232                    let is_valid_boundary = before
233                        .is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_")
234                        && after.is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_");
235                    if !is_valid_boundary {
236                        continue;
237                    }
238                    if let Some(pos) = &image.position {
239                        let offset = pos.start.offset + url_start;
240                        let (line, column) = ctx.offset_to_line_col(offset);
241                        let url_text = &alt_str[url_start..url_end];
242                        let (start_line, start_col, end_line, end_col) =
243                            (line, column, line, column + url_text.chars().count());
244                        warnings.push(LintWarning {
245                            rule_name: Some(self.name()),
246                            line: start_line,
247                            column: start_col,
248                            end_line,
249                            end_column: end_col,
250                            message: "URL without angle brackets or link formatting".to_string(),
251                            severity: Severity::Warning,
252                            fix: Some(Fix {
253                                range: offset..(offset + url_text.len()),
254                                replacement: format!("<{url_text}>"),
255                            }),
256                        });
257                    }
258                }
259            }
260            Code(_) | InlineCode(_) | Html(_) => {
261                // Skip code and HTML nodes
262            }
263            _ => {
264                if let Some(children) = node.children() {
265                    for child in children {
266                        self.find_bare_urls_in_ast(child, false, _content, warnings, ctx);
267                    }
268                }
269            }
270        }
271    }
272
273    /// AST-based check method for MD034
274    pub fn check_ast(&self, ctx: &LintContext, ast: &Node) -> LintResult {
275        let mut warnings = Vec::new();
276        self.find_bare_urls_in_ast(ast, false, ctx.content, &mut warnings, ctx);
277        Ok(warnings)
278    }
279}
280
281impl Rule for MD034NoBareUrls {
    /// Returns the rule identifier, `"MD034"`.
    fn name(&self) -> &'static str {
        "MD034"
    }
285
    /// Returns the one-line human-readable description of the rule.
    fn description(&self) -> &'static str {
        "URL without angle brackets or link formatting"
    }
289
290    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
291        // Use line-based detection to properly distinguish between bare URLs and autolinks
292        // AST-based approach doesn't work because CommonMark parser converts bare URLs to links
293        let content = ctx.content;
294
295        // Fast path: Early return for empty content
296        if content.is_empty() || self.should_skip(content) {
297            return Ok(Vec::new());
298        }
299        let mut warnings = Vec::new();
300
301        // First, find all markdown link ranges across the entire content
302        let mut excluded_ranges: Vec<(usize, usize)> = Vec::new();
303
304        // Markdown links: [text](url) - exclude both destination and entire link text
305        for cap in MARKDOWN_LINK_PATTERN.captures_iter(content) {
306            if let Some(dest) = cap.get(1) {
307                excluded_ranges.push((dest.start(), dest.end()));
308            }
309            // Also exclude the entire link to handle URLs in link text
310            if let Some(full_match) = cap.get(0) {
311                excluded_ranges.push((full_match.start(), full_match.end()));
312            }
313        }
314
315        // Markdown images: ![alt](url)
316        for cap in MARKDOWN_IMAGE_PATTERN.captures_iter(content) {
317            if let Some(dest) = cap.get(2) {
318                excluded_ranges.push((dest.start(), dest.end()));
319            }
320        }
321
322        // Angle-bracket links: <url>
323        for cap in ANGLE_LINK_PATTERN.captures_iter(content) {
324            if let Some(m) = cap.get(1) {
325                excluded_ranges.push((m.start(), m.end()));
326            }
327        }
328
329        // HTML tags: exclude everything inside them
330        for html_tag in ctx.html_tags().iter() {
331            excluded_ranges.push((html_tag.byte_offset, html_tag.byte_end));
332        }
333
334        // HTML comments: <!-- url -->
335        for cap in HTML_COMMENT_PATTERN.captures_iter(content) {
336            if let Some(comment) = cap.get(0) {
337                excluded_ranges.push((comment.start(), comment.end()));
338            }
339        }
340
341        // Sort and merge overlapping ranges
342        excluded_ranges.sort_by_key(|r| r.0);
343        let mut merged: Vec<(usize, usize)> = Vec::new();
344        for (start, end) in excluded_ranges {
345            if let Some((_, last_end)) = merged.last_mut()
346                && *last_end >= start
347            {
348                *last_end = (*last_end).max(end);
349                continue;
350            }
351            merged.push((start, end));
352        }
353
354        // Now find all URLs and emails in the content and check if they're excluded
355        // We'll combine URL and email detection for efficiency
356        let mut all_matches: Vec<(usize, usize, bool)> = Vec::new(); // (start, end, is_email)
357
358        // Early exit if no potential URLs/emails based on quick check
359        if !content.contains("://") && !content.contains('@') {
360            return Ok(warnings);
361        }
362
363        // Pre-filter lines that might contain URLs or emails
364        let mut candidate_lines = Vec::new();
365        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
366            // Skip lines in code blocks
367            if line_info.in_code_block {
368                continue;
369            }
370
371            let line_content = &line_info.content;
372            let bytes = line_content.as_bytes();
373
374            // Fast byte-level check for potential URLs/emails
375            let has_url = bytes.contains(&b':') && line_content.contains("://");
376            let has_email = bytes.contains(&b'@');
377
378            if has_url || has_email {
379                candidate_lines.push(line_idx);
380            }
381        }
382
383        // Process only candidate lines
384        for &line_idx in &candidate_lines {
385            let line_info = &ctx.lines[line_idx];
386            let line_content = &line_info.content;
387
388            // Check for URLs in this line
389            for url_match in SIMPLE_URL_REGEX.find_iter(line_content) {
390                let start_in_line = url_match.start();
391                let end_in_line = url_match.end();
392                let matched_str = &line_content[start_in_line..end_in_line];
393
394                // Skip invalid IPv6 patterns
395                if matched_str.contains("::") && !matched_str.contains('[') && matched_str.contains(']') {
396                    continue;
397                }
398
399                // Skip custom protocols that aren't standard web protocols
400                // Check if there's a custom protocol pattern before this match
401                if start_in_line > 0 {
402                    // Look back to see if this is part of a custom protocol URL
403                    let prefix_start = start_in_line.saturating_sub(20); // Look back up to 20 chars
404
405                    // Ensure we're on a character boundary
406                    let prefix_start = if prefix_start == 0 {
407                        0
408                    } else {
409                        // Find the nearest character boundary at or after prefix_start
410                        let mut adjusted_start = prefix_start;
411                        while adjusted_start < start_in_line && !line_content.is_char_boundary(adjusted_start) {
412                            adjusted_start += 1;
413                        }
414                        adjusted_start
415                    };
416
417                    let prefix = &line_content[prefix_start..start_in_line];
418                    if CUSTOM_PROTOCOL_PATTERN.is_match(prefix) {
419                        continue;
420                    }
421                }
422
423                let global_start = line_info.byte_offset + start_in_line;
424                let global_end = line_info.byte_offset + end_in_line;
425                all_matches.push((global_start, global_end, false));
426            }
427
428            // Check for IPv6 URLs
429            for url_match in IPV6_URL_REGEX.find_iter(line_content) {
430                let global_start = line_info.byte_offset + url_match.start();
431                let global_end = line_info.byte_offset + url_match.end();
432
433                // Remove any overlapping regular URL matches
434                all_matches.retain(|(start, end, _)| !(*start < global_end && *end > global_start));
435
436                all_matches.push((global_start, global_end, false));
437            }
438
439            // Check for emails in this line
440            for email_match in EMAIL_PATTERN.find_iter(line_content) {
441                let global_start = line_info.byte_offset + email_match.start();
442                let global_end = line_info.byte_offset + email_match.end();
443                all_matches.push((global_start, global_end, true));
444            }
445        }
446
447        // Process all matches
448        for (match_start, match_end_orig, is_email) in all_matches {
449            let mut match_end = match_end_orig;
450
451            // For URLs, trim trailing punctuation
452            if !is_email {
453                let raw_url = &content[match_start..match_end];
454                let trimmed_url = self.trim_trailing_punctuation(raw_url);
455                match_end = match_start + trimmed_url.len();
456            }
457
458            // Skip if became empty after trimming
459            if match_end <= match_start {
460                continue;
461            }
462
463            // Manual boundary check: not part of a larger word
464            // Use bytes for ASCII checks (more efficient)
465            let bytes = content.as_bytes();
466            let before_byte = if match_start == 0 {
467                None
468            } else {
469                bytes.get(match_start - 1).copied()
470            };
471            let after_byte = bytes.get(match_end).copied();
472
473            let is_valid_boundary = if is_email {
474                before_byte.is_none_or(|b| !b.is_ascii_alphanumeric() && b != b'_' && b != b'.')
475                    && after_byte.is_none_or(|b| !b.is_ascii_alphanumeric() && b != b'_' && b != b'.')
476            } else {
477                before_byte.is_none_or(|b| !b.is_ascii_alphanumeric() && b != b'_')
478                    && after_byte.is_none_or(|b| !b.is_ascii_alphanumeric() && b != b'_')
479            };
480
481            if !is_valid_boundary {
482                continue;
483            }
484
485            // Skip if this is within any skip context (code blocks, MkDocs snippets, etc.)
486            if crate::utils::skip_context::is_in_skip_context(ctx, match_start) {
487                continue;
488            }
489
490            // Skip if within any excluded range (link/image dest/HTML comment)
491            let in_any_range = merged.iter().any(|(start, end)| {
492                // For HTML comments and other exclusions, check if URL overlaps the range
493                (match_start >= *start && match_start < *end)
494                    || (match_end > *start && match_end <= *end)
495                    || (match_start < *start && match_end > *end)
496            });
497            if in_any_range {
498                continue;
499            }
500
501            // Get line information efficiently
502            let (line_num, col_num) = ctx.offset_to_line_col(match_start);
503
504            // Skip reference definitions for URLs
505            if !is_email
506                && let Some(line_info) = ctx.line_info(line_num)
507                && REFERENCE_DEF_RE.is_match(&line_info.content)
508            {
509                continue;
510            }
511
512            let matched_text = &content[match_start..match_end];
513            let line_info = ctx.line_info(line_num).unwrap();
514            let (start_line, start_col, end_line, end_col) =
515                calculate_url_range(line_num, &line_info.content, col_num - 1, matched_text.len());
516
517            let message = if is_email {
518                "Email address without angle brackets or link formatting".to_string()
519            } else {
520                "URL without angle brackets or link formatting".to_string()
521            };
522
523            warnings.push(LintWarning {
524                rule_name: Some(self.name()),
525                line: start_line,
526                column: start_col,
527                end_line,
528                end_column: end_col,
529                message,
530                severity: Severity::Warning,
531                fix: Some(Fix {
532                    range: match_start..match_end,
533                    replacement: format!("<{matched_text}>"),
534                }),
535            });
536        }
537
538        Ok(warnings)
539    }
540
541    fn check_with_ast(&self, ctx: &LintContext, ast: &MarkdownAst) -> LintResult {
542        // Use AST-based detection for better accuracy
543        let mut warnings = Vec::new();
544        self.find_bare_urls_in_ast(ast, false, ctx.content, &mut warnings, ctx);
545        Ok(warnings)
546    }
547
    /// Returns `false`: this rule does not request the AST-based code path.
    fn uses_ast(&self) -> bool {
        // AST-based approach doesn't work because CommonMark parser converts bare URLs to links
        // Use document structure approach instead
        false
    }
553
554    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
555        let content = ctx.content;
556        if self.should_skip(content) {
557            return Ok(content.to_string());
558        }
559
560        // Get all warnings first - only fix URLs that are actually flagged
561        let warnings = self.check(ctx)?;
562        if warnings.is_empty() {
563            return Ok(content.to_string());
564        }
565
566        // Sort warnings by byte offset in reverse order (rightmost first) to avoid offset issues
567        let mut sorted_warnings = warnings.clone();
568        sorted_warnings.sort_by_key(|w| std::cmp::Reverse(w.fix.as_ref().map(|f| f.range.start).unwrap_or(0)));
569
570        let mut result = content.to_string();
571        for warning in sorted_warnings {
572            if let Some(fix) = &warning.fix {
573                let start = fix.range.start;
574                let end = fix.range.end;
575
576                if start <= result.len() && end <= result.len() && start < end {
577                    result.replace_range(start..end, &fix.replacement);
578                }
579            }
580        }
581
582        Ok(result)
583    }
584
    /// Get the category of this rule for selective processing.
    ///
    /// MD034 is reported as a link rule (`RuleCategory::Link`).
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
589
    /// Check if this rule should be skipped based on content.
    ///
    /// Passes `ctx.content` (a string slice) to `should_skip`; the inherent
    /// `should_skip(&self, &str)` on `MD034NoBareUrls` is the method that accepts that
    /// argument, and inherent methods take precedence over trait methods, so this call
    /// does not recurse into itself.
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        self.should_skip(ctx.content)
    }
594
    /// Returns `self` as `&dyn Any`, which allows downcasting to the concrete rule type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
598
    /// Returns `Some(self)`, exposing this rule as a `MaybeAst` trait object.
    fn as_maybe_ast(&self) -> Option<&dyn MaybeAst> {
        Some(self)
    }
602
603    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
604    where
605        Self: Sized,
606    {
607        Box::new(MD034NoBareUrls)
608    }
609}
610
611impl AstExtensions for MD034NoBareUrls {
612    fn has_relevant_ast_elements(&self, ctx: &LintContext, ast: &MarkdownAst) -> bool {
613        // Check if AST contains text nodes (where bare URLs would be)
614        use crate::utils::ast_utils::ast_contains_node_type;
615        !self.should_skip(ctx.content) && ast_contains_node_type(ast, "text")
616    }
617}
618
619#[cfg(test)]
620mod tests {
621    use super::*;
622    use crate::lint_context::LintContext;
623
624    #[test]
625    fn test_url_quick_check() {
626        assert!(URL_QUICK_CHECK.is_match("This is a URL: https://example.com"));
627        assert!(!URL_QUICK_CHECK.is_match("This has no URL"));
628    }
629
630    #[test]
631    fn test_multiple_badges_and_links_on_one_line() {
632        let rule = MD034NoBareUrls;
633        let content = "# [React](https://react.dev/) \
634&middot; [![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/facebook/react/blob/main/LICENSE) \
635[![npm version](https://img.shields.io/npm/v/react.svg?style=flat)](https://www.npmjs.com/package/react) \
636[![(Runtime) Build and Test](https://github.com/facebook/react/actions/workflows/runtime_build_and_test.yml/badge.svg)](https://github.com/facebook/react/actions/workflows/runtime_build_and_test.yml) \
637[![(Compiler) TypeScript](https://github.com/facebook/react/actions/workflows/compiler_typescript.yml/badge.svg?branch=main)](https://github.com/facebook/react/actions/workflows/compiler_typescript.yml) \
638[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://legacy.reactjs.org/docs/how-to-contribute.html#your-first-pull-request)";
639        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
640        let result = rule.check(&ctx).unwrap();
641        if !result.is_empty() {
642            log::debug!("MD034 warnings: {result:#?}");
643        }
644        assert!(
645            result.is_empty(),
646            "Multiple badges and links on one line should not be flagged as bare URLs"
647        );
648    }
649
650    #[test]
651    fn test_bare_urls() {
652        let rule = MD034NoBareUrls;
653        let content = "This is a bare URL: https://example.com/foobar";
654        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
655        let result = rule.check(&ctx).unwrap();
656        assert_eq!(result.len(), 1, "Bare URLs should be flagged");
657        assert_eq!(result[0].line, 1);
658        assert_eq!(result[0].column, 21);
659    }
660
661    #[test]
662    fn test_md034_performance_baseline() {
663        use std::time::Instant;
664
665        // Generate test content with various URL patterns
666        let mut content = String::with_capacity(50_000);
667
668        // Add content with bare URLs (should be detected)
669        for i in 0..250 {
670            content.push_str(&format!("Line {i} with bare URL https://example{i}.com/path\n"));
671        }
672
673        // Add content with proper markdown links (should not be detected)
674        for i in 0..250 {
675            content.push_str(&format!(
676                "Line {} with [proper link](https://example{}.com/path)\n",
677                i + 250,
678                i
679            ));
680        }
681
682        // Add content with no URLs (should be fast)
683        for i in 0..500 {
684            content.push_str(&format!("Line {} with no URLs, just regular text content\n", i + 500));
685        }
686
687        // Add content with emails
688        for i in 0..100 {
689            content.push_str(&format!("Contact user{i}@example{i}.com for more info\n"));
690        }
691
692        println!(
693            "MD034 Performance Test - Content: {} bytes, {} lines",
694            content.len(),
695            content.lines().count()
696        );
697
698        let rule = MD034NoBareUrls;
699        let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard);
700
701        // Warm up
702        let _ = rule.check(&ctx).unwrap();
703
704        // Measure check performance (more runs for accuracy)
705        let mut total_duration = std::time::Duration::ZERO;
706        let runs = 10;
707        let mut warnings_count = 0;
708
709        for _ in 0..runs {
710            let start = Instant::now();
711            let warnings = rule.check(&ctx).unwrap();
712            total_duration += start.elapsed();
713            warnings_count = warnings.len();
714        }
715
716        let avg_check_duration = total_duration / runs;
717
718        println!("MD034 Optimized Performance:");
719        println!(
720            "- Average check time: {:?} ({:.2} ms)",
721            avg_check_duration,
722            avg_check_duration.as_secs_f64() * 1000.0
723        );
724        println!("- Found {warnings_count} warnings");
725        println!(
726            "- Lines per second: {:.0}",
727            content.lines().count() as f64 / avg_check_duration.as_secs_f64()
728        );
729        println!(
730            "- Microseconds per line: {:.2}",
731            avg_check_duration.as_micros() as f64 / content.lines().count() as f64
732        );
733
734        // Performance assertion - should complete reasonably fast
735        // Note: In debug builds this may take longer, so we use a higher threshold
736        let max_duration_ms = if cfg!(debug_assertions) { 1000 } else { 100 };
737        assert!(
738            avg_check_duration.as_millis() < max_duration_ms,
739            "MD034 check should complete in under {}ms, took {}ms",
740            max_duration_ms,
741            avg_check_duration.as_millis()
742        );
743
744        // Verify we're finding the expected number of warnings
745        assert_eq!(warnings_count, 350, "Should find 250 URLs + 100 emails = 350 warnings");
746    }
747}
rumdl_lib/rules/md034_no_bare_urls.rs

rumdl_lib/rules/
md034_no_bare_urls.rs