rumdl_lib/rules/
md034_no_bare_urls.rs

//! Rule MD034: No bare (unformatted) URLs
//!
//! See [docs/md034.md](../../docs/md034.md) for full documentation, configuration, and examples.
4use crate::rule::{
5    AstExtensions, Fix, LintError, LintResult, LintWarning, MarkdownAst, MaybeAst, Rule, RuleCategory, Severity,
6};
7use crate::utils::early_returns;
8use crate::utils::range_utils::calculate_url_range;
9use crate::utils::regex_cache::EMAIL_PATTERN;
10
11use crate::lint_context::LintContext;
12use fancy_regex::Regex as FancyRegex;
13use lazy_static::lazy_static;
14use markdown::mdast::Node;
15use regex::Regex;
16
// Compiled-once regexes shared by the MD034 detection code below.
lazy_static! {
    // Cheap pre-filter: matches an http(s)/ftp(s) scheme prefix or any `@`.
    // `Rule::check` uses it to bail out before the heavier scanning starts.
    static ref URL_QUICK_CHECK: Regex = Regex::new(r#"(?:https?|ftps?)://|@"#).unwrap();

    // URL matcher with a negative look-behind (hence fancy-regex): a URL is not matched when
    // it is directly preceded by a word character, `[`, `(` or `<`. The host may be a
    // bracketed IPv6 literal. Group 1 captures the URL.
    // NOTE(review): URL_REGEX and URL_FIX_REGEX hold the same pattern and neither is
    // referenced by the code visible in this file — confirm whether both are still needed.
    static ref URL_REGEX: FancyRegex = FancyRegex::new(r#"(?<![\w\[\(\<])((?:https?|ftps?)://(?:\[[0-9a-fA-F:%]+\]|[^\s<>\[\]()\\'\"]+)(?::\d+)?(?:/[^\s<>\[\]()\\'\"]*)?(?:\?[^\s<>\[\]()\\'\"]*)?(?:#[^\s<>\[\]()\\'\"]*)?)"#).unwrap();
    static ref URL_FIX_REGEX: FancyRegex = FancyRegex::new(r#"(?<![\w\[\(\<])((?:https?|ftps?)://(?:\[[0-9a-fA-F:%]+\]|[^\s<>\[\]()\\'\"]+)(?::\d+)?(?:/[^\s<>\[\]()\\'\"]*)?(?:\?[^\s<>\[\]()\\'\"]*)?(?:#[^\s<>\[\]()\\'\"]*)?)"#).unwrap();

    // Inline markdown link `[text](dest "title")`; group 1 captures the destination.
    // The link text may contain one level of nested brackets, which covers badge links
    // such as `[![badge](img)](link)`. The quoted title is optional.
    static ref MARKDOWN_LINK_PATTERN: Regex = Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap();

    // Angle-bracket autolink: either `<scheme://...>` (the host may be a bracketed IPv6
    // literal with an optional `%zone` suffix) or `<user@host.tld>`.
    // Group 1 is the text between the angle brackets.
    static ref ANGLE_LINK_PATTERN: Regex = Regex::new(r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#).unwrap();

    // A line that consists solely of one badge link `[![alt](img)](link)`, ignoring
    // leading and trailing whitespace.
    // NOTE(review): not referenced by the code visible in this file.
    static ref BADGE_LINK_LINE: Regex = Regex::new(r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#).unwrap();

    // Link text that is exactly one image `![alt](src)` and nothing else.
    // NOTE(review): not referenced by the code visible in this file.
    static ref IMAGE_ONLY_LINK_TEXT_PATTERN: Regex = Regex::new(r#"^!\s*\[[^\]]*\]\s*\([^)]*\)$"#).unwrap();

    // Inline image: group 0 is the full image, group 1 the alt text, group 2 the source.
    static ref MARKDOWN_IMAGE_PATTERN: Regex = Regex::new(r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap();

    // Candidate-URL matcher without look-around, so it runs on the plain `regex` engine:
    //   scheme://host[:port][/path][?query][#fragment]
    // - the host is either a bracketed IPv6 literal or a run of characters that excludes
    //   whitespace, brackets, parentheses, quotes, backticks, backslashes and `:`;
    //   a dot in the host is not required;
    // - a `]` can only be consumed as the close of a `[`-opened IPv6 literal, so a partial
    //   form such as `https://::1]` is not matched through the bracket;
    // - trailing sentence punctuation is NOT excluded here; callers trim it afterwards.
    static ref SIMPLE_URL_REGEX: Regex = Regex::new(r#"(https?|ftps?)://(?:\[[0-9a-fA-F:%.]+\](?::\d+)?|[^\s<>\[\]()\\'\"`:\]]+(?::\d+)?)(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#).unwrap();

    // Dedicated matcher for URLs whose host is a bracketed literal. It is deliberately more
    // permissive than SIMPLE_URL_REGEX about what may appear between the brackets (any
    // ASCII letter, digit, `:`, `%`, `.` or `-`), so technically invalid IPv6 examples are
    // still recognised.
    static ref IPV6_URL_REGEX: Regex = Regex::new(r#"(https?|ftps?)://\[[0-9a-fA-F:%.\-a-zA-Z]+\](?::\d+)?(?:/[^\s<>\[\]()\\'\"`]*)?(?:\?[^\s<>\[\]()\\'\"`]*)?(?:#[^\s<>\[\]()\\'\"`]*)?"#).unwrap();

    // A whole line that is a reference definition with a URL destination and nothing
    // after it: `[label]: scheme://...`.
    static ref REFERENCE_DEF_RE: Regex = Regex::new(r"^\s*\[[^\]]+\]:\s*(?:https?|ftps?)://\S+$").unwrap();

    // HTML comment, possibly spanning several lines (non-greedy up to the first `-->`).
    static ref HTML_COMMENT_PATTERN: Regex = Regex::new(r#"<!--[\s\S]*?-->"#).unwrap();
}
63
/// Lint rule MD034: reports URLs and email addresses that appear in running text
/// without angle brackets or link formatting.
///
/// The rule has no configuration and no state, so it is a unit struct.
#[derive(Debug, Default, Clone)]
pub struct MD034NoBareUrls;
66
67impl MD034NoBareUrls {
68    #[inline]
69    pub fn should_skip(&self, content: &str) -> bool {
70        // Skip if content has no URLs and no email addresses
71        !early_returns::has_urls(content) && !content.contains('@')
72    }
73
74    /// Remove trailing punctuation that is likely sentence punctuation, not part of the URL
75    fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
76        let trailing_punct = ['.', ',', ';', ':', '!', '?'];
77        let mut end = url.len();
78
79        // Remove trailing punctuation characters
80        while end > 0 {
81            if let Some(last_char) = url.chars().nth(end - 1) {
82                if trailing_punct.contains(&last_char) {
83                    end -= last_char.len_utf8();
84                } else {
85                    break;
86                }
87            } else {
88                break;
89            }
90        }
91
92        &url[..end]
93    }
94
95    // Uses DocumentStructure for code block and code span detection in check_with_structure.
96    pub fn check_with_structure(
97        &self,
98        ctx: &crate::lint_context::LintContext,
99        _structure: &crate::utils::document_structure::DocumentStructure,
100    ) -> LintResult {
101        let content = ctx.content;
102
103        // Early return: skip if no URLs or emails
104        if self.should_skip(content) {
105            return Ok(vec![]);
106        }
107
108        // Process the entire content to handle multi-line markdown links
109        let mut warnings = Vec::new();
110
111        // First, find all markdown link ranges across the entire content
112        let mut excluded_ranges: Vec<(usize, usize)> = Vec::new();
113
114        // Markdown links: [text](url) - exclude both destination and entire link text
115        for cap in MARKDOWN_LINK_PATTERN.captures_iter(content) {
116            if let Some(dest) = cap.get(1) {
117                excluded_ranges.push((dest.start(), dest.end()));
118            }
119            // Also exclude the entire link to handle URLs in link text
120            if let Some(full_match) = cap.get(0) {
121                excluded_ranges.push((full_match.start(), full_match.end()));
122            }
123        }
124
125        // Markdown images: ![alt](url)
126        for cap in MARKDOWN_IMAGE_PATTERN.captures_iter(content) {
127            if let Some(dest) = cap.get(2) {
128                excluded_ranges.push((dest.start(), dest.end()));
129            }
130        }
131
132        // Angle-bracket links: <url>
133        for cap in ANGLE_LINK_PATTERN.captures_iter(content) {
134            if let Some(m) = cap.get(1) {
135                excluded_ranges.push((m.start(), m.end()));
136            }
137        }
138
139        // HTML tags: exclude everything inside them
140        for html_tag in ctx.html_tags().iter() {
141            excluded_ranges.push((html_tag.byte_offset, html_tag.byte_end));
142        }
143
144        // HTML comments: <!-- url -->
145        for cap in HTML_COMMENT_PATTERN.captures_iter(content) {
146            if let Some(comment) = cap.get(0) {
147                excluded_ranges.push((comment.start(), comment.end()));
148            }
149        }
150
151        // Sort and merge overlapping ranges
152        excluded_ranges.sort_by_key(|r| r.0);
153        let mut merged: Vec<(usize, usize)> = Vec::new();
154        for (start, end) in excluded_ranges {
155            if let Some((_, last_end)) = merged.last_mut()
156                && *last_end >= start
157            {
158                *last_end = (*last_end).max(end);
159                continue;
160            }
161            merged.push((start, end));
162        }
163
164        // Now find all URLs and emails in the content and check if they're excluded
165        // We'll combine URL and email detection for efficiency
166        let mut all_matches: Vec<(usize, usize, bool)> = Vec::new(); // (start, end, is_email)
167
168        // Early exit if no potential URLs/emails based on quick check
169        if !content.contains("://") && !content.contains('@') {
170            return Ok(warnings);
171        }
172
173        // Use line-based processing for better cache locality
174        for line_info in ctx.lines.iter() {
175            let line_content = &line_info.content;
176
177            // Skip lines in code blocks
178            if line_info.in_code_block {
179                continue;
180            }
181
182            // Quick check if line might contain URLs or emails
183            if !line_content.contains("://") && !line_content.contains('@') {
184                continue;
185            }
186
187            // Check for URLs in this line
188            for url_match in SIMPLE_URL_REGEX.find_iter(line_content) {
189                let start_in_line = url_match.start();
190                let end_in_line = url_match.end();
191                let matched_str = &line_content[start_in_line..end_in_line];
192
193                // Skip invalid IPv6 patterns
194                if matched_str.contains("::") && !matched_str.contains('[') && matched_str.contains(']') {
195                    continue;
196                }
197
198                let global_start = line_info.byte_offset + start_in_line;
199                let global_end = line_info.byte_offset + end_in_line;
200                all_matches.push((global_start, global_end, false));
201            }
202
203            // Check for IPv6 URLs
204            for url_match in IPV6_URL_REGEX.find_iter(line_content) {
205                let global_start = line_info.byte_offset + url_match.start();
206                let global_end = line_info.byte_offset + url_match.end();
207
208                // Remove any overlapping regular URL matches
209                all_matches.retain(|(start, end, _)| !(*start < global_end && *end > global_start));
210
211                all_matches.push((global_start, global_end, false));
212            }
213
214            // Check for emails in this line
215            for email_match in EMAIL_PATTERN.find_iter(line_content) {
216                let global_start = line_info.byte_offset + email_match.start();
217                let global_end = line_info.byte_offset + email_match.end();
218                all_matches.push((global_start, global_end, true));
219            }
220        }
221
222        // Process all matches
223        for (match_start, match_end_orig, is_email) in all_matches {
224            let mut match_end = match_end_orig;
225
226            // For URLs, trim trailing punctuation
227            if !is_email {
228                let raw_url = &content[match_start..match_end];
229                let trimmed_url = self.trim_trailing_punctuation(raw_url);
230                match_end = match_start + trimmed_url.len();
231            }
232
233            // Skip if became empty after trimming
234            if match_end <= match_start {
235                continue;
236            }
237
238            // Manual boundary check: not part of a larger word
239            let before = if match_start == 0 {
240                None
241            } else {
242                content.get(match_start - 1..match_start)
243            };
244            let after = content.get(match_end..match_end + 1);
245
246            let is_valid_boundary = if is_email {
247                before.is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_" && c != ".")
248                    && after.is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_" && c != ".")
249            } else {
250                before.is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_")
251                    && after.is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_")
252            };
253
254            if !is_valid_boundary {
255                continue;
256            }
257
258            // Skip if this is within a code span (code blocks already checked)
259            if ctx.is_in_code_block_or_span(match_start) {
260                continue;
261            }
262
263            // Skip if within any excluded range (link/image dest/HTML comment)
264            let in_any_range = merged.iter().any(|(start, end)| {
265                // For HTML comments and other exclusions, check if URL overlaps the range
266                (match_start >= *start && match_start < *end)
267                    || (match_end > *start && match_end <= *end)
268                    || (match_start < *start && match_end > *end)
269            });
270            if in_any_range {
271                continue;
272            }
273
274            // Get line information efficiently
275            let (line_num, col_num) = ctx.offset_to_line_col(match_start);
276
277            // Skip reference definitions for URLs
278            if !is_email
279                && let Some(line_info) = ctx.line_info(line_num)
280                && REFERENCE_DEF_RE.is_match(&line_info.content)
281            {
282                continue;
283            }
284
285            let matched_text = &content[match_start..match_end];
286            let line_info = ctx.line_info(line_num).unwrap();
287            let (start_line, start_col, end_line, end_col) =
288                calculate_url_range(line_num, &line_info.content, col_num - 1, matched_text.len());
289
290            let message = if is_email {
291                "Email address without angle brackets or link formatting".to_string()
292            } else {
293                "URL without angle brackets or link formatting".to_string()
294            };
295
296            warnings.push(LintWarning {
297                rule_name: Some(self.name()),
298                line: start_line,
299                column: start_col,
300                end_line,
301                end_column: end_col,
302                message,
303                severity: Severity::Warning,
304                fix: Some(Fix {
305                    range: match_start..match_end,
306                    replacement: format!("<{matched_text}>"),
307                }),
308            });
309        }
310
311        Ok(warnings)
312    }
313
314    /// AST-based bare URL detection: only flag URLs in text nodes not inside links/images/code/html
315    fn find_bare_urls_in_ast(
316        &self,
317        node: &Node,
318        parent_is_link_or_image: bool,
319        _content: &str,
320        warnings: &mut Vec<LintWarning>,
321        ctx: &LintContext,
322    ) {
323        use markdown::mdast::Node::*;
324        match node {
325            Text(text) if !parent_is_link_or_image => {
326                let text_str = &text.value;
327
328                // Check for URLs
329                for url_match in SIMPLE_URL_REGEX.find_iter(text_str) {
330                    let url_start = url_match.start();
331                    let mut url_end = url_match.end();
332
333                    // Trim trailing punctuation that's likely sentence punctuation
334                    let raw_url = &text_str[url_start..url_end];
335                    let trimmed_url = self.trim_trailing_punctuation(raw_url);
336                    url_end = url_start + trimmed_url.len();
337
338                    // Skip if URL became empty after trimming
339                    if url_end <= url_start {
340                        continue;
341                    }
342
343                    let before = if url_start == 0 {
344                        None
345                    } else {
346                        text_str.get(url_start - 1..url_start)
347                    };
348                    let after = text_str.get(url_end..url_end + 1);
349                    let is_valid_boundary = before
350                        .is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_")
351                        && after.is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_");
352                    if !is_valid_boundary {
353                        continue;
354                    }
355                    if let Some(pos) = &text.position {
356                        let offset = pos.start.offset + url_start;
357                        let (line, column) = ctx.offset_to_line_col(offset);
358                        let url_text = &text_str[url_start..url_end];
359                        let (start_line, start_col, end_line, end_col) =
360                            (line, column, line, column + url_text.chars().count());
361                        warnings.push(LintWarning {
362                            rule_name: Some(self.name()),
363                            line: start_line,
364                            column: start_col,
365                            end_line,
366                            end_column: end_col,
367                            message: "URL without angle brackets or link formatting".to_string(),
368                            severity: Severity::Warning,
369                            fix: Some(Fix {
370                                range: offset..(offset + url_text.len()),
371                                replacement: format!("<{url_text}>"),
372                            }),
373                        });
374                    }
375                }
376
377                // Check for email addresses
378                for email_match in EMAIL_PATTERN.find_iter(text_str) {
379                    let email_start = email_match.start();
380                    let email_end = email_match.end();
381                    let before = if email_start == 0 {
382                        None
383                    } else {
384                        text_str.get(email_start - 1..email_start)
385                    };
386                    let after = text_str.get(email_end..email_end + 1);
387                    let is_valid_boundary = before
388                        .is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_" && c != ".")
389                        && after.is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_" && c != ".");
390                    if !is_valid_boundary {
391                        continue;
392                    }
393                    if let Some(pos) = &text.position {
394                        let offset = pos.start.offset + email_start;
395                        let (line, column) = ctx.offset_to_line_col(offset);
396                        let email_text = &text_str[email_start..email_end];
397                        let (start_line, start_col, end_line, end_col) =
398                            (line, column, line, column + email_text.chars().count());
399                        warnings.push(LintWarning {
400                            rule_name: Some(self.name()),
401                            line: start_line,
402                            column: start_col,
403                            end_line,
404                            end_column: end_col,
405                            message: "Email address without angle brackets or link formatting (wrap like: <email>)"
406                                .to_string(),
407                            severity: Severity::Warning,
408                            fix: Some(Fix {
409                                range: offset..(offset + email_text.len()),
410                                replacement: format!("<{email_text}>"),
411                            }),
412                        });
413                    }
414                }
415            }
416            Link(link) => {
417                for child in &link.children {
418                    self.find_bare_urls_in_ast(child, true, _content, warnings, ctx);
419                }
420            }
421            Image(image) => {
422                // Only check alt text for bare URLs (rare, but possible)
423                let alt_str = &image.alt;
424                for url_match in SIMPLE_URL_REGEX.find_iter(alt_str) {
425                    let url_start = url_match.start();
426                    let mut url_end = url_match.end();
427
428                    // Trim trailing punctuation that's likely sentence punctuation
429                    let raw_url = &alt_str[url_start..url_end];
430                    let trimmed_url = self.trim_trailing_punctuation(raw_url);
431                    url_end = url_start + trimmed_url.len();
432
433                    // Skip if URL became empty after trimming
434                    if url_end <= url_start {
435                        continue;
436                    }
437
438                    let before = if url_start == 0 {
439                        None
440                    } else {
441                        alt_str.get(url_start - 1..url_start)
442                    };
443                    let after = alt_str.get(url_end..url_end + 1);
444                    let is_valid_boundary = before
445                        .is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_")
446                        && after.is_none_or(|c| !c.chars().next().unwrap().is_alphanumeric() && c != "_");
447                    if !is_valid_boundary {
448                        continue;
449                    }
450                    if let Some(pos) = &image.position {
451                        let offset = pos.start.offset + url_start;
452                        let (line, column) = ctx.offset_to_line_col(offset);
453                        let url_text = &alt_str[url_start..url_end];
454                        let (start_line, start_col, end_line, end_col) =
455                            (line, column, line, column + url_text.chars().count());
456                        warnings.push(LintWarning {
457                            rule_name: Some(self.name()),
458                            line: start_line,
459                            column: start_col,
460                            end_line,
461                            end_column: end_col,
462                            message: "URL without angle brackets or link formatting".to_string(),
463                            severity: Severity::Warning,
464                            fix: Some(Fix {
465                                range: offset..(offset + url_text.len()),
466                                replacement: format!("<{url_text}>"),
467                            }),
468                        });
469                    }
470                }
471            }
472            Code(_) | InlineCode(_) | Html(_) => {
473                // Skip code and HTML nodes
474            }
475            _ => {
476                if let Some(children) = node.children() {
477                    for child in children {
478                        self.find_bare_urls_in_ast(child, false, _content, warnings, ctx);
479                    }
480                }
481            }
482        }
483    }
484
485    /// AST-based check method for MD034
486    pub fn check_ast(&self, ctx: &LintContext, ast: &Node) -> LintResult {
487        let mut warnings = Vec::new();
488        self.find_bare_urls_in_ast(ast, false, ctx.content, &mut warnings, ctx);
489        Ok(warnings)
490    }
491}
492
493impl Rule for MD034NoBareUrls {
494    fn name(&self) -> &'static str {
495        "MD034"
496    }
497
498    fn description(&self) -> &'static str {
499        "URL without angle brackets or link formatting"
500    }
501
502    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
503        // Use line-based detection to properly distinguish between bare URLs and autolinks
504        // AST-based approach doesn't work because CommonMark parser converts bare URLs to links
505        let content = ctx.content;
506
507        // Fast path: Early return for empty content
508        if content.is_empty() {
509            return Ok(Vec::new());
510        }
511
512        // Fast path: Early return if no potential URLs or emails
513        if !content.contains("http://")
514            && !content.contains("https://")
515            && !content.contains("ftp://")
516            && !content.contains("ftps://")
517            && !content.contains('@')
518        {
519            return Ok(Vec::new());
520        }
521
522        // Fast path: Quick check using simple pattern
523        if !URL_QUICK_CHECK.is_match(content) {
524            return Ok(Vec::new());
525        }
526
527        // Fallback path: create structure manually (should rarely be used)
528        let structure = crate::utils::document_structure::DocumentStructure::new(content);
529        self.check_with_structure(ctx, &structure)
530    }
531
532    fn check_with_ast(&self, ctx: &LintContext, ast: &MarkdownAst) -> LintResult {
533        // Use AST-based detection for better accuracy
534        let mut warnings = Vec::new();
535        self.find_bare_urls_in_ast(ast, false, ctx.content, &mut warnings, ctx);
536        Ok(warnings)
537    }
538
539    fn uses_ast(&self) -> bool {
540        // AST-based approach doesn't work because CommonMark parser converts bare URLs to links
541        // Use document structure approach instead
542        false
543    }
544
545    fn uses_document_structure(&self) -> bool {
546        true
547    }
548
549    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
550        let content = ctx.content;
551        if self.should_skip(content) {
552            return Ok(content.to_string());
553        }
554
555        // Get all warnings first - only fix URLs that are actually flagged
556        // Use structure-based detection to match the main linting path (since uses_document_structure() returns true)
557        let structure = crate::utils::document_structure::DocumentStructure::new(content);
558        let warnings = self.check_with_structure(ctx, &structure)?;
559        if warnings.is_empty() {
560            return Ok(content.to_string());
561        }
562
563        // Sort warnings by byte offset in reverse order (rightmost first) to avoid offset issues
564        let mut sorted_warnings = warnings.clone();
565        sorted_warnings.sort_by_key(|w| std::cmp::Reverse(w.fix.as_ref().map(|f| f.range.start).unwrap_or(0)));
566
567        let mut result = content.to_string();
568        for warning in sorted_warnings {
569            if let Some(fix) = &warning.fix {
570                let start = fix.range.start;
571                let end = fix.range.end;
572
573                if start <= result.len() && end <= result.len() && start < end {
574                    result.replace_range(start..end, &fix.replacement);
575                }
576            }
577        }
578
579        Ok(result)
580    }
581
582    /// Get the category of this rule for selective processing
583    fn category(&self) -> RuleCategory {
584        RuleCategory::Link
585    }
586
587    /// Check if this rule should be skipped based on content
588    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
589        self.should_skip(ctx.content)
590    }
591
592    fn as_any(&self) -> &dyn std::any::Any {
593        self
594    }
595
596    fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
597        Some(self)
598    }
599
600    fn as_maybe_ast(&self) -> Option<&dyn MaybeAst> {
601        Some(self)
602    }
603
604    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
605    where
606        Self: Sized,
607    {
608        Box::new(MD034NoBareUrls)
609    }
610}
611
612impl crate::utils::document_structure::DocumentStructureExtensions for MD034NoBareUrls {
613    fn has_relevant_elements(
614        &self,
615        ctx: &crate::lint_context::LintContext,
616        _doc_structure: &crate::utils::document_structure::DocumentStructure,
617    ) -> bool {
618        // This rule is only relevant if there might be URLs or emails in the content
619        let content = ctx.content;
620        !content.is_empty()
621            && (content.contains("http://")
622                || content.contains("https://")
623                || content.contains("ftp://")
624                || content.contains("ftps://")
625                || content.contains('@'))
626    }
627}
628
629impl AstExtensions for MD034NoBareUrls {
630    fn has_relevant_ast_elements(&self, ctx: &LintContext, ast: &MarkdownAst) -> bool {
631        // Check if AST contains text nodes (where bare URLs would be)
632        use crate::utils::ast_utils::ast_contains_node_type;
633        !self.should_skip(ctx.content) && ast_contains_node_type(ast, "text")
634    }
635}
636
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Lint `content` with MD034 under the standard flavor and return the warnings.
    fn lint(content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD034NoBareUrls.check(&ctx).unwrap()
    }

    #[test]
    fn test_url_quick_check() {
        assert!(URL_QUICK_CHECK.is_match("This is a URL: https://example.com"));
        assert!(!URL_QUICK_CHECK.is_match("This has no URL"));
    }

    #[test]
    fn test_multiple_badges_and_links_on_one_line() {
        // A heading with one plain link followed by five badge links on the same line.
        let content = "# [React](https://react.dev/) \
&middot; [![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/facebook/react/blob/main/LICENSE) \
[![npm version](https://img.shields.io/npm/v/react.svg?style=flat)](https://www.npmjs.com/package/react) \
[![(Runtime) Build and Test](https://github.com/facebook/react/actions/workflows/runtime_build_and_test.yml/badge.svg)](https://github.com/facebook/react/actions/workflows/runtime_build_and_test.yml) \
[![(Compiler) TypeScript](https://github.com/facebook/react/actions/workflows/compiler_typescript.yml/badge.svg?branch=main)](https://github.com/facebook/react/actions/workflows/compiler_typescript.yml) \
[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://legacy.reactjs.org/docs/how-to-contribute.html#your-first-pull-request)";

        let result = lint(content);
        if !result.is_empty() {
            log::debug!("MD034 warnings: {result:#?}");
        }
        assert!(
            result.is_empty(),
            "Multiple badges and links on one line should not be flagged as bare URLs"
        );
    }

    #[test]
    fn test_bare_urls() {
        let result = lint("This is a bare URL: https://example.com/foobar");
        assert_eq!(result.len(), 1, "Bare URLs should be flagged");
        assert_eq!(result[0].line, 1);
        assert_eq!(result[0].column, 21);
    }

    #[test]
    fn test_md034_performance_baseline() {
        use std::time::{Duration, Instant};

        // Build the fixture: four sections exercising the different code paths.
        let mut content = String::with_capacity(50_000);

        // Bare URLs (should be detected)
        content.extend((0..250).map(|i| format!("Line {i} with bare URL https://example{i}.com/path\n")));

        // Proper markdown links (should not be detected)
        content.extend((0..250).map(|i| {
            format!(
                "Line {} with [proper link](https://example{}.com/path)\n",
                i + 250,
                i
            )
        }));

        // Lines without URLs (should be fast)
        content.extend((0..500).map(|i| format!("Line {} with no URLs, just regular text content\n", i + 500)));

        // Bare emails (should be detected)
        content.extend((0..100).map(|i| format!("Contact user{i}@example{i}.com for more info\n")));

        let line_count = content.lines().count();
        println!(
            "MD034 Performance Test - Content: {} bytes, {} lines",
            content.len(),
            line_count
        );

        let rule = MD034NoBareUrls;
        let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard);

        // Warm up
        let _ = rule.check(&ctx).unwrap();

        // Measure only the check itself, averaged over several runs.
        let runs = 10;
        let mut total_duration = Duration::ZERO;
        let mut warnings_count = 0;
        for _ in 0..runs {
            let started = Instant::now();
            let warnings = rule.check(&ctx).unwrap();
            total_duration += started.elapsed();
            warnings_count = warnings.len();
        }
        let avg_check_duration = total_duration / runs;

        println!("MD034 Optimized Performance:");
        println!(
            "- Average check time: {:?} ({:.2} ms)",
            avg_check_duration,
            avg_check_duration.as_secs_f64() * 1000.0
        );
        println!("- Found {warnings_count} warnings");
        println!(
            "- Lines per second: {:.0}",
            line_count as f64 / avg_check_duration.as_secs_f64()
        );
        println!(
            "- Microseconds per line: {:.2}",
            avg_check_duration.as_micros() as f64 / line_count as f64
        );

        // Performance assertion - should complete reasonably fast
        assert!(
            avg_check_duration.as_millis() < 100,
            "MD034 check should complete in under 100ms, took {}ms",
            avg_check_duration.as_millis()
        );

        // Verify we're finding the expected number of warnings
        assert_eq!(warnings_count, 350, "Should find 250 URLs + 100 emails = 350 warnings");
    }
}
rumdl_lib/rules/md034_no_bare_urls.rs

rumdl_lib/rules/
md034_no_bare_urls.rs