rumdl_lib/rules/
md034_no_bare_urls.rs

//! Rule MD034: No unformatted URLs
//!
//! See [docs/md034.md](../../docs/md034.md) for full documentation, configuration, and examples.
4use std::sync::LazyLock;
5
6use regex::Regex;
7
8use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
9use crate::utils::range_utils::{LineIndex, calculate_url_range};
10use crate::utils::regex_cache::{
11    EMAIL_PATTERN, URL_IPV6_REGEX, URL_QUICK_CHECK_REGEX, URL_STANDARD_REGEX, URL_WWW_REGEX, XMPP_URI_REGEX,
12};
13
14use crate::filtered_lines::FilteredLinesExt;
15use crate::lint_context::LintContext;
16
// MD034-specific pre-compiled regex patterns for markdown constructs.
// Each pattern is compiled lazily on first use and then shared.

/// Matches one of a fixed list of non-web scheme names followed by `://`.
///
/// NOTE(review): the pattern is unanchored, so it also matches when such a scheme
/// appears in the middle of a longer string (e.g. inside a query parameter) or as
/// the tail of a longer scheme name — confirm this is intended by the callers.
static CUSTOM_PROTOCOL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#).unwrap()
});
/// Matches an inline link `[text](destination)` with an optional quoted title.
/// The text may contain one level of nested brackets; the destination (capture
/// group 1) is a non-empty run without whitespace or `)`.
static MARKDOWN_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap()
});
/// Matches an inline link with an empty destination: `[text]()`.
static MARKDOWN_EMPTY_LINK_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(\)"#).unwrap());
/// Matches a collapsed reference link: `[text][]`.
static MARKDOWN_EMPTY_REF_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\[\]"#).unwrap());
/// Matches an angle-bracket autolink `<...>` whose content (capture group 1) is an
/// `http`/`https`/`ftp`/`ftps` URL (including a bracketed IPv6 host with an optional
/// `%zone` suffix), an `xmpp:` URI, or an email-like `local@domain.tld` string.
static ANGLE_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|xmpp:[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#,
    )
    .unwrap()
});
/// Matches a whole line (surrounding whitespace allowed) that consists solely of a
/// linked image, i.e. `[![alt](image)](target)`.
static BADGE_LINK_LINE_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#).unwrap());
/// Matches an image `![alt](destination)` with an optional quoted title. Whitespace
/// is tolerated after `!` and between `]` and `(`. Group 1 is the alt text, group 2
/// the destination.
static MARKDOWN_IMAGE_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap());
/// Matches the start of a reference definition line: optional leading whitespace,
/// `[label]:`, then either `<` or an `http`/`https`/`ftp`/`ftps` URL.
static REFERENCE_DEF_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*\[[^\]]+\]:\s*(?:<|(?:https?|ftps?)://)").unwrap());
/// Matches a line that reaches `](...)` before any `[` appears, i.e. the tail of a
/// link whose opening bracket is not on this line.
static MULTILINE_LINK_CONTINUATION_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"^[^\[]*\]\(.*\)"#).unwrap());
/// Matches a bracketed span `[text]` that contains no nested brackets; group 1 is
/// the text between the brackets.
static SHORTCUT_REF_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"\[([^\[\]]+)\]"#).unwrap());
42
/// Reusable buffers for check_line to reduce allocations.
///
/// One instance is created per `check` call and handed to every `check_line`
/// invocation, which clears each vector before refilling it for the current line.
#[derive(Default)]
struct LineCheckBuffers {
    /// `(start, end)` byte ranges, relative to the current line, of link-like
    /// constructs (inline links, empty links, shortcut references, angle autolinks).
    markdown_link_ranges: Vec<(usize, usize)>,
    /// `(start, end)` byte ranges, relative to the current line, of image constructs.
    image_ranges: Vec<(usize, usize)>,
    /// Candidate URLs on the current line as `(start, end, matched text)`.
    urls_found: Vec<(usize, usize, String)>,
}
50
/// Rule MD034: reports URLs and email addresses that are not wrapped in angle
/// brackets or other link formatting. The rule carries no state or configuration.
#[derive(Default, Clone)]
pub struct MD034NoBareUrls;
53
54impl MD034NoBareUrls {
55    #[inline]
56    pub fn should_skip_content(&self, content: &str) -> bool {
57        // Skip if content has no URLs, XMPP URIs, or email addresses
58        // Fast byte scanning for common URL/email/xmpp indicators
59        let bytes = content.as_bytes();
60        let has_colon = bytes.contains(&b':');
61        let has_at = bytes.contains(&b'@');
62        let has_www = content.contains("www.");
63        !has_colon && !has_at && !has_www
64    }
65
66    /// Remove trailing punctuation that is likely sentence punctuation, not part of the URL
67    fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
68        let mut trimmed = url;
69
70        // Check for balanced parentheses - if we have unmatched closing parens, they're likely punctuation
71        let open_parens = url.chars().filter(|&c| c == '(').count();
72        let close_parens = url.chars().filter(|&c| c == ')').count();
73
74        if close_parens > open_parens {
75            // Find the last balanced closing paren position
76            let mut balance = 0;
77            let mut last_balanced_pos = url.len();
78
79            for (byte_idx, c) in url.char_indices() {
80                if c == '(' {
81                    balance += 1;
82                } else if c == ')' {
83                    balance -= 1;
84                    if balance < 0 {
85                        // Found an unmatched closing paren
86                        last_balanced_pos = byte_idx;
87                        break;
88                    }
89                }
90            }
91
92            trimmed = &trimmed[..last_balanced_pos];
93        }
94
95        // Trim specific punctuation only if not followed by more URL-like chars
96        while let Some(last_char) = trimmed.chars().last() {
97            if matches!(last_char, '.' | ',' | ';' | ':' | '!' | '?') {
98                // Check if this looks like it could be part of the URL
99                // For ':' specifically, keep it if followed by digits (port number)
100                if last_char == ':' && trimmed.len() > 1 {
101                    // Don't trim
102                    break;
103                }
104                trimmed = &trimmed[..trimmed.len() - 1];
105            } else {
106                break;
107            }
108        }
109
110        trimmed
111    }
112
    /// Returns `true` when `line` starts a reference definition, as recognised by
    /// `REFERENCE_DEF_REGEX` (`[label]:` followed by `<` or an http(s)/ftp(s) URL).
    fn is_reference_definition(&self, line: &str) -> bool {
        REFERENCE_DEF_REGEX.is_match(line)
    }
117
118    fn check_line(
119        &self,
120        line: &str,
121        ctx: &LintContext,
122        line_number: usize,
123        code_spans: &[crate::lint_context::CodeSpan],
124        buffers: &mut LineCheckBuffers,
125        line_index: &LineIndex,
126    ) -> Vec<LintWarning> {
127        let mut warnings = Vec::new();
128
129        // Skip reference definitions
130        if self.is_reference_definition(line) {
131            return warnings;
132        }
133
134        // Skip lines inside HTML blocks - URLs in HTML attributes should not be linted
135        if ctx.line_info(line_number).is_some_and(|info| info.in_html_block) {
136            return warnings;
137        }
138
139        // Skip lines that are continuations of multiline markdown links
140        // Pattern: text](url) without a leading [
141        if MULTILINE_LINK_CONTINUATION_REGEX.is_match(line) {
142            return warnings;
143        }
144
145        // Quick check - does this line potentially have a URL or email?
146        let has_quick_check = URL_QUICK_CHECK_REGEX.is_match(line);
147        let has_www = line.contains("www.");
148        let has_at = line.contains('@');
149
150        if !has_quick_check && !has_at && !has_www {
151            return warnings;
152        }
153
154        // Clear and reuse buffers instead of allocating new ones
155        buffers.markdown_link_ranges.clear();
156        buffers.image_ranges.clear();
157
158        let has_bracket = line.contains('[');
159        let has_angle = line.contains('<');
160        let has_bang = line.contains('!');
161
162        if has_bracket {
163            for mat in MARKDOWN_LINK_REGEX.find_iter(line) {
164                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
165            }
166
167            // Also include empty link patterns like [text]() and [text][]
168            for mat in MARKDOWN_EMPTY_LINK_REGEX.find_iter(line) {
169                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
170            }
171
172            for mat in MARKDOWN_EMPTY_REF_REGEX.find_iter(line) {
173                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
174            }
175
176            // Also exclude shortcut reference links like [URL]
177            for mat in SHORTCUT_REF_REGEX.find_iter(line) {
178                let end = mat.end();
179                let next_non_ws = line[end..].bytes().find(|b| !b.is_ascii_whitespace());
180                if next_non_ws == Some(b'(') || next_non_ws == Some(b'[') {
181                    continue;
182                }
183                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
184            }
185
186            // Check if this line contains only a badge link (common pattern)
187            if has_bang && BADGE_LINK_LINE_REGEX.is_match(line) {
188                return warnings;
189            }
190        }
191
192        if has_angle {
193            for mat in ANGLE_LINK_REGEX.find_iter(line) {
194                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
195            }
196        }
197
198        // Find all markdown images for exclusion
199        if has_bang && has_bracket {
200            for mat in MARKDOWN_IMAGE_REGEX.find_iter(line) {
201                buffers.image_ranges.push((mat.start(), mat.end()));
202            }
203        }
204
205        // Find bare URLs
206        buffers.urls_found.clear();
207
208        // First, find IPv6 URLs (they need special handling)
209        for mat in URL_IPV6_REGEX.find_iter(line) {
210            let url_str = mat.as_str();
211            buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
212        }
213
214        // Then find regular URLs
215        for mat in URL_STANDARD_REGEX.find_iter(line) {
216            let url_str = mat.as_str();
217
218            // Skip if it's an IPv6 URL (already handled)
219            if url_str.contains("://[") {
220                continue;
221            }
222
223            // Skip malformed IPv6-like URLs
224            // Check for IPv6-like patterns that are malformed
225            if let Some(host_start) = url_str.find("://") {
226                let after_protocol = &url_str[host_start + 3..];
227                // If it looks like IPv6 (has :: or multiple :) but no brackets, skip if followed by ]
228                if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
229                    // Check if the next byte after our match is ] (ASCII, so byte check is safe)
230                    if line.as_bytes().get(mat.end()) == Some(&b']') {
231                        // This is likely a malformed IPv6 URL like "https://::1]:8080"
232                        continue;
233                    }
234                }
235            }
236
237            buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
238        }
239
240        // Find www URLs without protocol (e.g., www.example.com)
241        for mat in URL_WWW_REGEX.find_iter(line) {
242            let url_str = mat.as_str();
243            let start_pos = mat.start();
244            let end_pos = mat.end();
245
246            // Skip if preceded by / or @ (likely part of a full URL)
247            if start_pos > 0 {
248                let prev_char = line.as_bytes().get(start_pos - 1).copied();
249                if prev_char == Some(b'/') || prev_char == Some(b'@') {
250                    continue;
251                }
252            }
253
254            // Skip if inside angle brackets (autolink syntax like <www.example.com>)
255            if start_pos > 0 && end_pos < line.len() {
256                let prev_char = line.as_bytes().get(start_pos - 1).copied();
257                let next_char = line.as_bytes().get(end_pos).copied();
258                if prev_char == Some(b'<') && next_char == Some(b'>') {
259                    continue;
260                }
261            }
262
263            buffers.urls_found.push((start_pos, end_pos, url_str.to_string()));
264        }
265
266        // Find XMPP URIs (GFM extended autolinks: xmpp:user@domain/resource)
267        for mat in XMPP_URI_REGEX.find_iter(line) {
268            let uri_str = mat.as_str();
269            let start_pos = mat.start();
270            let end_pos = mat.end();
271
272            // Skip if inside angle brackets (already properly formatted: <xmpp:user@domain>)
273            if start_pos > 0 && end_pos < line.len() {
274                let prev_char = line.as_bytes().get(start_pos - 1).copied();
275                let next_char = line.as_bytes().get(end_pos).copied();
276                if prev_char == Some(b'<') && next_char == Some(b'>') {
277                    continue;
278                }
279            }
280
281            buffers.urls_found.push((start_pos, end_pos, uri_str.to_string()));
282        }
283
284        // Process found URLs
285        for &(start, _end, ref url_str) in &buffers.urls_found {
286            // Skip custom protocols
287            if CUSTOM_PROTOCOL_REGEX.is_match(url_str) {
288                continue;
289            }
290
291            // Check if this URL is inside a markdown link, angle bracket, or image
292            // We check if the URL starts within a construct, not if it's entirely contained.
293            // This handles cases where URL detection may include trailing characters
294            // that extend past the construct boundary (e.g., parentheses).
295            // Linear scan is correct here because ranges can overlap/nest (e.g., [[1]](url))
296            let is_inside_construct = buffers
297                .markdown_link_ranges
298                .iter()
299                .any(|&(s, e)| start >= s && start < e)
300                || buffers.image_ranges.iter().any(|&(s, e)| start >= s && start < e);
301
302            if is_inside_construct {
303                continue;
304            }
305
306            // Calculate absolute byte position for context-aware checks
307            let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
308            let absolute_pos = line_start_byte + start;
309
310            // Check if URL is inside an HTML tag (handles multiline tags correctly)
311            if ctx.is_in_html_tag(absolute_pos) {
312                continue;
313            }
314
315            // Check if we're inside an HTML comment
316            if ctx.is_in_html_comment(absolute_pos) || ctx.is_in_mdx_comment(absolute_pos) {
317                continue;
318            }
319
320            // Check if we're inside a Hugo/Quarto shortcode
321            if ctx.is_in_shortcode(absolute_pos) {
322                continue;
323            }
324
325            // Clean up the URL by removing trailing punctuation
326            let trimmed_url = self.trim_trailing_punctuation(url_str);
327
328            // Only report if we have a valid URL after trimming
329            if !trimmed_url.is_empty() && trimmed_url != "//" {
330                let trimmed_len = trimmed_url.len();
331                let (start_line, start_col, end_line, end_col) =
332                    calculate_url_range(line_number, line, start, trimmed_len);
333
334                // For www URLs without protocol, add https:// prefix in the fix
335                let replacement = if trimmed_url.starts_with("www.") {
336                    format!("<https://{trimmed_url}>")
337                } else {
338                    format!("<{trimmed_url}>")
339                };
340
341                warnings.push(LintWarning {
342                    rule_name: Some("MD034".to_string()),
343                    line: start_line,
344                    column: start_col,
345                    end_line,
346                    end_column: end_col,
347                    message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
348                    severity: Severity::Warning,
349                    fix: Some(Fix::new(
350                        {
351                            let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
352                            (line_start_byte + start)..(line_start_byte + start + trimmed_len)
353                        },
354                        replacement,
355                    )),
356                });
357            }
358        }
359
360        // Check for bare email addresses
361        for cap in EMAIL_PATTERN.captures_iter(line) {
362            if let Some(mat) = cap.get(0) {
363                let email = mat.as_str();
364                let start = mat.start();
365                let end = mat.end();
366
367                // Skip if email is part of an XMPP URI (xmpp:user@domain)
368                // Check character boundary to avoid panics with multi-byte UTF-8
369                if start >= 5 && line.is_char_boundary(start - 5) && &line[start - 5..start] == "xmpp:" {
370                    continue;
371                }
372
373                // Check if email is inside angle brackets or markdown link
374                let mut is_inside_construct = false;
375                for &(link_start, link_end) in &buffers.markdown_link_ranges {
376                    if start >= link_start && end <= link_end {
377                        is_inside_construct = true;
378                        break;
379                    }
380                }
381
382                if !is_inside_construct {
383                    // Calculate absolute byte position for context-aware checks
384                    let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
385                    let absolute_pos = line_start_byte + start;
386
387                    // Check if email is inside an HTML tag (handles multiline tags)
388                    if ctx.is_in_html_tag(absolute_pos) {
389                        continue;
390                    }
391
392                    // Check if email is inside a code span (byte offsets handle multi-line spans)
393                    let is_in_code_span = code_spans
394                        .iter()
395                        .any(|span| absolute_pos >= span.byte_offset && absolute_pos < span.byte_end);
396
397                    if !is_in_code_span {
398                        let email_len = end - start;
399                        let (start_line, start_col, end_line, end_col) =
400                            calculate_url_range(line_number, line, start, email_len);
401
402                        warnings.push(LintWarning {
403                            rule_name: Some("MD034".to_string()),
404                            line: start_line,
405                            column: start_col,
406                            end_line,
407                            end_column: end_col,
408                            message: format!("Email address without angle brackets or link formatting: '{email}'"),
409                            severity: Severity::Warning,
410                            fix: Some(Fix::new(
411                                (line_start_byte + start)..(line_start_byte + end),
412                                format!("<{email}>"),
413                            )),
414                        });
415                    }
416                }
417            }
418        }
419
420        warnings
421    }
422}
423
424impl Rule for MD034NoBareUrls {
    /// Returns the rule identifier, `"MD034"`.
    #[inline]
    fn name(&self) -> &'static str {
        "MD034"
    }
429
    /// Returns this rule as a `&dyn Any` trait object.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
433
    /// Builds a boxed instance of the rule. The configuration argument is not
    /// consulted, since the rule has no options.
    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
    where
        Self: Sized,
    {
        Box::new(MD034NoBareUrls)
    }
440
    /// Returns the category this rule belongs to: `RuleCategory::Link`.
    #[inline]
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
445
    /// Returns `true` when the rule can be skipped for this document: the context
    /// reports no likely links or images *and* the content-level pre-filter
    /// (`should_skip_content`) finds no URL/email indicators.
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        !ctx.likely_has_links_or_images() && self.should_skip_content(ctx.content)
    }
449
    /// Returns the short human-readable description of the rule.
    #[inline]
    fn description(&self) -> &'static str {
        "No bare URLs - wrap URLs in angle brackets"
    }
454
455    fn check(&self, ctx: &LintContext) -> LintResult {
456        let mut warnings = Vec::new();
457        let content = ctx.content;
458
459        // Quick skip for content without URLs
460        if self.should_skip_content(content) {
461            return Ok(warnings);
462        }
463
464        // Create LineIndex for correct byte position calculations across all line ending types
465        let line_index = &ctx.line_index;
466
467        // Get code spans for exclusion
468        let code_spans = ctx.code_spans();
469
470        // Allocate reusable buffers once instead of per-line to reduce allocations
471        let mut buffers = LineCheckBuffers::default();
472
473        // Iterate over content lines, automatically skipping front matter, code blocks,
474        // and Obsidian comments (when in Obsidian flavor)
475        // This uses the filtered iterator API which centralizes the skip logic
476        for line in ctx
477            .filtered_lines()
478            .skip_front_matter()
479            .skip_code_blocks()
480            .skip_jsx_expressions()
481            .skip_mdx_comments()
482            .skip_obsidian_comments()
483        {
484            let mut line_warnings =
485                self.check_line(line.content, ctx, line.line_num, &code_spans, &mut buffers, line_index);
486
487            // Filter out warnings that are inside code spans (handles multi-line spans via byte offsets)
488            line_warnings.retain(|warning| {
489                !code_spans.iter().any(|span| {
490                    if let Some(fix) = &warning.fix {
491                        // Byte-offset check handles both single-line and multi-line code spans
492                        fix.range.start >= span.byte_offset && fix.range.start < span.byte_end
493                    } else {
494                        span.line == warning.line
495                            && span.end_line == warning.line
496                            && warning.column > 0
497                            && (warning.column - 1) >= span.start_col
498                            && (warning.column - 1) < span.end_col
499                    }
500                })
501            });
502
503            // Filter out warnings where the URL is inside a parsed link
504            // This handles cases like [text]( https://url ) where the URL has leading whitespace
505            // pulldown-cmark correctly parses these as valid links even though our regex misses them
506            line_warnings.retain(|warning| {
507                if let Some(fix) = &warning.fix {
508                    // Check if the fix range falls inside any parsed link's byte range
509                    !ctx.links
510                        .iter()
511                        .any(|link| fix.range.start >= link.byte_offset && fix.range.end <= link.byte_end)
512                } else {
513                    true
514                }
515            });
516
517            // Filter out warnings where the URL is inside an Obsidian comment (%%...%%)
518            // This handles inline comments like: text %%https://hidden.com%% text
519            line_warnings.retain(|warning| !ctx.is_position_in_obsidian_comment(warning.line, warning.column));
520
521            warnings.extend(line_warnings);
522        }
523
524        Ok(warnings)
525    }
526
527    fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
528        let mut content = ctx.content.to_string();
529        let warnings = self.check(ctx)?;
530        let mut warnings =
531            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
532
533        // Sort warnings by position to ensure consistent fix application
534        warnings.sort_by_key(|w| w.fix.as_ref().map_or(0, |f| f.range.start));
535
536        // Apply fixes in reverse order to maintain positions
537        for warning in warnings.iter().rev() {
538            if let Some(fix) = &warning.fix {
539                let start = fix.range.start;
540                let end = fix.range.end;
541                content.replace_range(start..end, &fix.replacement);
542            }
543        }
544
545        Ok(content)
546    }
547}
548
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;

    /// Lints `content` with MD034 under the given markdown flavor and returns the warnings.
    fn lint(content: &str, flavor: MarkdownFlavor) -> Vec<LintWarning> {
        let ctx = crate::lint_context::LintContext::new(content, flavor, None);
        MD034NoBareUrls.check(&ctx).unwrap()
    }

    #[test]
    fn test_shortcut_ref_at_end_of_line_no_trailing_chars() {
        let result = lint("See [https://example.com]", MarkdownFlavor::Standard);
        assert!(
            result.is_empty(),
            "[URL] at end of line should be treated as shortcut ref: {result:?}"
        );
    }

    #[test]
    fn test_shortcut_ref_multiple_spaces_before_paren() {
        // [text]  (url) — the spaces between ] and ( mean this should be treated
        // as shortcut ref then bare parens, NOT a markdown link. URL may still be bare.
        // Only the absence of a panic is verified here; no warning count is asserted.
        let _warnings = lint("[text]  (https://example.com)", MarkdownFlavor::Standard);
    }

    #[test]
    fn test_shortcut_ref_tab_before_bracket() {
        // Tab between ] and [ does not form a full reference link in Markdown.
        // The first [URL] is a shortcut ref containing a bare URL, so MD034 warns.
        // This test verifies consistent behavior and no panic with tab characters.
        let result = lint("[https://example.com]\t[other]", MarkdownFlavor::Standard);
        assert_eq!(
            result.len(),
            1,
            "Bare URL inside shortcut ref should be detected: {result:?}"
        );
    }

    #[test]
    fn test_shortcut_ref_followed_by_punctuation() {
        let result = lint("[https://example.com], see also other things.", MarkdownFlavor::Standard);
        assert!(
            result.is_empty(),
            "[URL] followed by comma should be treated as shortcut ref: {result:?}"
        );
    }

    #[test]
    fn test_url_in_backticks_inside_mdx_component_not_flagged() {
        // Exact reproduction from issue #572: URL inside inline code within an MDX
        // component body must not be flagged. The same URL in backticks outside the
        // component is already handled correctly and serves as a control.
        let content = "# Test\n\nControl: `https://rumdl.example.com/` is fine here.\n\n<ParamField path=\"--stuff\">\n  This URL `https://rumdl.example.com/` must not be flagged.\n</ParamField>\n";
        let result = lint(content, MarkdownFlavor::MDX);
        assert!(
            result.is_empty(),
            "URL in backticks inside MDX component must not be flagged: {result:?}"
        );
    }

    #[test]
    fn test_bare_url_inside_mdx_component_still_flagged() {
        // A bare URL (not in backticks) inside an MDX component body must still be flagged.
        // This ensures the fix for issue #572 only suppresses properly code-spanned URLs.
        let content =
            "# Test\n\n<ParamField path=\"--stuff\">\n  Visit https://rumdl.example.com/ for details.\n</ParamField>\n";
        let result = lint(content, MarkdownFlavor::MDX);
        assert_eq!(
            result.len(),
            1,
            "Bare URL in MDX component body must still be flagged: {result:?}"
        );
    }

    #[test]
    fn test_url_in_backticks_inside_nested_mdx_component_not_flagged() {
        // Nested MDX components must also respect code spans.
        let content = "<Outer>\n  <Inner>\n    Check `https://example.com/` here.\n  </Inner>\n</Outer>\n";
        let result = lint(content, MarkdownFlavor::MDX);
        assert!(
            result.is_empty(),
            "URL in backticks inside nested MDX component must not be flagged: {result:?}"
        );
    }

    #[test]
    fn test_url_in_backticks_after_fenced_code_block_inside_mdx_not_flagged() {
        // A fenced code block inside a JSX component must not misalign the code-span
        // offset map. The URL in backticks that appears *after* the code block must
        // still be recognised as being inside a code span.
        let content = "\
<Component>
Some intro text.

```
example code here
```

Check `https://example.com/` here.
</Component>
";
        let result = lint(content, MarkdownFlavor::MDX);
        assert!(
            result.is_empty(),
            "URL in backticks after a fenced code block inside MDX must not be flagged: {result:?}"
        );
    }
}
rumdl_lib/rules/md034_no_bare_urls.rs

rumdl_lib/rules/
md034_no_bare_urls.rs