rumdl_lib/rules/
md034_no_bare_urls.rs

//! Rule MD034: No unformatted URLs
//!
//! See [docs/md034.md](../../docs/md034.md) for full documentation, configuration, and examples.
4use std::sync::LazyLock;
5
6use regex::Regex;
7
8use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
9use crate::utils::range_utils::{LineIndex, calculate_url_range};
10use crate::utils::regex_cache::{
11    EMAIL_PATTERN, URL_IPV6_REGEX, URL_QUICK_CHECK_REGEX, URL_STANDARD_REGEX, URL_WWW_REGEX, XMPP_URI_REGEX,
12};
13
14use crate::filtered_lines::FilteredLinesExt;
15use crate::lint_context::LintContext;
16
17// MD034-specific pre-compiled regex patterns for markdown constructs
18static CUSTOM_PROTOCOL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
19    Regex::new(r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#).unwrap()
20});
21static MARKDOWN_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
22    Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap()
23});
24static MARKDOWN_EMPTY_LINK_REGEX: LazyLock<Regex> =
25    LazyLock::new(|| Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(\)"#).unwrap());
26static MARKDOWN_EMPTY_REF_REGEX: LazyLock<Regex> =
27    LazyLock::new(|| Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\[\]"#).unwrap());
28static ANGLE_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
29    Regex::new(
30        r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|xmpp:[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#,
31    )
32    .unwrap()
33});
34static BADGE_LINK_LINE_REGEX: LazyLock<Regex> =
35    LazyLock::new(|| Regex::new(r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#).unwrap());
36static MARKDOWN_IMAGE_REGEX: LazyLock<Regex> =
37    LazyLock::new(|| Regex::new(r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap());
38static REFERENCE_DEF_REGEX: LazyLock<Regex> =
39    LazyLock::new(|| Regex::new(r"^\s*\[[^\]]+\]:\s*(?:<|(?:https?|ftps?)://)").unwrap());
40static MULTILINE_LINK_CONTINUATION_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"^[^\[]*\]\(.*\)"#).unwrap());
41static SHORTCUT_REF_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"\[([^\[\]]+)\]"#).unwrap());
42
/// Reusable buffers for check_line to reduce allocations.
///
/// One instance is created per `check` run and handed to every `check_line` call,
/// which clears the vectors before refilling them for the current line.
#[derive(Default)]
struct LineCheckBuffers {
    // Byte ranges `(start, end)` within the current line covered by markdown links,
    // reference-style brackets, and angle-bracket autolinks.
    markdown_link_ranges: Vec<(usize, usize)>,
    // Byte ranges `(start, end)` within the current line covered by inline images.
    image_ranges: Vec<(usize, usize)>,
    // Candidate URLs on the current line as `(start, end, matched text)`, with
    // byte offsets relative to the line.
    urls_found: Vec<(usize, usize, String)>,
}
50
/// Rule MD034: reports URLs and email addresses that appear as bare text instead of
/// being wrapped in angle brackets or link syntax. The rule carries no configuration.
#[derive(Default, Clone)]
pub struct MD034NoBareUrls;
53
54impl MD034NoBareUrls {
55    #[inline]
56    pub fn should_skip_content(&self, content: &str) -> bool {
57        // Skip if content has no URLs, XMPP URIs, or email addresses
58        // Fast byte scanning for common URL/email/xmpp indicators
59        let bytes = content.as_bytes();
60        let has_colon = bytes.contains(&b':');
61        let has_at = bytes.contains(&b'@');
62        let has_www = content.contains("www.");
63        !has_colon && !has_at && !has_www
64    }
65
66    /// Remove trailing punctuation that is likely sentence punctuation, not part of the URL
67    fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
68        let mut trimmed = url;
69
70        // Check for balanced parentheses - if we have unmatched closing parens, they're likely punctuation
71        let open_parens = url.chars().filter(|&c| c == '(').count();
72        let close_parens = url.chars().filter(|&c| c == ')').count();
73
74        if close_parens > open_parens {
75            // Find the last balanced closing paren position
76            let mut balance = 0;
77            let mut last_balanced_pos = url.len();
78
79            for (byte_idx, c) in url.char_indices() {
80                if c == '(' {
81                    balance += 1;
82                } else if c == ')' {
83                    balance -= 1;
84                    if balance < 0 {
85                        // Found an unmatched closing paren
86                        last_balanced_pos = byte_idx;
87                        break;
88                    }
89                }
90            }
91
92            trimmed = &trimmed[..last_balanced_pos];
93        }
94
95        // Trim specific punctuation only if not followed by more URL-like chars
96        while let Some(last_char) = trimmed.chars().last() {
97            if matches!(last_char, '.' | ',' | ';' | ':' | '!' | '?') {
98                // Check if this looks like it could be part of the URL
99                // For ':' specifically, keep it if followed by digits (port number)
100                if last_char == ':' && trimmed.len() > 1 {
101                    // Don't trim
102                    break;
103                }
104                trimmed = &trimmed[..trimmed.len() - 1];
105            } else {
106                break;
107            }
108        }
109
110        trimmed
111    }
112
113    /// Check if line is inside a reference definition
114    fn is_reference_definition(&self, line: &str) -> bool {
115        REFERENCE_DEF_REGEX.is_match(line)
116    }
117
118    fn check_line(
119        &self,
120        line: &str,
121        ctx: &LintContext,
122        line_number: usize,
123        code_spans: &[crate::lint_context::CodeSpan],
124        buffers: &mut LineCheckBuffers,
125        line_index: &LineIndex,
126    ) -> Vec<LintWarning> {
127        let mut warnings = Vec::new();
128
129        // Skip reference definitions
130        if self.is_reference_definition(line) {
131            return warnings;
132        }
133
134        // Skip lines inside HTML blocks - URLs in HTML attributes should not be linted
135        if ctx.line_info(line_number).is_some_and(|info| info.in_html_block) {
136            return warnings;
137        }
138
139        // Skip lines that are continuations of multiline markdown links
140        // Pattern: text](url) without a leading [
141        if MULTILINE_LINK_CONTINUATION_REGEX.is_match(line) {
142            return warnings;
143        }
144
145        // Quick check - does this line potentially have a URL or email?
146        let has_quick_check = URL_QUICK_CHECK_REGEX.is_match(line);
147        let has_www = line.contains("www.");
148        let has_at = line.contains('@');
149
150        if !has_quick_check && !has_at && !has_www {
151            return warnings;
152        }
153
154        // Clear and reuse buffers instead of allocating new ones
155        buffers.markdown_link_ranges.clear();
156        buffers.image_ranges.clear();
157
158        let has_bracket = line.contains('[');
159        let has_angle = line.contains('<');
160        let has_bang = line.contains('!');
161
162        if has_bracket {
163            for mat in MARKDOWN_LINK_REGEX.find_iter(line) {
164                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
165            }
166
167            // Also include empty link patterns like [text]() and [text][]
168            for mat in MARKDOWN_EMPTY_LINK_REGEX.find_iter(line) {
169                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
170            }
171
172            for mat in MARKDOWN_EMPTY_REF_REGEX.find_iter(line) {
173                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
174            }
175
176            // Also exclude shortcut reference links like [URL]
177            for mat in SHORTCUT_REF_REGEX.find_iter(line) {
178                let end = mat.end();
179                let next_non_ws = line[end..].bytes().find(|b| !b.is_ascii_whitespace());
180                if next_non_ws == Some(b'(') || next_non_ws == Some(b'[') {
181                    continue;
182                }
183                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
184            }
185
186            // Check if this line contains only a badge link (common pattern)
187            if has_bang && BADGE_LINK_LINE_REGEX.is_match(line) {
188                return warnings;
189            }
190        }
191
192        if has_angle {
193            for mat in ANGLE_LINK_REGEX.find_iter(line) {
194                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
195            }
196        }
197
198        // Find all markdown images for exclusion
199        if has_bang && has_bracket {
200            for mat in MARKDOWN_IMAGE_REGEX.find_iter(line) {
201                buffers.image_ranges.push((mat.start(), mat.end()));
202            }
203        }
204
205        // Find bare URLs
206        buffers.urls_found.clear();
207
208        // First, find IPv6 URLs (they need special handling)
209        for mat in URL_IPV6_REGEX.find_iter(line) {
210            let url_str = mat.as_str();
211            buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
212        }
213
214        // Then find regular URLs
215        for mat in URL_STANDARD_REGEX.find_iter(line) {
216            let url_str = mat.as_str();
217
218            // Skip if it's an IPv6 URL (already handled)
219            if url_str.contains("://[") {
220                continue;
221            }
222
223            // Skip malformed IPv6-like URLs
224            // Check for IPv6-like patterns that are malformed
225            if let Some(host_start) = url_str.find("://") {
226                let after_protocol = &url_str[host_start + 3..];
227                // If it looks like IPv6 (has :: or multiple :) but no brackets, skip if followed by ]
228                if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
229                    // Check if the next byte after our match is ] (ASCII, so byte check is safe)
230                    if line.as_bytes().get(mat.end()) == Some(&b']') {
231                        // This is likely a malformed IPv6 URL like "https://::1]:8080"
232                        continue;
233                    }
234                }
235            }
236
237            buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
238        }
239
240        // Find www URLs without protocol (e.g., www.example.com)
241        for mat in URL_WWW_REGEX.find_iter(line) {
242            let url_str = mat.as_str();
243            let start_pos = mat.start();
244            let end_pos = mat.end();
245
246            // Skip if preceded by / or @ (likely part of a full URL)
247            if start_pos > 0 {
248                let prev_char = line.as_bytes().get(start_pos - 1).copied();
249                if prev_char == Some(b'/') || prev_char == Some(b'@') {
250                    continue;
251                }
252            }
253
254            // Skip if inside angle brackets (autolink syntax like <www.example.com>)
255            if start_pos > 0 && end_pos < line.len() {
256                let prev_char = line.as_bytes().get(start_pos - 1).copied();
257                let next_char = line.as_bytes().get(end_pos).copied();
258                if prev_char == Some(b'<') && next_char == Some(b'>') {
259                    continue;
260                }
261            }
262
263            buffers.urls_found.push((start_pos, end_pos, url_str.to_string()));
264        }
265
266        // Find XMPP URIs (GFM extended autolinks: xmpp:user@domain/resource)
267        for mat in XMPP_URI_REGEX.find_iter(line) {
268            let uri_str = mat.as_str();
269            let start_pos = mat.start();
270            let end_pos = mat.end();
271
272            // Skip if inside angle brackets (already properly formatted: <xmpp:user@domain>)
273            if start_pos > 0 && end_pos < line.len() {
274                let prev_char = line.as_bytes().get(start_pos - 1).copied();
275                let next_char = line.as_bytes().get(end_pos).copied();
276                if prev_char == Some(b'<') && next_char == Some(b'>') {
277                    continue;
278                }
279            }
280
281            buffers.urls_found.push((start_pos, end_pos, uri_str.to_string()));
282        }
283
284        // Process found URLs
285        for &(start, _end, ref url_str) in &buffers.urls_found {
286            // Skip custom protocols
287            if CUSTOM_PROTOCOL_REGEX.is_match(url_str) {
288                continue;
289            }
290
291            // Check if this URL is inside a markdown link, angle bracket, or image
292            // We check if the URL starts within a construct, not if it's entirely contained.
293            // This handles cases where URL detection may include trailing characters
294            // that extend past the construct boundary (e.g., parentheses).
295            // Linear scan is correct here because ranges can overlap/nest (e.g., [[1]](url))
296            let is_inside_construct = buffers
297                .markdown_link_ranges
298                .iter()
299                .any(|&(s, e)| start >= s && start < e)
300                || buffers.image_ranges.iter().any(|&(s, e)| start >= s && start < e);
301
302            if is_inside_construct {
303                continue;
304            }
305
306            // Calculate absolute byte position for context-aware checks
307            let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
308            let absolute_pos = line_start_byte + start;
309
310            // Check if URL is inside an HTML tag (handles multiline tags correctly)
311            if ctx.is_in_html_tag(absolute_pos) {
312                continue;
313            }
314
315            // Check if we're inside an HTML comment
316            if ctx.is_in_html_comment(absolute_pos) || ctx.is_in_mdx_comment(absolute_pos) {
317                continue;
318            }
319
320            // Check if we're inside a Hugo/Quarto shortcode
321            if ctx.is_in_shortcode(absolute_pos) {
322                continue;
323            }
324
325            // Skip URLs inside Pandoc line blocks (`| text`) or YAML metadata blocks.
326            // Both constructs treat their content as literal/structured text where bare
327            // URLs are intentional and should not be reformatted.
328            if ctx.flavor.is_pandoc_compatible()
329                && (ctx.is_in_line_block(absolute_pos) || ctx.is_in_pandoc_metadata(absolute_pos))
330            {
331                continue;
332            }
333
334            // Clean up the URL by removing trailing punctuation
335            let trimmed_url = self.trim_trailing_punctuation(url_str);
336
337            // Only report if we have a valid URL after trimming
338            if !trimmed_url.is_empty() && trimmed_url != "//" {
339                let trimmed_len = trimmed_url.len();
340                let (start_line, start_col, end_line, end_col) =
341                    calculate_url_range(line_number, line, start, trimmed_len);
342
343                // For www URLs without protocol, add https:// prefix in the fix
344                let replacement = if trimmed_url.starts_with("www.") {
345                    format!("<https://{trimmed_url}>")
346                } else {
347                    format!("<{trimmed_url}>")
348                };
349
350                warnings.push(LintWarning {
351                    rule_name: Some("MD034".to_string()),
352                    line: start_line,
353                    column: start_col,
354                    end_line,
355                    end_column: end_col,
356                    message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
357                    severity: Severity::Warning,
358                    fix: Some(Fix::new(
359                        {
360                            let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
361                            (line_start_byte + start)..(line_start_byte + start + trimmed_len)
362                        },
363                        replacement,
364                    )),
365                });
366            }
367        }
368
369        // Check for bare email addresses
370        for cap in EMAIL_PATTERN.captures_iter(line) {
371            if let Some(mat) = cap.get(0) {
372                let email = mat.as_str();
373                let start = mat.start();
374                let end = mat.end();
375
376                // Skip if email is part of an XMPP URI (xmpp:user@domain)
377                // Check character boundary to avoid panics with multi-byte UTF-8
378                if start >= 5 && line.is_char_boundary(start - 5) && &line[start - 5..start] == "xmpp:" {
379                    continue;
380                }
381
382                // Check if email is inside angle brackets or markdown link
383                let mut is_inside_construct = false;
384                for &(link_start, link_end) in &buffers.markdown_link_ranges {
385                    if start >= link_start && end <= link_end {
386                        is_inside_construct = true;
387                        break;
388                    }
389                }
390
391                if !is_inside_construct {
392                    // Calculate absolute byte position for context-aware checks
393                    let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
394                    let absolute_pos = line_start_byte + start;
395
396                    // Check if email is inside an HTML tag (handles multiline tags)
397                    if ctx.is_in_html_tag(absolute_pos) {
398                        continue;
399                    }
400
401                    // Skip emails inside Pandoc line blocks or YAML metadata blocks.
402                    if ctx.flavor.is_pandoc_compatible()
403                        && (ctx.is_in_line_block(absolute_pos) || ctx.is_in_pandoc_metadata(absolute_pos))
404                    {
405                        continue;
406                    }
407
408                    // Check if email is inside a code span (byte offsets handle multi-line spans)
409                    let is_in_code_span = code_spans
410                        .iter()
411                        .any(|span| absolute_pos >= span.byte_offset && absolute_pos < span.byte_end);
412
413                    if !is_in_code_span {
414                        let email_len = end - start;
415                        let (start_line, start_col, end_line, end_col) =
416                            calculate_url_range(line_number, line, start, email_len);
417
418                        warnings.push(LintWarning {
419                            rule_name: Some("MD034".to_string()),
420                            line: start_line,
421                            column: start_col,
422                            end_line,
423                            end_column: end_col,
424                            message: format!("Email address without angle brackets or link formatting: '{email}'"),
425                            severity: Severity::Warning,
426                            fix: Some(Fix::new(
427                                (line_start_byte + start)..(line_start_byte + end),
428                                format!("<{email}>"),
429                            )),
430                        });
431                    }
432                }
433            }
434        }
435
436        warnings
437    }
438}
439
440impl Rule for MD034NoBareUrls {
    /// Returns the rule identifier, `"MD034"`.
    #[inline]
    fn name(&self) -> &'static str {
        "MD034"
    }
445
    /// Exposes this rule as `&dyn Any` (presumably so callers can downcast to the
    /// concrete rule type).
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
449
450    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
451    where
452        Self: Sized,
453    {
454        Box::new(MD034NoBareUrls)
455    }
456
    /// Returns the category this rule belongs to: `RuleCategory::Link`.
    #[inline]
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
461
462    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
463        !ctx.likely_has_links_or_images() && self.should_skip_content(ctx.content)
464    }
465
    /// Returns the short human-readable description of the rule.
    #[inline]
    fn description(&self) -> &'static str {
        "No bare URLs - wrap URLs in angle brackets"
    }
470
471    fn check(&self, ctx: &LintContext) -> LintResult {
472        let mut warnings = Vec::new();
473        let content = ctx.content;
474
475        // Quick skip for content without URLs
476        if self.should_skip_content(content) {
477            return Ok(warnings);
478        }
479
480        // Create LineIndex for correct byte position calculations across all line ending types
481        let line_index = &ctx.line_index;
482
483        // Get code spans for exclusion
484        let code_spans = ctx.code_spans();
485
486        // Allocate reusable buffers once instead of per-line to reduce allocations
487        let mut buffers = LineCheckBuffers::default();
488
489        // Iterate over content lines, automatically skipping front matter, code blocks,
490        // and Obsidian comments (when in Obsidian flavor)
491        // This uses the filtered iterator API which centralizes the skip logic
492        for line in ctx
493            .filtered_lines()
494            .skip_front_matter()
495            .skip_code_blocks()
496            .skip_jsx_expressions()
497            .skip_mdx_comments()
498            .skip_obsidian_comments()
499        {
500            let mut line_warnings =
501                self.check_line(line.content, ctx, line.line_num, &code_spans, &mut buffers, line_index);
502
503            // Filter out warnings that are inside code spans (handles multi-line spans via byte offsets)
504            line_warnings.retain(|warning| {
505                !code_spans.iter().any(|span| {
506                    if let Some(fix) = &warning.fix {
507                        // Byte-offset check handles both single-line and multi-line code spans
508                        fix.range.start >= span.byte_offset && fix.range.start < span.byte_end
509                    } else {
510                        span.line == warning.line
511                            && span.end_line == warning.line
512                            && warning.column > 0
513                            && (warning.column - 1) >= span.start_col
514                            && (warning.column - 1) < span.end_col
515                    }
516                })
517            });
518
519            // Filter out warnings where the URL is inside a parsed link
520            // This handles cases like [text]( https://url ) where the URL has leading whitespace
521            // pulldown-cmark correctly parses these as valid links even though our regex misses them
522            line_warnings.retain(|warning| {
523                if let Some(fix) = &warning.fix {
524                    // Check if the fix range falls inside any parsed link's byte range
525                    !ctx.links
526                        .iter()
527                        .any(|link| fix.range.start >= link.byte_offset && fix.range.end <= link.byte_end)
528                } else {
529                    true
530                }
531            });
532
533            // Filter out warnings where the URL is inside an Obsidian comment (%%...%%)
534            // This handles inline comments like: text %%https://hidden.com%% text
535            line_warnings.retain(|warning| !ctx.is_position_in_obsidian_comment(warning.line, warning.column));
536
537            warnings.extend(line_warnings);
538        }
539
540        Ok(warnings)
541    }
542
543    fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
544        let mut content = ctx.content.to_string();
545        let warnings = self.check(ctx)?;
546        let mut warnings =
547            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
548
549        // Sort warnings by position to ensure consistent fix application
550        warnings.sort_by_key(|w| w.fix.as_ref().map_or(0, |f| f.range.start));
551
552        // Apply fixes in reverse order to maintain positions
553        for warning in warnings.iter().rev() {
554            if let Some(fix) = &warning.fix {
555                let start = fix.range.start;
556                let end = fix.range.end;
557                content.replace_range(start..end, &fix.replacement);
558            }
559        }
560
561        Ok(content)
562    }
563}
564
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;

    /// Lints `content` with MD034 under the given flavor and returns the warnings.
    fn lint(content: &str, flavor: MarkdownFlavor) -> Vec<crate::rule::LintWarning> {
        let ctx = crate::lint_context::LintContext::new(content, flavor, None);
        MD034NoBareUrls.check(&ctx).unwrap()
    }

    #[test]
    fn test_shortcut_ref_at_end_of_line_no_trailing_chars() {
        let result = lint("See [https://example.com]", MarkdownFlavor::Standard);
        assert!(
            result.is_empty(),
            "[URL] at end of line should be treated as shortcut ref: {result:?}"
        );
    }

    #[test]
    fn test_shortcut_ref_multiple_spaces_before_paren() {
        // `[text]  (url)`: the spaces between `]` and `(` mean this is not a markdown
        // link, so the URL may still be reported as bare. The warning count is
        // deliberately not asserted; the test only verifies that linting does not panic.
        let _ = lint("[text]  (https://example.com)", MarkdownFlavor::Standard);
    }

    #[test]
    fn test_shortcut_ref_tab_before_bracket() {
        // A tab between `]` and `[` does not form a full reference link in Markdown,
        // so the URL in the first bracket pair is reported. Also guards against panics
        // on tab characters.
        let result = lint("[https://example.com]\t[other]", MarkdownFlavor::Standard);
        assert_eq!(
            result.len(),
            1,
            "Bare URL inside shortcut ref should be detected: {result:?}"
        );
    }

    #[test]
    fn test_shortcut_ref_followed_by_punctuation() {
        let result = lint(
            "[https://example.com], see also other things.",
            MarkdownFlavor::Standard,
        );
        assert!(
            result.is_empty(),
            "[URL] followed by comma should be treated as shortcut ref: {result:?}"
        );
    }

    #[test]
    fn test_url_in_backticks_inside_mdx_component_not_flagged() {
        // Reproduction from issue #572: a URL in inline code within an MDX component
        // body must not be flagged. The same URL in backticks outside the component
        // acts as a control.
        let content = "# Test\n\nControl: `https://rumdl.example.com/` is fine here.\n\n<ParamField path=\"--stuff\">\n  This URL `https://rumdl.example.com/` must not be flagged.\n</ParamField>\n";
        let result = lint(content, MarkdownFlavor::MDX);
        assert!(
            result.is_empty(),
            "URL in backticks inside MDX component must not be flagged: {result:?}"
        );
    }

    #[test]
    fn test_bare_url_inside_mdx_component_still_flagged() {
        // The fix for issue #572 must only suppress code-spanned URLs; a bare URL in an
        // MDX component body is still reported.
        let content =
            "# Test\n\n<ParamField path=\"--stuff\">\n  Visit https://rumdl.example.com/ for details.\n</ParamField>\n";
        let result = lint(content, MarkdownFlavor::MDX);
        assert_eq!(
            result.len(),
            1,
            "Bare URL in MDX component body must still be flagged: {result:?}"
        );
    }

    #[test]
    fn test_url_in_backticks_inside_nested_mdx_component_not_flagged() {
        // Code spans must be respected inside nested MDX components as well.
        let content = "<Outer>\n  <Inner>\n    Check `https://example.com/` here.\n  </Inner>\n</Outer>\n";
        let result = lint(content, MarkdownFlavor::MDX);
        assert!(
            result.is_empty(),
            "URL in backticks inside nested MDX component must not be flagged: {result:?}"
        );
    }

    /// URLs inside Pandoc line blocks (`| text`) must not be flagged as bare URLs.
    #[test]
    fn test_pandoc_skips_urls_in_line_blocks() {
        let result = lint("| See https://example.com\n| For details\n", MarkdownFlavor::Pandoc);
        assert!(
            result.is_empty(),
            "MD034 should skip URLs in Pandoc line blocks: {result:?}"
        );
    }

    /// URLs inside Pandoc YAML metadata blocks must not be flagged.
    #[test]
    fn test_pandoc_skips_urls_in_metadata() {
        let result = lint(
            "---\nhomepage: https://example.com\n---\n\nBody.\n",
            MarkdownFlavor::Pandoc,
        );
        assert!(
            result.is_empty(),
            "MD034 should skip URLs in Pandoc YAML metadata: {result:?}"
        );
    }

    /// Standard flavor must still flag bare URLs in lines starting with `|`
    /// (which are not interpreted as line blocks).
    #[test]
    fn test_standard_still_flags_urls_in_pipe_prefixed_lines() {
        let result = lint("| See https://example.com\n", MarkdownFlavor::Standard);
        assert!(
            !result.is_empty(),
            "MD034 should still flag URLs in pipe-prefixed lines under Standard flavor"
        );
    }

    #[test]
    fn test_url_in_backticks_after_fenced_code_block_inside_mdx_not_flagged() {
        // A fenced code block inside a JSX component must not misalign the code-span
        // offsets: the backticked URL that follows the fence must still be recognised
        // as being inside a code span.
        let content = "<Component>\nSome intro text.\n\n```\nexample code here\n```\n\nCheck `https://example.com/` here.\n</Component>\n";
        let result = lint(content, MarkdownFlavor::MDX);
        assert!(
            result.is_empty(),
            "URL in backticks after a fenced code block inside MDX must not be flagged: {result:?}"
        );
    }
}
rumdl_lib/rules/md034_no_bare_urls.rs

rumdl_lib/rules/
md034_no_bare_urls.rs