rumdl_lib/rules/
md034_no_bare_urls.rs

1/// Rule MD034: No unformatted URLs
2///
3/// See [docs/md034.md](../../docs/md034.md) for full documentation, configuration, and examples.
4use std::sync::LazyLock;
5
6use regex::Regex;
7
8use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
9use crate::utils::range_utils::{LineIndex, calculate_url_range};
10use crate::utils::regex_cache::{
11    EMAIL_PATTERN, URL_IPV6_REGEX, URL_QUICK_CHECK_REGEX, URL_STANDARD_REGEX, URL_WWW_REGEX, XMPP_URI_REGEX,
12};
13
14use crate::filtered_lines::FilteredLinesExt;
15use crate::lint_context::LintContext;
16
// MD034-specific pre-compiled regex patterns for markdown constructs.
// Each pattern is compiled lazily on first use and shared for the lifetime of the process.

/// Matches a `scheme://` prefix for a fixed list of non-web schemes (grpc, ws, ssh, file, ...).
/// The pattern is unanchored, so it matches anywhere inside the text it is tested against.
static CUSTOM_PROTOCOL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#).unwrap()
});
/// Matches an inline link `[text](dest)` or `[text](dest "title")`.
/// The text may contain one level of nested brackets; `dest` (capture 1) is non-empty and
/// contains neither whitespace nor `)`.
static MARKDOWN_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap()
});
/// Matches an inline link with an empty destination: `[text]()`.
static MARKDOWN_EMPTY_LINK_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(\)"#).unwrap());
/// Matches a collapsed reference link: `[text][]`.
static MARKDOWN_EMPTY_REF_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\[\]"#).unwrap());
/// Matches an angle-bracket autolink `<...>` whose content is an http(s)/ftp(s) URL
/// (including a bracketed IPv6 host with optional `%zone`), an `xmpp:` URI, or
/// email-shaped text (`local@domain.tld`).
static ANGLE_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|xmpp:[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#,
    )
    .unwrap()
});
/// Matches a line that consists solely of a linked image ("badge"): `[![alt](img)](link)`,
/// with optional leading and trailing whitespace.
static BADGE_LINK_LINE_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#).unwrap());
/// Matches an image `![alt](dest)` or `![alt](dest "title")`, tolerating whitespace after `!`
/// and between `]` and `(`. Capture 1 is the alt text, capture 2 the destination.
static MARKDOWN_IMAGE_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap());
/// Matches the start of a reference definition line: `[label]:` followed by either `<` or an
/// http(s)/ftp(s) URL.
static REFERENCE_DEF_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*\[[^\]]+\]:\s*(?:<|(?:https?|ftps?)://)").unwrap());
/// Matches a line that begins with text containing no `[` and then `](...)`, i.e. the tail of
/// an inline link whose opening `[` is not on this line.
static MULTILINE_LINK_CONTINUATION_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"^[^\[]*\]\(.*\)"#).unwrap());
/// Matches a bracketed span `[text]` with no nested brackets (capture 1 is the text).
static SHORTCUT_REF_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"\[([^\[\]]+)\]"#).unwrap());
42
/// Reusable buffers for check_line to reduce allocations.
///
/// One instance is created per `check` call and handed to every `check_line` invocation,
/// which clears each vector before refilling it for the current line.
#[derive(Default)]
struct LineCheckBuffers {
    // `(start, end)` byte ranges, relative to the current line, of link-like constructs
    // (inline/empty/shortcut links and angle-bracket autolinks).
    markdown_link_ranges: Vec<(usize, usize)>,
    // `(start, end)` byte ranges, relative to the current line, of image constructs.
    image_ranges: Vec<(usize, usize)>,
    // `(start, end, matched text)` for every URL-like candidate found on the current line.
    urls_found: Vec<(usize, usize, String)>,
}
50
/// Rule MD034: reports URLs and email addresses that appear without angle brackets or link
/// formatting. The rule carries no state or configuration.
#[derive(Default, Clone)]
pub struct MD034NoBareUrls;
53
54impl MD034NoBareUrls {
55    #[inline]
56    pub fn should_skip_content(&self, content: &str) -> bool {
57        // Skip if content has no URLs, XMPP URIs, or email addresses
58        // Fast byte scanning for common URL/email/xmpp indicators
59        let bytes = content.as_bytes();
60        let has_colon = bytes.contains(&b':');
61        let has_at = bytes.contains(&b'@');
62        let has_www = content.contains("www.");
63        !has_colon && !has_at && !has_www
64    }
65
66    /// Remove trailing punctuation that is likely sentence punctuation, not part of the URL
67    fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
68        let mut trimmed = url;
69
70        // Check for balanced parentheses - if we have unmatched closing parens, they're likely punctuation
71        let open_parens = url.chars().filter(|&c| c == '(').count();
72        let close_parens = url.chars().filter(|&c| c == ')').count();
73
74        if close_parens > open_parens {
75            // Find the last balanced closing paren position
76            let mut balance = 0;
77            let mut last_balanced_pos = url.len();
78
79            for (byte_idx, c) in url.char_indices() {
80                if c == '(' {
81                    balance += 1;
82                } else if c == ')' {
83                    balance -= 1;
84                    if balance < 0 {
85                        // Found an unmatched closing paren
86                        last_balanced_pos = byte_idx;
87                        break;
88                    }
89                }
90            }
91
92            trimmed = &trimmed[..last_balanced_pos];
93        }
94
95        // Trim specific punctuation only if not followed by more URL-like chars
96        while let Some(last_char) = trimmed.chars().last() {
97            if matches!(last_char, '.' | ',' | ';' | ':' | '!' | '?') {
98                // Check if this looks like it could be part of the URL
99                // For ':' specifically, keep it if followed by digits (port number)
100                if last_char == ':' && trimmed.len() > 1 {
101                    // Don't trim
102                    break;
103                }
104                trimmed = &trimmed[..trimmed.len() - 1];
105            } else {
106                break;
107            }
108        }
109
110        trimmed
111    }
112
    /// Returns `true` when `line` starts a reference definition, i.e. it matches
    /// `REFERENCE_DEF_REGEX`: optional indentation, `[label]:`, then either `<`
    /// or an http(s)/ftp(s) URL.
    fn is_reference_definition(&self, line: &str) -> bool {
        REFERENCE_DEF_REGEX.is_match(line)
    }
117
118    fn check_line(
119        &self,
120        line: &str,
121        ctx: &LintContext,
122        line_number: usize,
123        code_spans: &[crate::lint_context::CodeSpan],
124        buffers: &mut LineCheckBuffers,
125        line_index: &LineIndex,
126    ) -> Vec<LintWarning> {
127        let mut warnings = Vec::new();
128
129        // Skip reference definitions
130        if self.is_reference_definition(line) {
131            return warnings;
132        }
133
134        // Skip lines inside HTML blocks - URLs in HTML attributes should not be linted
135        if ctx.line_info(line_number).is_some_and(|info| info.in_html_block) {
136            return warnings;
137        }
138
139        // Skip lines that are continuations of multiline markdown links
140        // Pattern: text](url) without a leading [
141        if MULTILINE_LINK_CONTINUATION_REGEX.is_match(line) {
142            return warnings;
143        }
144
145        // Quick check - does this line potentially have a URL or email?
146        let has_quick_check = URL_QUICK_CHECK_REGEX.is_match(line);
147        let has_www = line.contains("www.");
148        let has_at = line.contains('@');
149
150        if !has_quick_check && !has_at && !has_www {
151            return warnings;
152        }
153
154        // Clear and reuse buffers instead of allocating new ones
155        buffers.markdown_link_ranges.clear();
156        buffers.image_ranges.clear();
157
158        let has_bracket = line.contains('[');
159        let has_angle = line.contains('<');
160        let has_bang = line.contains('!');
161
162        if has_bracket {
163            for mat in MARKDOWN_LINK_REGEX.find_iter(line) {
164                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
165            }
166
167            // Also include empty link patterns like [text]() and [text][]
168            for mat in MARKDOWN_EMPTY_LINK_REGEX.find_iter(line) {
169                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
170            }
171
172            for mat in MARKDOWN_EMPTY_REF_REGEX.find_iter(line) {
173                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
174            }
175
176            // Also exclude shortcut reference links like [URL]
177            for mat in SHORTCUT_REF_REGEX.find_iter(line) {
178                let end = mat.end();
179                let next_non_ws = line[end..].bytes().find(|b| !b.is_ascii_whitespace());
180                if next_non_ws == Some(b'(') || next_non_ws == Some(b'[') {
181                    continue;
182                }
183                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
184            }
185
186            // Check if this line contains only a badge link (common pattern)
187            if has_bang && BADGE_LINK_LINE_REGEX.is_match(line) {
188                return warnings;
189            }
190        }
191
192        if has_angle {
193            for mat in ANGLE_LINK_REGEX.find_iter(line) {
194                buffers.markdown_link_ranges.push((mat.start(), mat.end()));
195            }
196        }
197
198        // Find all markdown images for exclusion
199        if has_bang && has_bracket {
200            for mat in MARKDOWN_IMAGE_REGEX.find_iter(line) {
201                buffers.image_ranges.push((mat.start(), mat.end()));
202            }
203        }
204
205        // Find bare URLs
206        buffers.urls_found.clear();
207
208        // First, find IPv6 URLs (they need special handling)
209        for mat in URL_IPV6_REGEX.find_iter(line) {
210            let url_str = mat.as_str();
211            buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
212        }
213
214        // Then find regular URLs
215        for mat in URL_STANDARD_REGEX.find_iter(line) {
216            let url_str = mat.as_str();
217
218            // Skip if it's an IPv6 URL (already handled)
219            if url_str.contains("://[") {
220                continue;
221            }
222
223            // Skip malformed IPv6-like URLs
224            // Check for IPv6-like patterns that are malformed
225            if let Some(host_start) = url_str.find("://") {
226                let after_protocol = &url_str[host_start + 3..];
227                // If it looks like IPv6 (has :: or multiple :) but no brackets, skip if followed by ]
228                if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
229                    // Check if the next byte after our match is ] (ASCII, so byte check is safe)
230                    if line.as_bytes().get(mat.end()) == Some(&b']') {
231                        // This is likely a malformed IPv6 URL like "https://::1]:8080"
232                        continue;
233                    }
234                }
235            }
236
237            buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
238        }
239
240        // Find www URLs without protocol (e.g., www.example.com)
241        for mat in URL_WWW_REGEX.find_iter(line) {
242            let url_str = mat.as_str();
243            let start_pos = mat.start();
244            let end_pos = mat.end();
245
246            // Skip if preceded by / or @ (likely part of a full URL)
247            if start_pos > 0 {
248                let prev_char = line.as_bytes().get(start_pos - 1).copied();
249                if prev_char == Some(b'/') || prev_char == Some(b'@') {
250                    continue;
251                }
252            }
253
254            // Skip if inside angle brackets (autolink syntax like <www.example.com>)
255            if start_pos > 0 && end_pos < line.len() {
256                let prev_char = line.as_bytes().get(start_pos - 1).copied();
257                let next_char = line.as_bytes().get(end_pos).copied();
258                if prev_char == Some(b'<') && next_char == Some(b'>') {
259                    continue;
260                }
261            }
262
263            buffers.urls_found.push((start_pos, end_pos, url_str.to_string()));
264        }
265
266        // Find XMPP URIs (GFM extended autolinks: xmpp:user@domain/resource)
267        for mat in XMPP_URI_REGEX.find_iter(line) {
268            let uri_str = mat.as_str();
269            let start_pos = mat.start();
270            let end_pos = mat.end();
271
272            // Skip if inside angle brackets (already properly formatted: <xmpp:user@domain>)
273            if start_pos > 0 && end_pos < line.len() {
274                let prev_char = line.as_bytes().get(start_pos - 1).copied();
275                let next_char = line.as_bytes().get(end_pos).copied();
276                if prev_char == Some(b'<') && next_char == Some(b'>') {
277                    continue;
278                }
279            }
280
281            buffers.urls_found.push((start_pos, end_pos, uri_str.to_string()));
282        }
283
284        // Process found URLs
285        for &(start, _end, ref url_str) in &buffers.urls_found {
286            // Skip custom protocols
287            if CUSTOM_PROTOCOL_REGEX.is_match(url_str) {
288                continue;
289            }
290
291            // Check if this URL is inside a markdown link, angle bracket, or image
292            // We check if the URL starts within a construct, not if it's entirely contained.
293            // This handles cases where URL detection may include trailing characters
294            // that extend past the construct boundary (e.g., parentheses).
295            // Linear scan is correct here because ranges can overlap/nest (e.g., [[1]](url))
296            let is_inside_construct = buffers
297                .markdown_link_ranges
298                .iter()
299                .any(|&(s, e)| start >= s && start < e)
300                || buffers.image_ranges.iter().any(|&(s, e)| start >= s && start < e);
301
302            if is_inside_construct {
303                continue;
304            }
305
306            // Calculate absolute byte position for context-aware checks
307            let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
308            let absolute_pos = line_start_byte + start;
309
310            // Check if URL is inside an HTML tag (handles multiline tags correctly)
311            if ctx.is_in_html_tag(absolute_pos) {
312                continue;
313            }
314
315            // Check if we're inside an HTML comment
316            if ctx.is_in_html_comment(absolute_pos) || ctx.is_in_mdx_comment(absolute_pos) {
317                continue;
318            }
319
320            // Check if we're inside a Hugo/Quarto shortcode
321            if ctx.is_in_shortcode(absolute_pos) {
322                continue;
323            }
324
325            // Skip URLs inside Pandoc line blocks (`| text`) or YAML metadata blocks.
326            // Both constructs treat their content as literal/structured text where bare
327            // URLs are intentional and should not be reformatted.
328            if ctx.flavor.is_pandoc_compatible()
329                && (ctx.is_in_line_block(absolute_pos) || ctx.is_in_pandoc_metadata(absolute_pos))
330            {
331                continue;
332            }
333
334            // Clean up the URL by removing trailing punctuation
335            let trimmed_url = self.trim_trailing_punctuation(url_str);
336
337            // Only report if we have a valid URL after trimming
338            if !trimmed_url.is_empty() && trimmed_url != "//" {
339                let trimmed_len = trimmed_url.len();
340                let (start_line, start_col, end_line, end_col) =
341                    calculate_url_range(line_number, line, start, trimmed_len);
342
343                // For www URLs without protocol, add https:// prefix in the fix
344                let replacement = if trimmed_url.starts_with("www.") {
345                    format!("<https://{trimmed_url}>")
346                } else {
347                    format!("<{trimmed_url}>")
348                };
349
350                warnings.push(LintWarning {
351                    rule_name: Some("MD034".to_string()),
352                    line: start_line,
353                    column: start_col,
354                    end_line,
355                    end_column: end_col,
356                    message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
357                    severity: Severity::Warning,
358                    fix: Some(Fix::new(
359                        {
360                            let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
361                            (line_start_byte + start)..(line_start_byte + start + trimmed_len)
362                        },
363                        replacement,
364                    )),
365                });
366            }
367        }
368
369        // Check for bare email addresses
370        for cap in EMAIL_PATTERN.captures_iter(line) {
371            if let Some(mat) = cap.get(0) {
372                let email = mat.as_str();
373                let start = mat.start();
374                let end = mat.end();
375
376                // Skip if email is part of an XMPP URI (xmpp:user@domain)
377                // Check character boundary to avoid panics with multi-byte UTF-8
378                if start >= 5 && line.is_char_boundary(start - 5) && &line[start - 5..start] == "xmpp:" {
379                    continue;
380                }
381
382                // Check if email is inside angle brackets or markdown link
383                let mut is_inside_construct = false;
384                for &(link_start, link_end) in &buffers.markdown_link_ranges {
385                    if start >= link_start && end <= link_end {
386                        is_inside_construct = true;
387                        break;
388                    }
389                }
390
391                if !is_inside_construct {
392                    // Calculate absolute byte position for context-aware checks
393                    let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
394                    let absolute_pos = line_start_byte + start;
395
396                    // Check if email is inside an HTML tag (handles multiline tags)
397                    if ctx.is_in_html_tag(absolute_pos) {
398                        continue;
399                    }
400
401                    // Skip emails inside Pandoc line blocks or YAML metadata blocks.
402                    if ctx.flavor.is_pandoc_compatible()
403                        && (ctx.is_in_line_block(absolute_pos) || ctx.is_in_pandoc_metadata(absolute_pos))
404                    {
405                        continue;
406                    }
407
408                    // Check if email is inside a code span (byte offsets handle multi-line spans)
409                    let is_in_code_span = code_spans
410                        .iter()
411                        .any(|span| absolute_pos >= span.byte_offset && absolute_pos < span.byte_end);
412
413                    if !is_in_code_span {
414                        let email_len = end - start;
415                        let (start_line, start_col, end_line, end_col) =
416                            calculate_url_range(line_number, line, start, email_len);
417
418                        warnings.push(LintWarning {
419                            rule_name: Some("MD034".to_string()),
420                            line: start_line,
421                            column: start_col,
422                            end_line,
423                            end_column: end_col,
424                            message: format!("Email address without angle brackets or link formatting: '{email}'"),
425                            severity: Severity::Warning,
426                            fix: Some(Fix::new(
427                                (line_start_byte + start)..(line_start_byte + end),
428                                format!("<{email}>"),
429                            )),
430                        });
431                    }
432                }
433            }
434        }
435
436        warnings
437    }
438}
439
440impl Rule for MD034NoBareUrls {
    /// Returns the rule identifier, `"MD034"`.
    #[inline]
    fn name(&self) -> &'static str {
        "MD034"
    }
445
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
449
    /// Builds a boxed instance of the rule. The configuration is ignored because
    /// the rule has no options.
    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
    where
        Self: Sized,
    {
        Box::new(MD034NoBareUrls)
    }
456
    /// Returns the category this rule belongs to: `RuleCategory::Link`.
    #[inline]
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
461
462    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
463        !ctx.likely_has_links_or_images() && self.should_skip_content(ctx.content)
464    }
465
    /// Returns the one-line, human-readable description of the rule.
    #[inline]
    fn description(&self) -> &'static str {
        "No bare URLs - wrap URLs in angle brackets"
    }
470
471    fn check(&self, ctx: &LintContext) -> LintResult {
472        let mut warnings = Vec::new();
473        let content = ctx.content;
474
475        // Quick skip for content without URLs
476        if self.should_skip_content(content) {
477            return Ok(warnings);
478        }
479
480        // Create LineIndex for correct byte position calculations across all line ending types
481        let line_index = &ctx.line_index;
482
483        // Get code spans for exclusion
484        let code_spans = ctx.code_spans();
485
486        // Allocate reusable buffers once instead of per-line to reduce allocations
487        let mut buffers = LineCheckBuffers::default();
488
489        // Iterate over content lines, automatically skipping front matter, code blocks,
490        // and Obsidian comments (when in Obsidian flavor)
491        // This uses the filtered iterator API which centralizes the skip logic
492        for line in ctx
493            .filtered_lines()
494            .skip_front_matter()
495            .skip_code_blocks()
496            .skip_jsx_expressions()
497            .skip_mdx_comments()
498            .skip_obsidian_comments()
499        {
500            // Skip MyST colon-fence directive openers (`:::{name} <arg>`). The text
501            // after the directive name is an opaque argument (a URL, path, or label),
502            // not markdown prose, so a bare URL there must not be wrapped in angle
503            // brackets. Directive body lines are not openers, so they fall through to
504            // `check_line` and are linted as usual.
505            if ctx.is_myst_colon_directive_opener_line(line.line_num) {
506                continue;
507            }
508
509            let mut line_warnings =
510                self.check_line(line.content, ctx, line.line_num, &code_spans, &mut buffers, line_index);
511
512            // Filter out warnings that are inside code spans (handles multi-line spans via byte offsets)
513            line_warnings.retain(|warning| {
514                !code_spans.iter().any(|span| {
515                    if let Some(fix) = &warning.fix {
516                        // Byte-offset check handles both single-line and multi-line code spans
517                        fix.range.start >= span.byte_offset && fix.range.start < span.byte_end
518                    } else {
519                        span.line == warning.line
520                            && span.end_line == warning.line
521                            && warning.column > 0
522                            && (warning.column - 1) >= span.start_col
523                            && (warning.column - 1) < span.end_col
524                    }
525                })
526            });
527
528            // Filter out warnings where the URL is inside a parsed link
529            // This handles cases like [text]( https://url ) where the URL has leading whitespace
530            // pulldown-cmark correctly parses these as valid links even though our regex misses them
531            line_warnings.retain(|warning| {
532                if let Some(fix) = &warning.fix {
533                    // Check if the fix range falls inside any parsed link's byte range
534                    !ctx.links
535                        .iter()
536                        .any(|link| fix.range.start >= link.byte_offset && fix.range.end <= link.byte_end)
537                } else {
538                    true
539                }
540            });
541
542            // Filter out warnings where the URL is inside an Obsidian comment (%%...%%)
543            // This handles inline comments like: text %%https://hidden.com%% text
544            line_warnings.retain(|warning| !ctx.is_position_in_obsidian_comment(warning.line, warning.column));
545
546            warnings.extend(line_warnings);
547        }
548
549        Ok(warnings)
550    }
551
552    fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
553        let mut content = ctx.content.to_string();
554        let warnings = self.check(ctx)?;
555        let mut warnings =
556            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
557
558        // Sort warnings by position to ensure consistent fix application
559        warnings.sort_by_key(|w| w.fix.as_ref().map_or(0, |f| f.range.start));
560
561        // Apply fixes in reverse order to maintain positions
562        for warning in warnings.iter().rev() {
563            if let Some(fix) = &warning.fix {
564                let start = fix.range.start;
565                let end = fix.range.end;
566                content.replace_range(start..end, &fix.replacement);
567            }
568        }
569
570        Ok(content)
571    }
572}
573
574#[cfg(test)]
575mod tests {
576    use super::*;
577
578    #[test]
579    fn test_shortcut_ref_at_end_of_line_no_trailing_chars() {
580        let rule = MD034NoBareUrls;
581        let content = "See [https://example.com]";
582        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
583        let result = rule.check(&ctx).unwrap();
584        assert!(
585            result.is_empty(),
586            "[URL] at end of line should be treated as shortcut ref: {result:?}"
587        );
588    }
589
590    #[test]
591    fn test_shortcut_ref_multiple_spaces_before_paren() {
592        let rule = MD034NoBareUrls;
593        let content = "[text]  (https://example.com)";
594        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
595        let result = rule.check(&ctx).unwrap();
596        // [text]  (url) — the spaces between ] and ( mean this should be treated
597        // as shortcut ref then bare parens, NOT a markdown link. URL may still be bare.
598        // This test verifies consistent behavior with the FancyRegex that had (?!\s*[\[(])
599        let _ = result; // Just verify no panic; the exact warning count depends on other rules
600    }
601
602    #[test]
603    fn test_shortcut_ref_tab_before_bracket() {
604        let rule = MD034NoBareUrls;
605        let content = "[https://example.com]\t[other]";
606        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
607        let result = rule.check(&ctx).unwrap();
608        // Tab between ] and [ does not form a full reference link in Markdown.
609        // The first [URL] is a shortcut ref containing a bare URL, so MD034 warns.
610        // This test verifies consistent behavior and no panic with tab characters.
611        assert_eq!(
612            result.len(),
613            1,
614            "Bare URL inside shortcut ref should be detected: {result:?}"
615        );
616    }
617
618    #[test]
619    fn test_shortcut_ref_followed_by_punctuation() {
620        let rule = MD034NoBareUrls;
621        let content = "[https://example.com], see also other things.";
622        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
623        let result = rule.check(&ctx).unwrap();
624        assert!(
625            result.is_empty(),
626            "[URL] followed by comma should be treated as shortcut ref: {result:?}"
627        );
628    }
629
630    #[test]
631    fn test_url_in_backticks_inside_mdx_component_not_flagged() {
632        // Exact reproduction from issue #572: URL inside inline code within an MDX
633        // component body must not be flagged. The same URL in backticks outside the
634        // component is already handled correctly and serves as a control.
635        let rule = MD034NoBareUrls;
636        let content = "# Test\n\nControl: `https://rumdl.example.com/` is fine here.\n\n<ParamField path=\"--stuff\">\n  This URL `https://rumdl.example.com/` must not be flagged.\n</ParamField>\n";
637        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
638        let result = rule.check(&ctx).unwrap();
639        assert!(
640            result.is_empty(),
641            "URL in backticks inside MDX component must not be flagged: {result:?}"
642        );
643    }
644
645    #[test]
646    fn test_bare_url_inside_mdx_component_still_flagged() {
647        // A bare URL (not in backticks) inside an MDX component body must still be flagged.
648        // This ensures the fix for issue #572 only suppresses properly code-spanned URLs.
649        let rule = MD034NoBareUrls;
650        let content =
651            "# Test\n\n<ParamField path=\"--stuff\">\n  Visit https://rumdl.example.com/ for details.\n</ParamField>\n";
652        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
653        let result = rule.check(&ctx).unwrap();
654        assert_eq!(
655            result.len(),
656            1,
657            "Bare URL in MDX component body must still be flagged: {result:?}"
658        );
659    }
660
661    #[test]
662    fn test_url_in_backticks_inside_nested_mdx_component_not_flagged() {
663        // Nested MDX components must also respect code spans.
664        let rule = MD034NoBareUrls;
665        let content = "<Outer>\n  <Inner>\n    Check `https://example.com/` here.\n  </Inner>\n</Outer>\n";
666        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
667        let result = rule.check(&ctx).unwrap();
668        assert!(
669            result.is_empty(),
670            "URL in backticks inside nested MDX component must not be flagged: {result:?}"
671        );
672    }
673
674    /// URLs inside Pandoc line blocks (`| text`) must not be flagged as bare URLs.
675    #[test]
676    fn test_pandoc_skips_urls_in_line_blocks() {
677        use crate::config::MarkdownFlavor;
678        use crate::lint_context::LintContext;
679        let rule = MD034NoBareUrls;
680        let content = "| See https://example.com\n| For details\n";
681        let ctx = LintContext::new(content, MarkdownFlavor::Pandoc, None);
682        let result = rule.check(&ctx).unwrap();
683        assert!(
684            result.is_empty(),
685            "MD034 should skip URLs in Pandoc line blocks: {result:?}"
686        );
687    }
688
689    /// URLs inside Pandoc YAML metadata blocks must not be flagged.
690    #[test]
691    fn test_pandoc_skips_urls_in_metadata() {
692        use crate::config::MarkdownFlavor;
693        use crate::lint_context::LintContext;
694        let rule = MD034NoBareUrls;
695        let content = "---\nhomepage: https://example.com\n---\n\nBody.\n";
696        let ctx = LintContext::new(content, MarkdownFlavor::Pandoc, None);
697        let result = rule.check(&ctx).unwrap();
698        assert!(
699            result.is_empty(),
700            "MD034 should skip URLs in Pandoc YAML metadata: {result:?}"
701        );
702    }
703
704    /// Standard flavor must still flag bare URLs in lines starting with `|`
705    /// (which are not interpreted as line blocks).
706    #[test]
707    fn test_standard_still_flags_urls_in_pipe_prefixed_lines() {
708        use crate::config::MarkdownFlavor;
709        use crate::lint_context::LintContext;
710        let rule = MD034NoBareUrls;
711        let content = "| See https://example.com\n";
712        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
713        let result = rule.check(&ctx).unwrap();
714        assert!(
715            !result.is_empty(),
716            "MD034 should still flag URLs in pipe-prefixed lines under Standard flavor"
717        );
718    }
719
720    #[test]
721    fn test_url_in_backticks_after_fenced_code_block_inside_mdx_not_flagged() {
722        // A fenced code block inside a JSX component must not misalign the code-span
723        // offset map. The URL in backticks that appears *after* the code block must
724        // still be recognised as being inside a code span.
725        let rule = MD034NoBareUrls;
726        let content = "\
727<Component>
728Some intro text.
729
730```
731example code here
732```
733
734Check `https://example.com/` here.
735</Component>
736";
737        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
738        let result = rule.check(&ctx).unwrap();
739        assert!(
740            result.is_empty(),
741            "URL in backticks after a fenced code block inside MDX must not be flagged: {result:?}"
742        );
743    }
744
745    /// Issue #642: a URL given as the argument of a MyST colon-fence directive
746    /// (`:::{name} <url>`) is the directive's opaque argument, not markdown prose,
747    /// and must not be wrapped in angle brackets.
748    #[test]
749    fn test_myst_colon_directive_argument_url_not_flagged() {
750        use crate::config::MarkdownFlavor;
751        use crate::lint_context::LintContext;
752        let rule = MD034NoBareUrls;
753        let content = "\
754:::{anywidget} https://cdn.jsdelivr.net/npm/repo-review-webapp@1.1.3/dist/repo-review-anywidget.mjs
755{
756  \"deps\": [\"repo-review~=1.1.0\"]
757}
758:::
759";
760        let ctx = LintContext::new(content, MarkdownFlavor::MyST, None);
761        let result = rule.check(&ctx).unwrap();
762        assert!(
763            result.is_empty(),
764            "URL argument on a MyST colon directive opener must not be flagged: {result:?}"
765        );
766    }
767
768    /// A nested MyST colon directive opener also carries an opaque argument.
769    #[test]
770    fn test_myst_nested_colon_directive_argument_url_not_flagged() {
771        use crate::config::MarkdownFlavor;
772        use crate::lint_context::LintContext;
773        let rule = MD034NoBareUrls;
774        let content = "\
775::::{grid}
776:::{card} https://example.com/card-target
777Some caption.
778:::
779::::
780";
781        let ctx = LintContext::new(content, MarkdownFlavor::MyST, None);
782        let result = rule.check(&ctx).unwrap();
783        assert!(
784            result.is_empty(),
785            "URL argument on a nested MyST colon directive opener must not be flagged: {result:?}"
786        );
787    }
788
789    /// A bare URL in the *body* of a content directive (e.g. `{note}`) is genuine
790    /// prose and must still be flagged. The opener exemption must not leak to the body.
791    #[test]
792    fn test_myst_directive_body_url_still_flagged() {
793        use crate::config::MarkdownFlavor;
794        use crate::lint_context::LintContext;
795        let rule = MD034NoBareUrls;
796        let content = "\
797:::{note}
798See https://example.com/docs for more details.
799:::
800";
801        let ctx = LintContext::new(content, MarkdownFlavor::MyST, None);
802        let result = rule.check(&ctx).unwrap();
803        assert_eq!(
804            result.len(),
805            1,
806            "Bare URL in a MyST directive body must still be flagged: {result:?}"
807        );
808    }
809
810    /// An unclosed colon directive (no terminating `:::`) still has its opener
811    /// argument treated as opaque: the URL must not be flagged.
812    #[test]
813    fn test_myst_unclosed_colon_directive_argument_url_not_flagged() {
814        use crate::config::MarkdownFlavor;
815        use crate::lint_context::LintContext;
816        let rule = MD034NoBareUrls;
817        let content = "\
818:::{anywidget} https://example.com/widget.mjs
819Some trailing content with no closing fence.
820";
821        let ctx = LintContext::new(content, MarkdownFlavor::MyST, None);
822        let result = rule.check(&ctx).unwrap();
823        assert!(
824            result.is_empty(),
825            "URL argument on an unclosed MyST colon directive opener must not be flagged: {result:?}"
826        );
827    }
828
829    /// The colon-directive exemption is MyST-specific: under the Standard flavor a
830    /// `:::{...}` line is ordinary text and a bare URL on it must still be flagged.
831    #[test]
832    fn test_colon_directive_url_flagged_in_standard_flavor() {
833        use crate::config::MarkdownFlavor;
834        use crate::lint_context::LintContext;
835        let rule = MD034NoBareUrls;
836        let content = ":::{anywidget} https://example.com/widget.mjs\n";
837        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
838        let result = rule.check(&ctx).unwrap();
839        assert_eq!(
840            result.len(),
841            1,
842            "Under Standard flavor a bare URL on a `:::` line must still be flagged: {result:?}"
843        );
844    }
845}
rumdl_lib/rules/md034_no_bare_urls.rs

rumdl_lib/rules/
md034_no_bare_urls.rs