use std::sync::LazyLock;
use regex::Regex;
use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
use crate::utils::range_utils::{LineIndex, calculate_url_range};
use crate::utils::regex_cache::{
EMAIL_PATTERN, URL_IPV6_REGEX, URL_QUICK_CHECK_REGEX, URL_STANDARD_REGEX, URL_WWW_REGEX, XMPP_URI_REGEX,
};
use crate::filtered_lines::FilteredLinesExt;
use crate::lint_context::LintContext;
/// Matches any of a fixed list of non-web URI schemes followed by `://`.
///
/// `check_line` drops a candidate URL when this pattern matches anywhere in the URL text.
/// NOTE(review): the pattern is unanchored, so it also matches when a listed scheme appears
/// later inside a URL or as the tail of a longer scheme name — confirm this is intended.
static CUSTOM_PROTOCOL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"(?:grpc|ws|wss|ssh|git|svn|file|data|javascript|vscode|chrome|about|slack|discord|matrix|irc|redis|mongodb|postgresql|mysql|kafka|nats|amqp|mqtt|custom|app|api|service)://"#).unwrap()
});
/// Matches an inline link `[text](destination "title")`.
///
/// The link text may contain one level of nested `[...]`; the destination (capture group 1)
/// contains neither `)` nor whitespace; the single- or double-quoted title is optional.
static MARKDOWN_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap()
});
/// Matches an inline link with an empty destination: `[text]()`.
static MARKDOWN_EMPTY_LINK_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\(\)"#).unwrap());
/// Matches a reference with an empty label: `[text][]`.
static MARKDOWN_EMPTY_REF_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"\[(?:[^\[\]]|\[[^\]]*\])*\]\[\]"#).unwrap());
/// Matches `<...>` wrapping an `http(s)`/`ftp(s)` URL (including a bracketed IPv6 host with an
/// optional `%zone` suffix), an `xmpp:` URI, or an email-shaped `local@domain.tld` string.
static ANGLE_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r#"<((?:https?|ftps?)://(?:\[[0-9a-fA-F:]+(?:%[a-zA-Z0-9]+)?\]|[^>]+)|xmpp:[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"#,
)
.unwrap()
});
/// Matches a line that consists solely of a linked image, `[![alt](src)](href)`,
/// optionally surrounded by whitespace.
static BADGE_LINK_LINE_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"^\s*\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*$"#).unwrap());
/// Matches an image `![alt](src "title")`, tolerating whitespace after `!` and after `]`.
/// Capture group 1 is the alt text, group 2 the source.
static MARKDOWN_IMAGE_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"!\s*\[([^\]]*)\]\s*\(([^)\s]+)(?:\s+(?:\"[^\"]*\"|\'[^\']*\'))?\)"#).unwrap());
/// Matches the start of a reference definition, `[label]:`, whose target begins with `<`
/// or with an `http(s)`/`ftp(s)` scheme.
static REFERENCE_DEF_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^\s*\[[^\]]+\]:\s*(?:<|(?:https?|ftps?)://)").unwrap());
/// Matches a line that reaches `](...)` before any `[` appears, i.e. the closing half of a
/// link whose opening bracket is not on this line.
static MULTILINE_LINK_CONTINUATION_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"^[^\[]*\]\(.*\)"#).unwrap());
/// Matches a bracketed span `[text]` that contains no nested brackets (capture group 1 is the text).
static SHORTCUT_REF_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"\[([^\[\]]+)\]"#).unwrap());
/// Scratch vectors used by `check_line`.
///
/// `check` creates one instance and passes it to every `check_line` call; `check_line`
/// clears each vector before refilling it, so the allocations are reused across lines.
#[derive(Default)]
struct LineCheckBuffers {
// `(start, end)` byte offsets within the current line of link-like constructs
// (inline links, empty links/refs, shortcut refs, angle-bracket autolinks).
markdown_link_ranges: Vec<(usize, usize)>,
// `(start, end)` byte offsets within the current line of image constructs.
image_ranges: Vec<(usize, usize)>,
// `(start, end, text)` for every URL candidate matched on the current line.
urls_found: Vec<(usize, usize, String)>,
}
/// Lint rule MD034: reports URLs and email addresses that appear without angle brackets
/// or link formatting. The rule carries no state or configuration.
#[derive(Default, Clone)]
pub struct MD034NoBareUrls;
impl MD034NoBareUrls {
/// Cheap pre-filter: returns `true` when `content` cannot contain anything this rule
/// reports, i.e. it has no `:`, no `@`, and no `www.` substring.
#[inline]
pub fn should_skip_content(&self, content: &str) -> bool {
    // `:` and `@` are ASCII, so a single byte scan is enough for both.
    let has_scheme_or_email_marker = content.bytes().any(|byte| matches!(byte, b':' | b'@'));
    !(has_scheme_or_email_marker || content.contains("www."))
}
/// Returns the prefix of `url` that should be treated as the URL proper.
///
/// Two steps are applied in order:
/// 1. If `url` has more `)` than `(`, it is cut just before the first `)` that has no
///    matching `(` to its left.
/// 2. Trailing `.`, `,`, `;`, `!` and `?` are removed. A trailing `:` is kept, except
///    when `:` is the only character left, in which case the result is empty.
fn trim_trailing_punctuation<'a>(&self, url: &'a str) -> &'a str {
    let opening = url.matches('(').count();
    let closing = url.matches(')').count();

    let without_stray_paren = if closing > opening {
        // Walk the string tracking nesting depth; the first time depth goes negative
        // we are sitting on the unmatched `)`.
        let mut depth = 0;
        let cut = url
            .char_indices()
            .find_map(|(offset, ch)| {
                match ch {
                    '(' => depth += 1,
                    ')' => depth -= 1,
                    _ => {}
                }
                (depth < 0).then_some(offset)
            })
            .unwrap_or(url.len());
        &url[..cut]
    } else {
        url
    };

    let stripped = without_stray_paren.trim_end_matches(|ch: char| matches!(ch, '.' | ',' | ';' | '!' | '?'));

    // A colon stops the trimming unless it is all that remains.
    if stripped == ":" { &stripped[..0] } else { stripped }
}
/// Returns `true` when `line` starts (after optional whitespace) with `[label]:` followed by
/// `<` or an `http(s)`/`ftp(s)` URL, as defined by `REFERENCE_DEF_REGEX`.
fn is_reference_definition(&self, line: &str) -> bool {
REFERENCE_DEF_REGEX.is_match(line)
}
/// Scans a single line for bare URLs and bare email addresses.
///
/// * `line` - text of the line being checked.
/// * `ctx` - document context, queried for HTML blocks/tags, comments and shortcodes.
/// * `line_number` - line number passed to `ctx.line_info`, `line_index` and `calculate_url_range`.
/// * `code_spans` - code spans of the document; used here only to suppress email warnings.
/// * `buffers` - reusable scratch vectors; cleared and refilled by this call.
/// * `line_index` - maps `line_number` to the byte offset of the line start (0 is used if unknown).
///
/// Returns one warning per reported URL/email, each carrying a fix that wraps the text in
/// `<...>` (with `https://` prepended for URLs starting with `www.`).
fn check_line(
&self,
line: &str,
ctx: &LintContext,
line_number: usize,
code_spans: &[crate::lint_context::CodeSpan],
buffers: &mut LineCheckBuffers,
line_index: &LineIndex,
) -> Vec<LintWarning> {
let mut warnings = Vec::new();
// --- Whole-line exclusions -------------------------------------------------------------
if self.is_reference_definition(line) {
return warnings;
}
if ctx.line_info(line_number).is_some_and(|info| info.in_html_block) {
return warnings;
}
// The line closes a link (`](...)`) whose `[` is not on this line.
if MULTILINE_LINK_CONTINUATION_REGEX.is_match(line) {
return warnings;
}
// Quick rejection: nothing on the line resembles a URL, a `www.` host or an email.
let has_quick_check = URL_QUICK_CHECK_REGEX.is_match(line);
let has_www = line.contains("www.");
let has_at = line.contains('@');
if !has_quick_check && !has_at && !has_www {
return warnings;
}
// --- Record byte ranges of constructs that legitimately contain URLs -------------------
buffers.markdown_link_ranges.clear();
buffers.image_ranges.clear();
let has_bracket = line.contains('[');
let has_angle = line.contains('<');
let has_bang = line.contains('!');
if has_bracket {
for mat in MARKDOWN_LINK_REGEX.find_iter(line) {
buffers.markdown_link_ranges.push((mat.start(), mat.end()));
}
for mat in MARKDOWN_EMPTY_LINK_REGEX.find_iter(line) {
buffers.markdown_link_ranges.push((mat.start(), mat.end()));
}
for mat in MARKDOWN_EMPTY_REF_REGEX.find_iter(line) {
buffers.markdown_link_ranges.push((mat.start(), mat.end()));
}
// A `[text]` span counts as a shortcut reference only when the next non-whitespace
// byte is neither `(` nor `[`; otherwise it is left to the other patterns.
for mat in SHORTCUT_REF_REGEX.find_iter(line) {
let end = mat.end();
let next_non_ws = line[end..].bytes().find(|b| !b.is_ascii_whitespace());
if next_non_ws == Some(b'(') || next_non_ws == Some(b'[') {
continue;
}
buffers.markdown_link_ranges.push((mat.start(), mat.end()));
}
// A line that is nothing but a badge (`[![alt](src)](href)`) is never reported.
if has_bang && BADGE_LINK_LINE_REGEX.is_match(line) {
return warnings;
}
}
if has_angle {
for mat in ANGLE_LINK_REGEX.find_iter(line) {
buffers.markdown_link_ranges.push((mat.start(), mat.end()));
}
}
if has_bang && has_bracket {
for mat in MARKDOWN_IMAGE_REGEX.find_iter(line) {
buffers.image_ranges.push((mat.start(), mat.end()));
}
}
// --- Collect URL candidates from each URL pattern --------------------------------------
buffers.urls_found.clear();
for mat in URL_IPV6_REGEX.find_iter(line) {
let url_str = mat.as_str();
buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
}
for mat in URL_STANDARD_REGEX.find_iter(line) {
let url_str = mat.as_str();
// Bracketed hosts are skipped here; presumably they are covered by the IPv6 pass above.
if url_str.contains("://[") {
continue;
}
// Skip a match whose host part has `::` or several `:` and that is immediately
// followed by `]` — presumably a partial match of a bracketed IPv6 literal (TODO confirm).
if let Some(host_start) = url_str.find("://") {
let after_protocol = &url_str[host_start + 3..];
if after_protocol.contains("::") || after_protocol.chars().filter(|&c| c == ':').count() > 1 {
if line.as_bytes().get(mat.end()) == Some(&b']') {
continue;
}
}
}
buffers.urls_found.push((mat.start(), mat.end(), url_str.to_string()));
}
for mat in URL_WWW_REGEX.find_iter(line) {
let url_str = mat.as_str();
let start_pos = mat.start();
let end_pos = mat.end();
// A `www.` match directly preceded by `/` or `@` is skipped — presumably it is the
// tail of a longer URL or of an email address.
if start_pos > 0 {
let prev_char = line.as_bytes().get(start_pos - 1).copied();
if prev_char == Some(b'/') || prev_char == Some(b'@') {
continue;
}
}
// Already wrapped as `<www...>`.
if start_pos > 0 && end_pos < line.len() {
let prev_char = line.as_bytes().get(start_pos - 1).copied();
let next_char = line.as_bytes().get(end_pos).copied();
if prev_char == Some(b'<') && next_char == Some(b'>') {
continue;
}
}
buffers.urls_found.push((start_pos, end_pos, url_str.to_string()));
}
for mat in XMPP_URI_REGEX.find_iter(line) {
let uri_str = mat.as_str();
let start_pos = mat.start();
let end_pos = mat.end();
// Already wrapped as `<xmpp:...>`.
if start_pos > 0 && end_pos < line.len() {
let prev_char = line.as_bytes().get(start_pos - 1).copied();
let next_char = line.as_bytes().get(end_pos).copied();
if prev_char == Some(b'<') && next_char == Some(b'>') {
continue;
}
}
buffers.urls_found.push((start_pos, end_pos, uri_str.to_string()));
}
// --- Filter URL candidates and emit warnings -------------------------------------------
for &(start, _end, ref url_str) in &buffers.urls_found {
if CUSTOM_PROTOCOL_REGEX.is_match(url_str) {
continue;
}
// Only the candidate's start offset is tested against the recorded ranges.
let is_inside_construct = buffers
.markdown_link_ranges
.iter()
.any(|&(s, e)| start >= s && start < e)
|| buffers.image_ranges.iter().any(|&(s, e)| start >= s && start < e);
if is_inside_construct {
continue;
}
// Convert the in-line offset to a document byte offset for the context queries.
let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
let absolute_pos = line_start_byte + start;
if ctx.is_in_html_tag(absolute_pos) {
continue;
}
if ctx.is_in_html_comment(absolute_pos) || ctx.is_in_mdx_comment(absolute_pos) {
continue;
}
if ctx.is_in_shortcode(absolute_pos) {
continue;
}
let trimmed_url = self.trim_trailing_punctuation(url_str);
if !trimmed_url.is_empty() && trimmed_url != "//" {
let trimmed_len = trimmed_url.len();
let (start_line, start_col, end_line, end_col) =
calculate_url_range(line_number, line, start, trimmed_len);
// `www.` URLs have no scheme, so the fix supplies `https://`.
let replacement = if trimmed_url.starts_with("www.") {
format!("<https://{trimmed_url}>")
} else {
format!("<{trimmed_url}>")
};
warnings.push(LintWarning {
rule_name: Some("MD034".to_string()),
line: start_line,
column: start_col,
end_line,
end_column: end_col,
message: format!("URL without angle brackets or link formatting: '{trimmed_url}'"),
severity: Severity::Warning,
fix: Some(Fix {
// The fix range covers only the trimmed URL, in document byte offsets.
range: {
let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
(line_start_byte + start)..(line_start_byte + start + trimmed_len)
},
replacement,
}),
});
}
}
// --- Bare email addresses --------------------------------------------------------------
for cap in EMAIL_PATTERN.captures_iter(line) {
if let Some(mat) = cap.get(0) {
let email = mat.as_str();
let start = mat.start();
let end = mat.end();
// The address is the tail of an `xmpp:` URI, which is handled by the URL pass.
if start >= 5 && line.is_char_boundary(start - 5) && &line[start - 5..start] == "xmpp:" {
continue;
}
// Unlike the URL pass, the whole match must lie inside a recorded link range,
// and `image_ranges` is not consulted.
let mut is_inside_construct = false;
for &(link_start, link_end) in &buffers.markdown_link_ranges {
if start >= link_start && end <= link_end {
is_inside_construct = true;
break;
}
}
if !is_inside_construct {
let line_start_byte = line_index.get_line_start_byte(line_number).unwrap_or(0);
let absolute_pos = line_start_byte + start;
if ctx.is_in_html_tag(absolute_pos) {
continue;
}
let is_in_code_span = code_spans
.iter()
.any(|span| absolute_pos >= span.byte_offset && absolute_pos < span.byte_end);
if !is_in_code_span {
let email_len = end - start;
let (start_line, start_col, end_line, end_col) =
calculate_url_range(line_number, line, start, email_len);
warnings.push(LintWarning {
rule_name: Some("MD034".to_string()),
line: start_line,
column: start_col,
end_line,
end_column: end_col,
message: format!("Email address without angle brackets or link formatting: '{email}'"),
severity: Severity::Warning,
fix: Some(Fix {
range: (line_start_byte + start)..(line_start_byte + end),
replacement: format!("<{email}>"),
}),
});
}
}
}
}
warnings
}
}
impl Rule for MD034NoBareUrls {
/// Returns the rule identifier, `"MD034"`.
#[inline]
fn name(&self) -> &'static str {
"MD034"
}
/// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
fn as_any(&self) -> &dyn std::any::Any {
self
}
fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
where
Self: Sized,
{
Box::new(MD034NoBareUrls)
}
/// Returns the category this rule is grouped under: `RuleCategory::Link`.
#[inline]
fn category(&self) -> RuleCategory {
RuleCategory::Link
}
/// Returns `true` when the rule can be skipped for this document: the context reports no
/// likely links or images and the raw content passes `should_skip_content`.
fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
    if ctx.likely_has_links_or_images() {
        return false;
    }
    self.should_skip_content(ctx.content)
}
/// Returns the one-line human-readable description of the rule.
#[inline]
fn description(&self) -> &'static str {
"No bare URLs - wrap URLs in angle brackets"
}
fn check(&self, ctx: &LintContext) -> LintResult {
let mut warnings = Vec::new();
let content = ctx.content;
if self.should_skip_content(content) {
return Ok(warnings);
}
let line_index = &ctx.line_index;
let code_spans = ctx.code_spans();
let mut buffers = LineCheckBuffers::default();
for line in ctx
.filtered_lines()
.skip_front_matter()
.skip_code_blocks()
.skip_jsx_expressions()
.skip_mdx_comments()
.skip_obsidian_comments()
{
let mut line_warnings =
self.check_line(line.content, ctx, line.line_num, &code_spans, &mut buffers, line_index);
line_warnings.retain(|warning| {
!code_spans.iter().any(|span| {
if let Some(fix) = &warning.fix {
fix.range.start >= span.byte_offset && fix.range.start < span.byte_end
} else {
span.line == warning.line
&& span.end_line == warning.line
&& warning.column > 0
&& (warning.column - 1) >= span.start_col
&& (warning.column - 1) < span.end_col
}
})
});
line_warnings.retain(|warning| {
if let Some(fix) = &warning.fix {
!ctx.links
.iter()
.any(|link| fix.range.start >= link.byte_offset && fix.range.end <= link.byte_end)
} else {
true
}
});
line_warnings.retain(|warning| !ctx.is_position_in_obsidian_comment(warning.line, warning.column));
warnings.extend(line_warnings);
}
Ok(warnings)
}
fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
let mut content = ctx.content.to_string();
let warnings = self.check(ctx)?;
let mut warnings =
crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
warnings.sort_by_key(|w| w.fix.as_ref().map_or(0, |f| f.range.start));
for warning in warnings.iter().rev() {
if let Some(fix) = &warning.fix {
let start = fix.range.start;
let end = fix.range.end;
content.replace_range(start..end, &fix.replacement);
}
}
Ok(content)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs MD034 over `content` parsed with `flavor` and returns the reported warnings.
    fn lint(content: &str, flavor: crate::config::MarkdownFlavor) -> Vec<LintWarning> {
        let ctx = crate::lint_context::LintContext::new(content, flavor, None);
        MD034NoBareUrls.check(&ctx).unwrap()
    }

    /// A bracketed URL that ends the line is not reported.
    #[test]
    fn test_shortcut_ref_at_end_of_line_no_trailing_chars() {
        let result = lint("See [https://example.com]", crate::config::MarkdownFlavor::Standard);
        assert!(
            result.is_empty(),
            "[URL] at end of line should be treated as shortcut ref: {result:?}"
        );
    }

    /// Whitespace between `]` and `(`: the outcome is not asserted, the test only checks
    /// that linting completes without panicking.
    #[test]
    fn test_shortcut_ref_multiple_spaces_before_paren() {
        let _ = lint("[text] (https://example.com)", crate::config::MarkdownFlavor::Standard);
    }

    /// A bracketed URL followed by a tab and another bracket is reported once.
    #[test]
    fn test_shortcut_ref_tab_before_bracket() {
        let result = lint("[https://example.com]\t[other]", crate::config::MarkdownFlavor::Standard);
        assert_eq!(
            result.len(),
            1,
            "Bare URL inside shortcut ref should be detected: {result:?}"
        );
    }

    /// A bracketed URL followed by punctuation is not reported.
    #[test]
    fn test_shortcut_ref_followed_by_punctuation() {
        let result = lint(
            "[https://example.com], see also other things.",
            crate::config::MarkdownFlavor::Standard,
        );
        assert!(
            result.is_empty(),
            "[URL] followed by comma should be treated as shortcut ref: {result:?}"
        );
    }

    /// URLs inside backticks are ignored both outside and inside an MDX component.
    #[test]
    fn test_url_in_backticks_inside_mdx_component_not_flagged() {
        let content = "# Test\n\nControl: `https://rumdl.example.com/` is fine here.\n\n<ParamField path=\"--stuff\">\n This URL `https://rumdl.example.com/` must not be flagged.\n</ParamField>\n";
        let result = lint(content, crate::config::MarkdownFlavor::MDX);
        assert!(
            result.is_empty(),
            "URL in backticks inside MDX component must not be flagged: {result:?}"
        );
    }

    /// A genuinely bare URL inside an MDX component body is still reported.
    #[test]
    fn test_bare_url_inside_mdx_component_still_flagged() {
        let content =
            "# Test\n\n<ParamField path=\"--stuff\">\n Visit https://rumdl.example.com/ for details.\n</ParamField>\n";
        let result = lint(content, crate::config::MarkdownFlavor::MDX);
        assert_eq!(
            result.len(),
            1,
            "Bare URL in MDX component body must still be flagged: {result:?}"
        );
    }

    /// URLs inside backticks are ignored inside nested MDX components.
    #[test]
    fn test_url_in_backticks_inside_nested_mdx_component_not_flagged() {
        let content = "<Outer>\n <Inner>\n Check `https://example.com/` here.\n </Inner>\n</Outer>\n";
        let result = lint(content, crate::config::MarkdownFlavor::MDX);
        assert!(
            result.is_empty(),
            "URL in backticks inside nested MDX component must not be flagged: {result:?}"
        );
    }

    /// A fenced code block earlier in the component must not disturb code-span detection.
    #[test]
    fn test_url_in_backticks_after_fenced_code_block_inside_mdx_not_flagged() {
        let content = "\
<Component>
Some intro text.
```
example code here
```
Check `https://example.com/` here.
</Component>
";
        let result = lint(content, crate::config::MarkdownFlavor::MDX);
        assert!(
            result.is_empty(),
            "URL in backticks after a fenced code block inside MDX must not be flagged: {result:?}"
        );
    }
}