use regex::Regex;
use std::sync::LazyLock;
/// Matches bare URL-like spans in free text.
///
/// Three alternatives are tried, in this order:
/// 1. `http://` / `https://` URLs,
/// 2. `mailto:` links,
/// 3. plain e-mail addresses (`local@domain.tld`).
///
/// The first two alternatives extend until whitespace, `)`, `]` or `>`, so any
/// other trailing punctuation (for example a sentence-ending `.`) is part of
/// the match.
static URL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?:https?://[^\s\)\]>]+)|(?:mailto:[^\s\)\]>]+)|(?:[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})";
    // The pattern is a compile-time constant, so a failure here is a programming error.
    Regex::new(PATTERN).expect("URL regex should compile")
});
/// Matches the target part of a markdown link, i.e. `](target)`.
///
/// Capture group 1 is the text between `](` and the next `)`. The target is
/// not required to look like a URL, and it ends at the first `)` encountered.
static MARKDOWN_LINK_URL: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"\]\(([^)]+)\)";
    // The pattern is a compile-time constant, so a failure here is a programming error.
    Regex::new(PATTERN).expect("Markdown link regex should compile")
});
/// Leading marker of every generated placeholder; a numeric id is appended after it.
/// Starts with a NUL character.
const PLACEHOLDER_PREFIX: &str = "\x00URL_PLACEHOLDER_";
/// Trailing marker (a single NUL character) that closes a placeholder after its numeric id.
const PLACEHOLDER_SUFFIX: &str = "\x00";
/// A URL-like span that was removed from a text, paired with the placeholder
/// that took its place.
///
/// Values can be compared with `==`, which makes them usable in `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProtectedUrl {
    /// The original text that was masked (URL, `mailto:` link, e-mail address
    /// or markdown link target).
    pub url: String,
    /// The placeholder string that stands in for `url` in the masked text.
    pub placeholder: String,
}
/// Stateless helper that swaps URL-like spans for placeholders and back again.
///
/// The type has no fields; it only serves as a namespace for its methods.
#[derive(Clone, Debug, Default)]
pub struct UrlProtector;
impl UrlProtector {
    /// Creates a new protector. The type holds no state.
    pub fn new() -> Self {
        Self
    }

    /// Replaces every URL-like span in `text` with an opaque placeholder.
    ///
    /// Masking happens in two passes:
    /// 1. Markdown link targets — whatever sits between `](` and the next `)` —
    ///    are masked first, whether or not they look like URLs.
    /// 2. Remaining bare matches of `URL_PATTERN` (http/https URLs, `mailto:`
    ///    links, e-mail addresses) are masked afterwards.
    ///
    /// Returns the masked text together with one [`ProtectedUrl`] per masked
    /// span. Within each pass the spans are processed from the end of the text
    /// towards the start, so the list holds the markdown targets (last to
    /// first) followed by the bare matches (last to first).
    ///
    /// Placeholder ids are handed out in increasing order starting at 0. An id
    /// is skipped when its placeholder string already occurs in `text`, so that
    /// restoring never rewrites a placeholder-shaped token that was part of the
    /// input.
    pub fn mask_urls(&self, text: &str) -> (String, Vec<ProtectedUrl>) {
        let mut result = text.to_string();
        let mut protected_urls = Vec::new();
        let mut next_id = 0u32;
        // A generated placeholder can only clash with the input if the input
        // already contains the prefix; one scan here avoids a scan per placeholder.
        let may_collide = text.contains(PLACEHOLDER_PREFIX);

        // Pass 1: markdown link targets. `result` is still identical to `text`
        // at this point, so offsets taken from `text` are valid for `result`.
        let link_targets: Vec<_> = MARKDOWN_LINK_URL
            .captures_iter(text)
            .filter_map(|caps| caps.get(1))
            .map(|target| (target.start()..target.end(), target.as_str().to_owned()))
            .collect();
        // Replace back-to-front so earlier byte offsets stay valid.
        for (range, url) in link_targets.into_iter().rev() {
            let placeholder = Self::fresh_placeholder(text, may_collide, &mut next_id);
            result.replace_range(range, &placeholder);
            protected_urls.push(ProtectedUrl { url, placeholder });
        }

        // Pass 2: bare URLs / e-mail addresses in the partially masked text.
        let bare_urls: Vec<_> = URL_PATTERN
            .find_iter(&result)
            // Defensive guard: never re-mask something that is itself a placeholder.
            .filter(|found| !found.as_str().starts_with(PLACEHOLDER_PREFIX))
            .map(|found| (found.start()..found.end(), found.as_str().to_owned()))
            .collect();
        for (range, url) in bare_urls.into_iter().rev() {
            let placeholder = Self::fresh_placeholder(text, may_collide, &mut next_id);
            result.replace_range(range, &placeholder);
            protected_urls.push(ProtectedUrl { url, placeholder });
        }

        (result, protected_urls)
    }

    /// Puts the original spans back: every occurrence of each entry's
    /// `placeholder` in `text` is replaced by that entry's `url`.
    ///
    /// Entries are applied in slice order. Placeholders that no longer occur in
    /// `text` are silently ignored.
    pub fn restore_urls(&self, text: &str, protected_urls: &[ProtectedUrl]) -> String {
        protected_urls
            .iter()
            .fold(text.to_string(), |restored, protected| {
                restored.replace(&protected.placeholder, &protected.url)
            })
    }

    /// Returns `true` if `text` contains a bare URL / e-mail address or a
    /// markdown link target (`](...)`).
    pub fn contains_urls(text: &str) -> bool {
        URL_PATTERN.is_match(text) || MARKDOWN_LINK_URL.is_match(text)
    }

    /// Builds the next unused placeholder string and advances `next_id` past it.
    ///
    /// When `may_collide` is set, ids whose placeholder already occurs in
    /// `text` are skipped.
    fn fresh_placeholder(text: &str, may_collide: bool, next_id: &mut u32) -> String {
        loop {
            let candidate = format!(
                "{}{}{}",
                PLACEHOLDER_PREFIX, next_id, PLACEHOLDER_SUFFIX
            );
            *next_id += 1;
            if !may_collide || !text.contains(candidate.as_str()) {
                return candidate;
            }
        }
    }
}
/// Applies `transform` to `text` while keeping URL-like spans out of its reach.
///
/// The text is masked with [`UrlProtector::mask_urls`], the masked text is
/// handed to `transform`, and the placeholders in the transformed output are
/// then swapped back via [`UrlProtector::restore_urls`].
pub fn with_protected_urls<F>(text: &str, transform: F) -> String
where
    F: FnOnce(&str) -> String,
{
    let protector = UrlProtector::new();
    let (masked_text, saved_urls) = protector.mask_urls(text);
    protector.restore_urls(&transform(&masked_text), &saved_urls)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A single bare URL is replaced by a placeholder and recorded.
    #[test]
    fn test_mask_simple_url() {
        let input = "Visit https://example.com for more";
        let (masked, urls) = UrlProtector::new().mask_urls(input);

        assert!(!masked.contains("https://example.com"));
        assert!(masked.contains(PLACEHOLDER_PREFIX));
        assert_eq!(urls.len(), 1);
        assert_eq!(urls[0].url, "https://example.com");
    }

    /// Masking followed by restoring yields the original text.
    #[test]
    fn test_restore_urls() {
        let input = "Check https://example.com and https://other.org";
        let protector = UrlProtector::new();
        let (masked, urls) = protector.mask_urls(input);

        assert_eq!(protector.restore_urls(&masked, &urls), input);
    }

    /// The link label stays visible while the link target is masked.
    #[test]
    fn test_markdown_link_url_preserved() {
        let input = "[Claude Code](https://claude.ai/code)";
        let (masked, urls) = UrlProtector::new().mask_urls(input);

        assert!(masked.contains("[Claude Code]"));
        assert!(!masked.contains("https://claude.ai/code"));
        assert!(urls.iter().any(|entry| entry.url == "https://claude.ai/code"));
    }

    /// Plain e-mail addresses are masked as well.
    #[test]
    fn test_email_address_protected() {
        let input = "Contact noreply@anthropic.com for help";
        let (masked, urls) = UrlProtector::new().mask_urls(input);

        assert!(!masked.contains("noreply@anthropic.com"));
        assert!(urls.iter().any(|entry| entry.url == "noreply@anthropic.com"));
    }

    /// Several URLs and an e-mail address round-trip together.
    #[test]
    fn test_multiple_urls() {
        let input = "See https://a.com and https://b.org or email test@example.com";
        let protector = UrlProtector::new();
        let (masked, urls) = protector.mask_urls(input);

        assert_eq!(protector.restore_urls(&masked, &urls), input);
        assert_eq!(urls.len(), 3);
    }

    /// Text without URLs passes through unchanged.
    #[test]
    fn test_no_urls() {
        let input = "No URLs here, just plain text";
        let (masked, urls) = UrlProtector::new().mask_urls(input);

        assert_eq!(masked, input);
        assert!(urls.is_empty());
    }

    /// Detection covers bare URLs, e-mail addresses and markdown links.
    #[test]
    fn test_contains_urls() {
        assert!(UrlProtector::contains_urls("Visit https://example.com"));
        assert!(UrlProtector::contains_urls("Email user@example.com"));
        assert!(UrlProtector::contains_urls("[link](https://url.com)"));
        assert!(!UrlProtector::contains_urls("No URLs here"));
    }

    /// The transform rewrites prose but cannot touch the URL.
    #[test]
    fn test_with_protected_urls() {
        let input = "Replace Claude at https://claude.ai with something";
        let output = with_protected_urls(input, |s| s.replace("Claude", "Assistant"));

        assert!(output.contains("https://claude.ai"));
        assert!(output.contains("Assistant"));
        assert!(!output.contains("Claude"));
    }

    /// A markdown link surrounded by prose masks and restores cleanly.
    #[test]
    fn test_complex_markdown_with_urls() {
        let input = "Generated with [Claude Code](https://claude.ai/claude-code) by Claude";
        let protector = UrlProtector::new();
        let (masked, urls) = protector.mask_urls(input);

        assert!(masked.contains("[Claude Code]"));
        assert!(!masked.contains("https://claude.ai/claude-code"));
        assert_eq!(protector.restore_urls(&masked, &urls), input);
    }
}