use alloc::sync::Arc;
use core::ops::Range;
use fancy_regex::{Captures, Regex};
#[cfg(feature = "use_serde")]
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use wezterm_dynamic::{FromDynamic, FromDynamicOptions, ToDynamic, Value};
extern crate alloc;
use crate::alloc::borrow::ToOwned;
use crate::alloc::string::ToString;
use alloc::format;
use alloc::string::String;
use alloc::vec::Vec;
pub use wezterm_escape_parser::hyperlink::Hyperlink;
/// A rule that turns text matching a regular expression into an implicit
/// hyperlink.
///
/// `Rule::match_hyperlinks` runs `regex` against a line of text and, for
/// every match, expands `format` into the URI of the resulting link.
#[cfg_attr(feature = "use_serde", derive(Deserialize, Serialize))]
#[derive(Debug, Clone, FromDynamic, ToDynamic)]
pub struct Rule {
/// The compiled pattern that is matched against a line of text.
///
/// It is (de)serialized as its pattern string: via `deserialize_regex` /
/// `serialize_regex` for serde, and via `RegexWrap` for dynamic values.
#[cfg_attr(
feature = "use_serde",
serde(
deserialize_with = "deserialize_regex",
serialize_with = "serialize_regex"
)
)]
#[dynamic(into = "RegexWrap", try_from = "RegexWrap")]
pub regex: Regex,
/// Template for the generated URI.  Each `$N` (N being a decimal capture
/// group number) is replaced by the text of capture group N; `$0` is the
/// entire match.
pub format: String,
/// Index of the capture group whose span is reported as the range of the
/// link.  `Rule::new` uses 0 (the entire match).
#[dynamic(default)]
pub highlight: usize,
}
/// Private newtype around [`Regex`] named by the `#[dynamic(...)]` attribute
/// on `Rule::regex`; it converts a regex to and from a dynamic string value
/// holding the pattern text.
struct RegexWrap(Regex);
impl FromDynamic for RegexWrap {
fn from_dynamic(
value: &Value,
options: FromDynamicOptions,
) -> Result<RegexWrap, wezterm_dynamic::Error> {
let s = String::from_dynamic(value, options)?;
Ok(RegexWrap(Regex::new(&s).map_err(|e| e.to_string())?))
}
}
impl From<&Regex> for RegexWrap {
fn from(regex: &Regex) -> RegexWrap {
RegexWrap(regex.clone())
}
}
/// Unwraps the inner [`Regex`].
///
/// This is written as `From<RegexWrap> for Regex` rather than a hand-rolled
/// `Into<Regex> for RegexWrap`: the standard blanket impls derive
/// `Into<Regex>` (and `TryFrom<RegexWrap>`) from it, so existing `.into()`
/// call sites keep working while `Regex::from(wrap)` becomes available too.
impl From<RegexWrap> for Regex {
    fn from(wrapped: RegexWrap) -> Regex {
        wrapped.0
    }
}
/// Emits the wrapped regex as a dynamic string value.
impl ToDynamic for RegexWrap {
    fn to_dynamic(&self) -> Value {
        // The regex is rendered through its `to_string()` output.
        let pattern: String = self.0.to_string();
        pattern.to_dynamic()
    }
}
/// Serde helper for `Rule::regex`: deserializes a string and compiles it
/// into a [`Regex`].
///
/// # Errors
/// Propagates the deserializer's error if the input is not a string, and
/// returns a custom error holding the debug rendering of the regex error
/// if the pattern does not compile.
#[cfg(feature = "use_serde")]
fn deserialize_regex<'de, D>(deserializer: D) -> Result<Regex, D::Error>
where
    D: Deserializer<'de>,
{
    let pattern = <String as Deserialize>::deserialize(deserializer)?;
    match Regex::new(&pattern) {
        Ok(compiled) => Ok(compiled),
        Err(err) => Err(<D::Error as serde::de::Error>::custom(format!(
            "{:?}",
            err
        ))),
    }
}
/// Serde helper for `Rule::regex`: serializes the regex as the string
/// produced by its `to_string()`.
#[cfg(feature = "use_serde")]
fn serialize_regex<S>(regex: &Regex, serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let pattern: String = regex.to_string();
    Serialize::serialize(&pattern, serializer)
}
/// One hyperlink produced by `Rule::match_hyperlinks`.
#[derive(Debug, PartialEq)]
pub struct RuleMatch {
/// Byte offsets, within the matched line, of the rule's highlighted
/// capture group.
pub range: Range<usize>,
/// The implicit hyperlink whose URI is the rule's expanded `format`.
pub link: Arc<Hyperlink>,
}
/// Internal intermediate result: a single regex match paired with the rule
/// that produced it, kept until it is converted into a `RuleMatch`.
#[derive(Debug)]
struct Match<'t> {
/// The rule whose regex produced `captures`.
rule: &'t Rule,
/// The capture groups of this particular match.
captures: Captures<'t>,
}
impl<'t> Match<'t> {
    /// Length in bytes of the highlighted capture.
    ///
    /// # Panics
    /// Panics if the rule's `highlight` capture did not participate in the
    /// match; `Rule::match_hyperlinks` filters such matches out beforehand.
    fn len(&self) -> usize {
        let span = self
            .highlight()
            .expect("highlight capture must be present; checked by match_hyperlinks");
        span.end() - span.start()
    }

    /// Byte range of the highlighted capture within the matched line.
    ///
    /// # Panics
    /// Same condition as [`Match::len`].
    fn range(&self) -> Range<usize> {
        let span = self
            .highlight()
            .expect("highlight capture must be present; checked by match_hyperlinks");
        span.start()..span.end()
    }

    /// The capture group selected by the rule's `highlight` index, or `None`
    /// if that group does not exist or did not participate in the match.
    fn highlight(&self) -> Option<fancy_regex::Match> {
        self.captures.get(self.rule.highlight)
    }

    /// Expands the rule's format string into the link URI.
    ///
    /// Every `$N`, where `N` is the decimal number of an existing capture
    /// group, is replaced with that group's text (or with nothing if the
    /// group did not participate).  When several group numbers could be read
    /// at one position, the longest valid one wins, so `$11` means group 11
    /// when it exists and otherwise group 1 followed by a literal `1`.
    ///
    /// The format is scanned once, left to right, and substituted text is
    /// never rescanned.  Captured text comes from the matched line, so it
    /// must not be able to smuggle in further `$N` placeholders; repeated
    /// whole-string replacement would expand those.
    fn expand(&self) -> String {
        let format = self.rule.format.as_str();
        let capture_count = self.captures.len();
        let mut result = String::with_capacity(format.len());
        let mut rest = format;

        while let Some(dollar) = rest.find('$') {
            // Literal text in front of the `$` is copied verbatim.
            result.push_str(&rest[..dollar]);
            let after = &rest[dollar + 1..];
            match Self::placeholder(after, capture_count) {
                Some((index, digits)) => {
                    if let Some(capture) = self.captures.get(index) {
                        result.push_str(capture.as_str());
                    }
                    rest = &after[digits..];
                }
                None => {
                    // Not a placeholder: keep the `$` as literal text.
                    result.push('$');
                    rest = after;
                }
            }
        }
        result.push_str(rest);
        result
    }

    /// Parses the capture index at the start of `text` (the text directly
    /// after a `$`).
    ///
    /// Returns `(index, digits_consumed)` for the longest decimal prefix that
    /// names a group below `capture_count`, or `None` if there is none.
    /// Indices are canonical decimal, so a leading `0` is always group 0 on
    /// its own (`$01` is group 0 followed by a literal `1`).
    fn placeholder(text: &str, capture_count: usize) -> Option<(usize, usize)> {
        let mut best = None;
        let mut index: usize = 0;
        for (pos, byte) in text.bytes().enumerate() {
            if !byte.is_ascii_digit() {
                break;
            }
            let digit = usize::from(byte - b'0');
            index = match index.checked_mul(10).and_then(|v| v.checked_add(digit)) {
                Some(value) if value < capture_count => value,
                // Out of range (or overflow): longer prefixes only get larger.
                _ => break,
            };
            best = Some((index, pos + 1));
            if index == 0 {
                // Only reachable on the first digit; `$0` never extends.
                break;
            }
        }
        best
    }
}
/// Pattern for `scheme://` URLs that contain a parenthesised section and end
/// with `)`.  The closing parenthesis is only accepted when it is followed
/// (lookahead, not consumed) by whitespace, the end of the input, or a
/// character outside `[_/a-zA-Z0-9-]`.
pub const CLOSING_PARENTHESIS_HYPERLINK_PATTERN: &str =
r"\b\w+://[^\s()]*\(\S*\)(?=\s|$|[^_/a-zA-Z0-9-])";
/// Pattern for generic `scheme://` URLs: a run of non-whitespace characters
/// whose final character must be one of `[_/a-zA-Z0-9-]`.
pub const GENERIC_HYPERLINK_PATTERN: &str = r"\b\w+://\S+[_/a-zA-Z0-9-]";
impl Rule {
pub fn new(regex: &str, format: &str) -> Result<Self, fancy_regex::Error> {
Self::with_highlight(regex, format, 0)
}
pub fn with_highlight(
regex: &str,
format: &str,
highlight: usize,
) -> Result<Self, fancy_regex::Error> {
Ok(Self {
regex: Regex::new(regex)?,
format: format.to_owned(),
highlight,
})
}
pub fn match_hyperlinks(line: &str, rules: &[Rule]) -> Vec<RuleMatch> {
let mut matches = Vec::new();
for rule in rules.iter() {
for capture_result in rule.regex.captures_iter(line) {
if let Ok(captures) = capture_result {
let m = Match { rule, captures };
if m.highlight().is_some() {
matches.push(m);
}
}
}
}
matches.sort_by(|a, b| b.len().cmp(&a.len()));
matches
.into_iter()
.map(|m| {
let url = m.expand();
let link = Arc::new(Hyperlink::new_implicit(url));
RuleMatch {
link,
range: m.range(),
}
})
.collect()
}
}
#[cfg(test)]
mod test {
    use super::*;

    /// Implicit links are found for URLs and e-mail addresses, with byte
    /// ranges relative to the start of the line and longer matches first.
    #[test]
    fn parse_implicit() {
        let rules = vec![
            Rule::new(r"\b\w+://(?:[\w.-]+)\.[a-z]{2,15}\S*\b", "$0").unwrap(),
            Rule::new(r"\b\w+@[\w-]+(\.[\w-]+)+\b", "mailto:$0").unwrap(),
        ];

        // The inputs start with exactly two spaces: the expected ranges are
        // byte offsets into the line, so the URL must begin at offset 2.
        assert_eq!(
            Rule::match_hyperlinks("  http://example.com", &rules),
            vec![RuleMatch {
                range: 2..20,
                link: Arc::new(Hyperlink::new_implicit("http://example.com")),
            }]
        );

        assert_eq!(
            Rule::match_hyperlinks("  foo@example.com woot@example.com", &rules),
            vec![
                // Longest match first.
                RuleMatch {
                    range: 18..34,
                    link: Arc::new(Hyperlink::new_implicit("mailto:woot@example.com")),
                },
                RuleMatch {
                    range: 2..17,
                    link: Arc::new(Hyperlink::new_implicit("mailto:foo@example.com")),
                },
            ]
        );
    }

    /// The two exported hyperlink patterns together handle balanced and
    /// unbalanced parentheses at the end of a URL.
    #[test]
    fn parse_with_parentheses() {
        fn assert_helper(test_uri: &str, expected_uri: &str, msg: &str) {
            let rules = vec![
                Rule::new(CLOSING_PARENTHESIS_HYPERLINK_PATTERN, "$0").unwrap(),
                Rule::new(GENERIC_HYPERLINK_PATTERN, "$0").unwrap(),
            ];

            assert_eq!(
                Rule::match_hyperlinks(test_uri, &rules)[0].link.uri(),
                expected_uri,
                "{}",
                msg,
            );
        }

        assert_helper(
            " http://example.com)",
            "http://example.com",
            "Unblanced terminating parenthesis should not be captured.",
        );
        assert_helper(
            "http://example.com/(complete_parentheses)",
            "http://example.com/(complete_parentheses)",
            "Balanced terminating parenthesis should be captureed.",
        );
        assert_helper(
            "http://example.com/(complete_parentheses)>",
            "http://example.com/(complete_parentheses)",
            "Non-URL characters after a balanced terminating parenthesis should be dropped.",
        );
        assert_helper(
            "http://example.com/(complete_parentheses))",
            "http://example.com/(complete_parentheses))",
            "Non-terminating parentheses should not impact matching the entire URL - Terminated with )",
        );
        assert_helper(
            "http://example.com/(complete_parentheses)-((-)-()-_-",
            "http://example.com/(complete_parentheses)-((-)-()-_-",
            "Non-terminating parentheses should not impact matching the entire URL - Terminated with a valid character",
        );
    }
}