use crate::{ensure, format_err, Result};
use fancy_regex::{Captures, Regex};
#[cfg(feature = "use_serde")]
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::collections::HashMap;
use std::fmt::{Display, Error as FmtError, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::Range;
use std::sync::Arc;
use wezterm_dynamic::{FromDynamic, FromDynamicOptions, ToDynamic, Value};
/// A hyperlink target: a URI plus optional `key=value` parameters.
///
/// The `Display` impl renders it as `8;<params>;<uri>` and `Hyperlink::parse`
/// builds one from the corresponding pieces of an OSC parameter list.
#[cfg_attr(feature = "use_serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq, FromDynamic, ToDynamic)]
pub struct Hyperlink {
/// Parameters attached to the link. `Hyperlink::new_with_id` stores its
/// identifier here under the `id` key.
params: HashMap<String, String>,
/// The link target.
uri: String,
/// `true` only for links created with `Hyperlink::new_implicit`; every
/// other constructor sets it to `false`.
implicit: bool,
}
impl Hyperlink {
    /// Returns the link target.
    pub fn uri(&self) -> &str {
        &self.uri
    }

    /// Feeds the URI, every parameter and the implicit flag into `hasher`.
    ///
    /// Parameters are hashed in sorted key order. `HashMap` iteration order is
    /// arbitrary and may differ between two maps holding the same entries, so
    /// hashing in iteration order could give equal hyperlinks different hashes.
    pub fn compute_shape_hash<H: Hasher>(&self, hasher: &mut H) {
        self.uri.hash(hasher);
        let mut params: Vec<(&String, &String)> = self.params.iter().collect();
        // Keys are unique, so sorting the pairs orders them by key.
        params.sort_unstable();
        for (k, v) in params {
            k.hash(hasher);
            v.hash(hasher);
        }
        self.implicit.hash(hasher);
    }

    /// Returns the parameters attached to the link.
    pub fn params(&self) -> &HashMap<String, String> {
        &self.params
    }

    /// Creates a non-implicit hyperlink with no parameters.
    pub fn new<S: Into<String>>(uri: S) -> Self {
        Self {
            uri: uri.into(),
            params: HashMap::new(),
            implicit: false,
        }
    }

    /// Returns `true` if this link was created with [`Hyperlink::new_implicit`].
    #[inline]
    pub fn is_implicit(&self) -> bool {
        self.implicit
    }

    /// Creates a hyperlink with no parameters that is flagged as implicit.
    pub fn new_implicit<S: Into<String>>(uri: S) -> Self {
        Self {
            uri: uri.into(),
            params: HashMap::new(),
            implicit: true,
        }
    }

    /// Creates a non-implicit hyperlink whose only parameter is `id`.
    pub fn new_with_id<S: Into<String>, S2: Into<String>>(uri: S, id: S2) -> Self {
        let mut params = HashMap::new();
        params.insert("id".into(), id.into());
        Self {
            uri: uri.into(),
            params,
            implicit: false,
        }
    }

    /// Creates a non-implicit hyperlink with the supplied parameters.
    pub fn new_with_params<S: Into<String>>(uri: S, params: HashMap<String, String>) -> Self {
        Self {
            uri: uri.into(),
            params,
            implicit: false,
        }
    }

    /// Parses the pieces of an OSC hyperlink sequence.
    ///
    /// `osc` must hold exactly three elements. `osc[0]` is not inspected,
    /// `osc[1]` is a `:`-separated list of `key=value` parameters and `osc[2]`
    /// is the URI. When both `osc[1]` and `osc[2]` are empty the result is
    /// `Ok(None)`.
    ///
    /// # Errors
    ///
    /// Fails when `osc` does not have three elements, when the parameters or
    /// the URI are not valid UTF-8, or when a parameter has no `=`.
    pub fn parse(osc: &[&[u8]]) -> Result<Option<Hyperlink>> {
        ensure!(osc.len() == 3, "wrong param count");
        if osc[1].is_empty() && osc[2].is_empty() {
            Ok(None)
        } else {
            let param_str = String::from_utf8(osc[1].to_vec())?;
            let uri = String::from_utf8(osc[2].to_vec())?;

            let mut params = HashMap::new();
            if !param_str.is_empty() {
                for pair in param_str.split(':') {
                    // Split on the first `=` only so values may contain `=`.
                    let mut iter = pair.splitn(2, '=');
                    let key = iter.next().ok_or_else(|| format_err!("bad params"))?;
                    let value = iter.next().ok_or_else(|| format_err!("bad params"))?;
                    params.insert(key.to_owned(), value.to_owned());
                }
            }

            Ok(Some(Hyperlink::new_with_params(uri, params)))
        }
    }
}
/// Renders the hyperlink as `8;<params>;<uri>`, where `<params>` is the
/// `:`-separated list of `key=value` pairs (empty when there are none).
impl Display for Hyperlink {
    fn fmt(&self, f: &mut Formatter) -> std::result::Result<(), FmtError> {
        f.write_str("8;")?;
        // Empty before the first pair, ":" before every later one.
        let mut separator = "";
        for (key, value) in &self.params {
            write!(f, "{}{}={}", separator, key, value)?;
            separator = ":";
        }
        write!(f, ";{}", self.uri)
    }
}
/// A rule that turns text matching `regex` into an implicit hyperlink.
///
/// `Rule::match_hyperlinks` runs each rule's regex over a line of text and
/// builds the link URI for every match by expanding `format`.
#[cfg_attr(feature = "use_serde", derive(Deserialize, Serialize))]
#[derive(Debug, Clone, FromDynamic, ToDynamic)]
pub struct Rule {
/// Compiled pattern searched for in each line. It is converted to and from
/// its pattern string by the serde helpers and by `RegexWrap`.
#[cfg_attr(
feature = "use_serde",
serde(
deserialize_with = "deserialize_regex",
serialize_with = "serialize_regex"
)
)]
#[dynamic(into = "RegexWrap", try_from = "RegexWrap")]
pub regex: Regex,
/// Template for the link URI; `$N` is replaced with the text of capture `N`.
pub format: String,
/// Index of the capture whose span is reported as the match range.
/// `Rule::new` uses 0, the whole match.
#[dynamic(default)]
pub highlight: usize,
}
/// Newtype around `Regex` that converts to and from a dynamic `Value` holding
/// the pattern string; named by the `#[dynamic(into, try_from)]` attribute on
/// `Rule::regex`.
struct RegexWrap(Regex);
/// Builds a `RegexWrap` by reading a string from the dynamic value and
/// compiling it as a regex.
impl FromDynamic for RegexWrap {
    fn from_dynamic(
        value: &Value,
        options: FromDynamicOptions,
    ) -> std::result::Result<RegexWrap, wezterm_dynamic::Error> {
        let pattern = String::from_dynamic(value, options)?;
        match Regex::new(&pattern) {
            Ok(compiled) => Ok(RegexWrap(compiled)),
            // A compile failure is reported through its display text.
            Err(err) => Err(err.to_string().into()),
        }
    }
}
impl From<&Regex> for RegexWrap {
fn from(regex: &Regex) -> RegexWrap {
RegexWrap(regex.clone())
}
}
/// Unwraps a `RegexWrap` back into the compiled `Regex` it carries.
///
/// Implemented as `From` rather than a hand-written `Into`: the standard
/// library's blanket impls still provide `Into<Regex> for RegexWrap` (and the
/// matching `TryFrom`), so existing conversions keep working.
impl From<RegexWrap> for Regex {
    fn from(wrap: RegexWrap) -> Regex {
        wrap.0
    }
}
impl ToDynamic for RegexWrap {
fn to_dynamic(&self) -> Value {
self.0.to_string().to_dynamic()
}
}
/// Serde helper for `Rule::regex`: reads a string and compiles it.
///
/// A pattern that fails to compile becomes a custom deserialization error
/// carrying the debug rendering of the regex error.
#[cfg(feature = "use_serde")]
fn deserialize_regex<'de, D>(deserializer: D) -> std::result::Result<Regex, D::Error>
where
    D: Deserializer<'de>,
{
    let pattern = String::deserialize(deserializer)?;
    match Regex::new(&pattern) {
        Ok(regex) => Ok(regex),
        Err(err) => Err(serde::de::Error::custom(format!("{:?}", err))),
    }
}
/// Serde helper for `Rule::regex`: serializes the regex as its string form.
#[cfg(feature = "use_serde")]
fn serialize_regex<S>(regex: &Regex, serializer: S) -> std::result::Result<S::Ok, S::Error>
where
    S: Serializer,
{
    regex.to_string().serialize(serializer)
}
/// One implicit hyperlink found in a line by `Rule::match_hyperlinks`.
#[derive(Debug, PartialEq)]
pub struct RuleMatch {
/// Span of the rule's highlight capture within the searched line, as
/// reported by the regex match (`start()..end()`).
pub range: Range<usize>,
/// The implicit hyperlink built by expanding the rule's format string.
pub link: Arc<Hyperlink>,
}
/// Internal pairing of a rule with the captures from one of its matches.
#[derive(Debug)]
struct Match<'t> {
/// The rule whose regex produced `captures`.
rule: &'t Rule,
/// Capture groups of a single match of `rule.regex`.
captures: Captures<'t>,
}
impl<'t> Match<'t> {
    /// Length of the highlighted capture (`end - start`).
    ///
    /// # Panics
    ///
    /// Panics if the rule's highlight capture did not take part in the match.
    fn len(&self) -> usize {
        let span = self.range();
        span.end - span.start
    }

    /// Span of the highlighted capture within the searched text.
    ///
    /// # Panics
    ///
    /// Panics if the rule's highlight capture did not take part in the match.
    fn range(&self) -> Range<usize> {
        let c0 = self
            .highlight()
            .expect("the highlight capture must take part in the match");
        c0.start()..c0.end()
    }

    /// The capture selected by the rule's `highlight` index, if it took part
    /// in the match.
    fn highlight(&self) -> Option<fancy_regex::Match> {
        self.captures.get(self.rule.highlight)
    }

    /// Expands the rule's format string, replacing each `$N` with the text of
    /// capture `N` (or with nothing when that capture did not take part).
    ///
    /// The format string is scanned exactly once, so captured text is copied
    /// verbatim: a `$N` that appears inside a capture is never expanded again.
    /// A `$` that does not name an existing capture is kept as-is.
    fn expand(&self) -> String {
        let format = self.rule.format.as_str();
        let capture_count = self.captures.len();
        let mut result = String::with_capacity(format.len());
        let mut rest = format;

        while let Some(dollar) = rest.find('$') {
            result.push_str(&rest[..dollar]);
            let after_dollar = &rest[dollar + 1..];
            match Self::capture_reference(after_dollar, capture_count) {
                Some((index, digits)) => {
                    if let Some(rep) = self.captures.get(index) {
                        result.push_str(rep.as_str());
                    }
                    rest = &after_dollar[digits..];
                }
                None => {
                    result.push('$');
                    rest = after_dollar;
                }
            }
        }

        result.push_str(rest);
        result
    }

    /// Resolves the capture reference at the start of `after_dollar` (the text
    /// following a `$`), returning the capture index and the number of digits
    /// it occupies.
    ///
    /// The longest run of digits naming an existing capture wins, so `$10`
    /// means capture 10 when it exists and capture 1 followed by `0` otherwise.
    fn capture_reference(after_dollar: &str, capture_count: usize) -> Option<(usize, usize)> {
        let digit_run = after_dollar
            .bytes()
            .take_while(u8::is_ascii_digit)
            .count();
        // A leading zero can only spell capture 0; `$01` is `$0` followed by `1`.
        let longest = if after_dollar.starts_with('0') {
            1
        } else {
            digit_run
        };
        (1..=longest).rev().find_map(|len| {
            // Digit runs too long for `usize` fail to parse and are skipped.
            let index = after_dollar[..len].parse::<usize>().ok()?;
            if index < capture_count {
                Some((index, len))
            } else {
                None
            }
        })
    }
}
/// Pattern for URLs that end with a parenthesised section: `scheme://`, a run
/// of characters that are neither whitespace nor parentheses, then `(`, any
/// non-whitespace run and a closing `)`. The lookahead requires the `)` to be
/// followed by whitespace, the end of the text, or a character outside
/// `_/a-zA-Z0-9-`.
pub const CLOSING_PARENTHESIS_HYPERLINK_PATTERN: &str =
r"\b\w+://[^\s()]*\(\S*\)(?=\s|$|[^_/a-zA-Z0-9-])";
/// Pattern for general URLs: `scheme://` followed by non-whitespace
/// characters, with the match required to end on one of `_/a-zA-Z0-9-`.
pub const GENERIC_HYPERLINK_PATTERN: &str = r"\b\w+://\S+[_/a-zA-Z0-9-]";
impl Rule {
pub fn new(regex: &str, format: &str) -> Result<Self> {
Self::with_highlight(regex, format, 0)
}
pub fn with_highlight(regex: &str, format: &str, highlight: usize) -> Result<Self> {
Ok(Self {
regex: Regex::new(regex)?,
format: format.to_owned(),
highlight,
})
}
pub fn match_hyperlinks(line: &str, rules: &[Rule]) -> Vec<RuleMatch> {
let mut matches = Vec::new();
for rule in rules.iter() {
for capture_result in rule.regex.captures_iter(line) {
if let Ok(captures) = capture_result {
let m = Match { rule, captures };
if m.highlight().is_some() {
matches.push(m);
}
}
}
}
matches.sort_by(|a, b| b.len().cmp(&a.len()));
matches
.into_iter()
.map(|m| {
let url = m.expand();
let link = Arc::new(Hyperlink::new_implicit(url));
RuleMatch {
link,
range: m.range(),
}
})
.collect()
}
}
#[cfg(test)]
mod test {
    use super::*;

    /// Implicit links are reported longest match first, with ranges that are
    /// offsets into the searched line.
    #[test]
    fn parse_implicit() {
        let rules = vec![
            Rule::new(r"\b\w+://(?:[\w.-]+)\.[a-z]{2,15}\S*\b", "$0").unwrap(),
            Rule::new(r"\b\w+@[\w-]+(\.[\w-]+)+\b", "mailto:$0").unwrap(),
        ];

        // The input starts with TWO spaces: the expected range 2..20 only
        // fits an 18-character URL that begins at offset 2.
        assert_eq!(
            Rule::match_hyperlinks("  http://example.com", &rules),
            vec![RuleMatch {
                range: 2..20,
                link: Arc::new(Hyperlink::new_implicit("http://example.com")),
            }]
        );

        // Again two leading spaces, so the addresses sit at 2..17 and 18..34.
        // The longer address is reported first.
        assert_eq!(
            Rule::match_hyperlinks("  foo@example.com woot@example.com", &rules),
            vec![
                RuleMatch {
                    range: 18..34,
                    link: Arc::new(Hyperlink::new_implicit("mailto:woot@example.com")),
                },
                RuleMatch {
                    range: 2..17,
                    link: Arc::new(Hyperlink::new_implicit("mailto:foo@example.com")),
                },
            ]
        );
    }

    /// Exercises the two exported URL patterns together on URLs that contain
    /// balanced and unbalanced parentheses.
    #[test]
    fn parse_with_parentheses() {
        /// Asserts that the first link found in `test_uri` is `expected_uri`.
        fn assert_helper(test_uri: &str, expected_uri: &str, msg: &str) {
            let rules = vec![
                Rule::new(CLOSING_PARENTHESIS_HYPERLINK_PATTERN, "$0").unwrap(),
                Rule::new(GENERIC_HYPERLINK_PATTERN, "$0").unwrap(),
            ];

            assert_eq!(
                Rule::match_hyperlinks(test_uri, &rules)[0].link.uri,
                expected_uri,
                "{}",
                msg,
            );
        }

        assert_helper(
            " http://example.com)",
            "http://example.com",
            "Unblanced terminating parenthesis should not be captured.",
        );
        assert_helper(
            "http://example.com/(complete_parentheses)",
            "http://example.com/(complete_parentheses)",
            "Balanced terminating parenthesis should be captureed.",
        );
        assert_helper(
            "http://example.com/(complete_parentheses)>",
            "http://example.com/(complete_parentheses)",
            "Non-URL characters after a balanced terminating parenthesis should be dropped.",
        );
        assert_helper(
            "http://example.com/(complete_parentheses))",
            "http://example.com/(complete_parentheses))",
            "Non-terminating parentheses should not impact matching the entire URL - Terminated with )",
        );
        assert_helper(
            "http://example.com/(complete_parentheses)-((-)-()-_-",
            "http://example.com/(complete_parentheses)-((-)-()-_-",
            "Non-terminating parentheses should not impact matching the entire URL - Terminated with a valid character",
        );
    }
}