termwiz/
hyperlink.rs

1//! Handling hyperlinks.
2//! This gist describes an escape sequence for explicitly managing hyperlinks:
3//! <https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda>
4//! We use that as the foundation of our hyperlink support, and the game
5//! plan is to then implicitly enable the hyperlink attribute for a cell
6//! as we recognize linkable input text during print() processing.
7use crate::{ensure, format_err, Result};
8use fancy_regex::{Captures, Regex};
9#[cfg(feature = "use_serde")]
10use serde::{Deserialize, Deserializer, Serialize, Serializer};
11use std::collections::HashMap;
12use std::fmt::{Display, Error as FmtError, Formatter};
13use std::hash::{Hash, Hasher};
14use std::ops::Range;
15use std::sync::Arc;
16use wezterm_dynamic::{FromDynamic, FromDynamicOptions, ToDynamic, Value};
17
/// A hyperlink target that can be attached to terminal cells.
///
/// A link is either explicit (created via [`Hyperlink::new`],
/// [`Hyperlink::new_with_id`], [`Hyperlink::new_with_params`] or
/// [`Hyperlink::parse`]) or implicit (created via
/// [`Hyperlink::new_implicit`], which is what rule matching uses).
#[cfg_attr(feature = "use_serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq, FromDynamic, ToDynamic)]
pub struct Hyperlink {
    /// The `key=value` parameters associated with the link, for
    /// example the `id` parameter set by `new_with_id`.
    params: HashMap<String, String>,
    /// The target of the link.
    uri: String,
    /// If the link was produced by an implicit or matching rule,
    /// this field will be set to true.
    implicit: bool,
}
27
28impl Hyperlink {
29    pub fn uri(&self) -> &str {
30        &self.uri
31    }
32
33    pub fn compute_shape_hash<H: Hasher>(&self, hasher: &mut H) {
34        self.uri.hash(hasher);
35        for (k, v) in &self.params {
36            k.hash(hasher);
37            v.hash(hasher);
38        }
39        self.implicit.hash(hasher);
40    }
41
42    pub fn params(&self) -> &HashMap<String, String> {
43        &self.params
44    }
45
46    pub fn new<S: Into<String>>(uri: S) -> Self {
47        Self {
48            uri: uri.into(),
49            params: HashMap::new(),
50            implicit: false,
51        }
52    }
53
54    #[inline]
55    pub fn is_implicit(&self) -> bool {
56        self.implicit
57    }
58
59    pub fn new_implicit<S: Into<String>>(uri: S) -> Self {
60        Self {
61            uri: uri.into(),
62            params: HashMap::new(),
63            implicit: true,
64        }
65    }
66
67    pub fn new_with_id<S: Into<String>, S2: Into<String>>(uri: S, id: S2) -> Self {
68        let mut params = HashMap::new();
69        params.insert("id".into(), id.into());
70        Self {
71            uri: uri.into(),
72            params,
73            implicit: false,
74        }
75    }
76
77    pub fn new_with_params<S: Into<String>>(uri: S, params: HashMap<String, String>) -> Self {
78        Self {
79            uri: uri.into(),
80            params,
81            implicit: false,
82        }
83    }
84
85    pub fn parse(osc: &[&[u8]]) -> Result<Option<Hyperlink>> {
86        ensure!(osc.len() == 3, "wrong param count");
87        if osc[1].is_empty() && osc[2].is_empty() {
88            // Clearing current hyperlink
89            Ok(None)
90        } else {
91            let param_str = String::from_utf8(osc[1].to_vec())?;
92            let uri = String::from_utf8(osc[2].to_vec())?;
93
94            let mut params = HashMap::new();
95            if !param_str.is_empty() {
96                for pair in param_str.split(':') {
97                    let mut iter = pair.splitn(2, '=');
98                    let key = iter.next().ok_or_else(|| format_err!("bad params"))?;
99                    let value = iter.next().ok_or_else(|| format_err!("bad params"))?;
100                    params.insert(key.to_owned(), value.to_owned());
101                }
102            }
103
104            Ok(Some(Hyperlink::new_with_params(uri, params)))
105        }
106    }
107}
108
109impl Display for Hyperlink {
110    fn fmt(&self, f: &mut Formatter) -> std::result::Result<(), FmtError> {
111        write!(f, "8;")?;
112        for (idx, (k, v)) in self.params.iter().enumerate() {
113            // TODO: protect against k, v containing : or =
114            if idx > 0 {
115                write!(f, ":")?;
116            }
117            write!(f, "{}={}", k, v)?;
118        }
119        // TODO: ensure that link.uri doesn't contain characters
120        // outside the range 32-126.  Need to pull in a URI/URL
121        // crate to help with this.
122        write!(f, ";{}", self.uri)?;
123
124        Ok(())
125    }
126}
127
/// In addition to handling explicit escape sequences to enable
/// hyperlinks, we also support defining rules that match text
/// from screen lines and generate implicit hyperlinks.  This
/// can be used both for making http URLs clickable and also to
/// make other text clickable.  For example, you might define
/// a rule that makes bug or issue numbers expand to the corresponding
/// URL to view the details for that issue.
/// The Rule struct is configuration that is passed to the terminal
/// and is evaluated when processing mouse hover events.
#[cfg_attr(feature = "use_serde", derive(Deserialize, Serialize))]
#[derive(Debug, Clone, FromDynamic, ToDynamic)]
pub struct Rule {
    /// The compiled regex for the rule.  This is used to match
    /// against a line of text from the screen (typically the line
    /// over which the mouse is hovering).
    /// It is serialized as, and deserialized from, its pattern string.
    #[cfg_attr(
        feature = "use_serde",
        serde(
            deserialize_with = "deserialize_regex",
            serialize_with = "serialize_regex"
        )
    )]
    #[dynamic(into = "RegexWrap", try_from = "RegexWrap")]
    pub regex: Regex,
    /// The format string that defines how to transform the matched
    /// text into a URL.  For example, a format string of `$0` expands
    /// to the entire matched text, whereas `mailto:$0` expands to
    /// the matched text with a `mailto:` prefix.  More formally,
    /// each instance of `$N` (where N is a number) in the `format`
    /// string is replaced by the capture number N from the regex.
    /// The replacements are carried out in reverse order, starting
    /// with the highest numbered capture first.  This avoids issues
    /// with ambiguous replacement of `$11` vs `$1` in the case of
    /// more complex regexes.
    pub format: String,

    /// Which capture to highlight: the index of the capture group
    /// whose span is reported as the range of the match.  Index 0 is
    /// the entire match.  Matches in which this capture group is
    /// absent are not reported.
    #[dynamic(default)]
    pub highlight: usize,
}
168
/// Newtype around `Regex` used to convert `Rule::regex` to and from
/// a dynamic `Value`: the regex is represented by its pattern string.
struct RegexWrap(Regex);
170
171impl FromDynamic for RegexWrap {
172    fn from_dynamic(
173        value: &Value,
174        options: FromDynamicOptions,
175    ) -> std::result::Result<RegexWrap, wezterm_dynamic::Error> {
176        let s = String::from_dynamic(value, options)?;
177        Ok(RegexWrap(Regex::new(&s).map_err(|e| e.to_string())?))
178    }
179}
180
181impl From<&Regex> for RegexWrap {
182    fn from(regex: &Regex) -> RegexWrap {
183        RegexWrap(regex.clone())
184    }
185}
186
187impl Into<Regex> for RegexWrap {
188    fn into(self) -> Regex {
189        self.0
190    }
191}
192
193impl ToDynamic for RegexWrap {
194    fn to_dynamic(&self) -> Value {
195        self.0.to_string().to_dynamic()
196    }
197}
198
/// Deserializes a string and compiles it into a `Regex`.
/// A pattern that fails to compile is reported as a custom
/// deserialization error carrying the debug form of the regex error.
#[cfg(feature = "use_serde")]
fn deserialize_regex<'de, D>(deserializer: D) -> std::result::Result<Regex, D::Error>
where
    D: Deserializer<'de>,
{
    let pattern = String::deserialize(deserializer)?;
    match Regex::new(&pattern) {
        Ok(regex) => Ok(regex),
        Err(err) => Err(serde::de::Error::custom(format!("{:?}", err))),
    }
}
207
/// Serializes a `Regex` as its string form.
#[cfg(feature = "use_serde")]
fn serialize_regex<S>(regex: &Regex, serializer: S) -> std::result::Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let pattern = regex.to_string();
    serializer.serialize_str(&pattern)
}
216
/// Holds a resolved rule match, as returned by
/// `Rule::match_hyperlinks`.
#[derive(Debug, PartialEq)]
pub struct RuleMatch {
    /// Holds the span (measured in bytes) of the matched text
    /// within the line that was searched.
    pub range: Range<usize>,
    /// Holds the created Hyperlink object that should be associated
    /// with the cells that correspond to the span.
    pub link: Arc<Hyperlink>,
}
226
/// An internal intermediate match result: one set of regex captures
/// paired with the rule that produced it.
#[derive(Debug)]
struct Match<'t> {
    /// The rule whose regex produced `captures`.
    rule: &'t Rule,
    /// The capture groups of a single regex match.
    captures: Captures<'t>,
}
233
234impl<'t> Match<'t> {
235    /// Returns the length of the matched text in bytes (not cells!)
236    fn len(&self) -> usize {
237        let c0 = self.highlight().unwrap();
238        c0.end() - c0.start()
239    }
240
241    /// Returns the span of the matched text, measured in bytes (not cells!)
242    fn range(&self) -> Range<usize> {
243        let c0 = self.highlight().unwrap();
244        c0.start()..c0.end()
245    }
246
247    fn highlight(&self) -> Option<fancy_regex::Match> {
248        self.captures.get(self.rule.highlight)
249    }
250
251    /// Expand replacements in the format string to yield the URL
252    /// The replacement is as described on Rule::format.
253    fn expand(&self) -> String {
254        let mut result = self.rule.format.clone();
255        // Start with the highest numbered capture and decrement.
256        // This avoids ambiguity when replacing $11 vs $1.
257        for n in (0..self.captures.len()).rev() {
258            let search = format!("${}", n);
259            if let Some(rep) = self.captures.get(n) {
260                result = result.replace(&search, rep.as_str());
261            } else {
262                result = result.replace(&search, "");
263            }
264        }
265        result
266    }
267}
/// Pattern for a `scheme://` URL that ends with a parenthesized
/// section.  The closing parenthesis is included in the match only
/// when it is followed by whitespace, the end of the text, or a
/// character outside the set `_`, `/`, `-`, letters and digits.
pub const CLOSING_PARENTHESIS_HYPERLINK_PATTERN: &str =
    r"\b\w+://[^\s()]*\(\S*\)(?=\s|$|[^_/a-zA-Z0-9-])";
/// Pattern for a `scheme://` URL made of non-whitespace characters
/// whose final character is `_`, `/`, `-`, a letter or a digit, so
/// that other trailing characters are left out of the match.
pub const GENERIC_HYPERLINK_PATTERN: &str = r"\b\w+://\S+[_/a-zA-Z0-9-]";
271
272impl Rule {
273    /// Construct a new rule.  It may fail if the regex is invalid.
274    pub fn new(regex: &str, format: &str) -> Result<Self> {
275        Self::with_highlight(regex, format, 0)
276    }
277
278    pub fn with_highlight(regex: &str, format: &str, highlight: usize) -> Result<Self> {
279        Ok(Self {
280            regex: Regex::new(regex)?,
281            format: format.to_owned(),
282            highlight,
283        })
284    }
285
286    /// Given a line of text from the terminal screen, and a set of
287    /// rules, return the set of RuleMatches.
288    pub fn match_hyperlinks(line: &str, rules: &[Rule]) -> Vec<RuleMatch> {
289        let mut matches = Vec::new();
290        for rule in rules.iter() {
291            for capture_result in rule.regex.captures_iter(line) {
292                if let Ok(captures) = capture_result {
293                    let m = Match { rule, captures };
294                    if m.highlight().is_some() {
295                        matches.push(m);
296                    }
297                }
298            }
299        }
300        // Sort the matches by descending match length.
301        // This is to avoid confusion if multiple rules match the
302        // same sections of text.
303        matches.sort_by(|a, b| b.len().cmp(&a.len()));
304
305        matches
306            .into_iter()
307            .map(|m| {
308                let url = m.expand();
309                let link = Arc::new(Hyperlink::new_implicit(url));
310                RuleMatch {
311                    link,
312                    range: m.range(),
313                }
314            })
315            .collect()
316    }
317}
318
#[cfg(test)]
mod test {
    use super::*;

    /// Checks implicit link detection for a URL rule and an email
    /// rule, including the byte ranges and the ordering of results.
    #[test]
    fn parse_implicit() {
        let rules = vec![
            Rule::new(r"\b\w+://(?:[\w.-]+)\.[a-z]{2,15}\S*\b", "$0").unwrap(),
            Rule::new(r"\b\w+@[\w-]+(\.[\w-]+)+\b", "mailto:$0").unwrap(),
        ];

        // The two leading spaces are not part of the match.
        assert_eq!(
            Rule::match_hyperlinks("  http://example.com", &rules),
            vec![RuleMatch {
                range: 2..20,
                link: Arc::new(Hyperlink::new_implicit("http://example.com")),
            }]
        );

        assert_eq!(
            Rule::match_hyperlinks("  foo@example.com woot@example.com", &rules),
            vec![
                // Longest match first
                RuleMatch {
                    range: 18..34,
                    link: Arc::new(Hyperlink::new_implicit("mailto:woot@example.com")),
                },
                RuleMatch {
                    range: 2..17,
                    link: Arc::new(Hyperlink::new_implicit("mailto:foo@example.com")),
                },
            ]
        );
    }

    /// Checks how the two built-in URL patterns treat parentheses at
    /// and near the end of a URL.
    #[test]
    fn parse_with_parentheses() {
        /// Matches `test_uri` against both built-in patterns and
        /// asserts that the first (longest) result links to
        /// `expected_uri`.
        fn assert_helper(test_uri: &str, expected_uri: &str, msg: &str) {
            let rules = vec![
                Rule::new(CLOSING_PARENTHESIS_HYPERLINK_PATTERN, "$0").unwrap(),
                Rule::new(GENERIC_HYPERLINK_PATTERN, "$0").unwrap(),
            ];

            assert_eq!(
                Rule::match_hyperlinks(test_uri, &rules)[0].link.uri,
                expected_uri,
                "{}",
                msg,
            );
        }

        assert_helper(
            "   http://example.com)",
            "http://example.com",
            "Unblanced terminating parenthesis should not be captured.",
        );

        assert_helper(
            "http://example.com/(complete_parentheses)",
            "http://example.com/(complete_parentheses)",
            "Balanced terminating parenthesis should be captureed.",
        );

        assert_helper(
            "http://example.com/(complete_parentheses)>",
            "http://example.com/(complete_parentheses)",
            "Non-URL characters after a balanced terminating parenthesis should be dropped.",
        );

        assert_helper(
            "http://example.com/(complete_parentheses))",
            "http://example.com/(complete_parentheses))",
            "Non-terminating parentheses should not impact matching the entire URL - Terminated with )",
        );

        assert_helper(
            "http://example.com/(complete_parentheses)-((-)-()-_-",
            "http://example.com/(complete_parentheses)-((-)-()-_-",
            "Non-terminating parentheses should not impact matching the entire URL - Terminated with a valid character",
        );
    }
}