grep_printer/hyperlink/
mod.rs

1use std::{cell::RefCell, io, path::Path, sync::Arc};
2
3use {
4    bstr::ByteSlice,
5    termcolor::{HyperlinkSpec, WriteColor},
6};
7
8use crate::util::DecimalFormatter;
9
10use self::aliases::HYPERLINK_PATTERN_ALIASES;
11
12mod aliases;
13
/// Hyperlink configuration.
///
/// This configuration specifies both the [hyperlink format](HyperlinkFormat)
/// and an [environment](HyperlinkEnvironment) for interpolating a subset of
/// variables. The specific subset includes variables that are intended to
/// be invariant throughout the lifetime of a process, such as a machine's
/// hostname.
///
/// The environment and format are stored together behind an `Arc`, so cloning
/// a configuration does not copy them.
///
/// A hyperlink configuration can be provided to printer builders such as
/// [`StandardBuilder::hyperlink`](crate::StandardBuilder::hyperlink).
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct HyperlinkConfig(Arc<HyperlinkConfigInner>);
26
/// The data shared by every clone of a `HyperlinkConfig`.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct HyperlinkConfigInner {
    /// Values for the variables that do not change while the process runs.
    env: HyperlinkEnvironment,
    /// The parsed hyperlink format to interpolate.
    format: HyperlinkFormat,
}
32
33impl HyperlinkConfig {
34    /// Create a new configuration from an environment and a format.
35    pub fn new(
36        env: HyperlinkEnvironment,
37        format: HyperlinkFormat,
38    ) -> HyperlinkConfig {
39        HyperlinkConfig(Arc::new(HyperlinkConfigInner { env, format }))
40    }
41
42    /// Returns the hyperlink environment in this configuration.
43    pub(crate) fn environment(&self) -> &HyperlinkEnvironment {
44        &self.0.env
45    }
46
47    /// Returns the hyperlink format in this configuration.
48    pub(crate) fn format(&self) -> &HyperlinkFormat {
49        &self.0.format
50    }
51}
52
/// A hyperlink format with variables.
///
/// This can be created by parsing a string using `HyperlinkFormat::from_str`.
/// The string is either a format such as `file://{host}{path}` or the name of
/// a built-in alias (as in the example below).
///
/// The default format is empty. An empty format is valid and effectively
/// disables hyperlinks.
///
/// # Example
///
/// ```
/// use grep_printer::HyperlinkFormat;
///
/// let fmt = "vscode".parse::<HyperlinkFormat>()?;
/// assert_eq!(fmt.to_string(), "vscode://file{path}:{line}:{column}");
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct HyperlinkFormat {
    /// The literal text and variable parts of the format, in order.
    parts: Vec<Part>,
    /// Whether `parts` contains a `{line}` variable.
    is_line_dependent: bool,
}
75
76impl HyperlinkFormat {
77    /// Creates an empty hyperlink format.
78    pub fn empty() -> HyperlinkFormat {
79        HyperlinkFormat::default()
80    }
81
82    /// Returns true if this format is empty.
83    pub fn is_empty(&self) -> bool {
84        self.parts.is_empty()
85    }
86
87    /// Creates a [`HyperlinkConfig`] from this format and the environment
88    /// given.
89    pub fn into_config(self, env: HyperlinkEnvironment) -> HyperlinkConfig {
90        HyperlinkConfig::new(env, self)
91    }
92
93    /// Returns true if the format can produce line-dependent hyperlinks.
94    pub(crate) fn is_line_dependent(&self) -> bool {
95        self.is_line_dependent
96    }
97}
98
99impl std::str::FromStr for HyperlinkFormat {
100    type Err = HyperlinkFormatError;
101
102    fn from_str(s: &str) -> Result<HyperlinkFormat, HyperlinkFormatError> {
103        use self::HyperlinkFormatErrorKind::*;
104
105        #[derive(Debug)]
106        enum State {
107            Verbatim,
108            VerbatimCloseVariable,
109            OpenVariable,
110            InVariable,
111        }
112
113        let mut builder = FormatBuilder::new();
114        let input = match HyperlinkAlias::find(s) {
115            Some(alias) => alias.format(),
116            None => s,
117        };
118        let mut name = String::new();
119        let mut state = State::Verbatim;
120        let err = |kind| HyperlinkFormatError { kind };
121        for ch in input.chars() {
122            state = match state {
123                State::Verbatim => {
124                    if ch == '{' {
125                        State::OpenVariable
126                    } else if ch == '}' {
127                        State::VerbatimCloseVariable
128                    } else {
129                        builder.append_char(ch);
130                        State::Verbatim
131                    }
132                }
133                State::VerbatimCloseVariable => {
134                    if ch == '}' {
135                        builder.append_char('}');
136                        State::Verbatim
137                    } else {
138                        return Err(err(InvalidCloseVariable));
139                    }
140                }
141                State::OpenVariable => {
142                    if ch == '{' {
143                        builder.append_char('{');
144                        State::Verbatim
145                    } else {
146                        name.clear();
147                        if ch == '}' {
148                            builder.append_var(&name)?;
149                            State::Verbatim
150                        } else {
151                            name.push(ch);
152                            State::InVariable
153                        }
154                    }
155                }
156                State::InVariable => {
157                    if ch == '}' {
158                        builder.append_var(&name)?;
159                        State::Verbatim
160                    } else {
161                        name.push(ch);
162                        State::InVariable
163                    }
164                }
165            };
166        }
167        match state {
168            State::Verbatim => builder.build(),
169            State::VerbatimCloseVariable => Err(err(InvalidCloseVariable)),
170            State::OpenVariable | State::InVariable => {
171                Err(err(UnclosedVariable))
172            }
173        }
174    }
175}
176
177impl std::fmt::Display for HyperlinkFormat {
178    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
179        for part in self.parts.iter() {
180            part.fmt(f)?;
181        }
182        Ok(())
183    }
184}
185
/// An alias for a hyperlink format.
///
/// Hyperlink aliases are built-in formats, therefore they hold static values.
/// Some of their features are usable in const blocks.
#[derive(Clone, Debug)]
pub struct HyperlinkAlias {
    /// The name used to look this alias up.
    name: &'static str,
    /// A very short description of the alias.
    description: &'static str,
    /// The format string this alias stands for.
    format: &'static str,
    /// Optional ordering hint for listings; lower values are shown first.
    display_priority: Option<i16>,
}
197
198impl HyperlinkAlias {
199    /// Returns the name of the alias.
200    pub const fn name(&self) -> &str {
201        self.name
202    }
203
204    /// Returns a very short description of this hyperlink alias.
205    pub const fn description(&self) -> &str {
206        self.description
207    }
208
209    /// Returns the display priority of this alias.
210    ///
211    /// If no priority is set, then `None` is returned.
212    ///
213    /// The display priority is meant to reflect some special status associated
214    /// with an alias. For example, the `default` and `none` aliases have a
215    /// display priority. This is meant to encourage listing them first in
216    /// documentation.
217    ///
218    /// A lower display priority implies the alias should be shown before
219    /// aliases with a higher (or absent) display priority.
220    ///
221    /// Callers cannot rely on any specific display priority value to remain
222    /// stable across semver compatible releases of this crate.
223    pub const fn display_priority(&self) -> Option<i16> {
224        self.display_priority
225    }
226
227    /// Returns the format string of the alias.
228    const fn format(&self) -> &'static str {
229        self.format
230    }
231
232    /// Looks for the hyperlink alias defined by the given name.
233    ///
234    /// If one does not exist, `None` is returned.
235    fn find(name: &str) -> Option<&HyperlinkAlias> {
236        HYPERLINK_PATTERN_ALIASES
237            .binary_search_by_key(&name, |alias| alias.name())
238            .map(|i| &HYPERLINK_PATTERN_ALIASES[i])
239            .ok()
240    }
241}
242
/// A static environment for hyperlink interpolation.
///
/// The environment carries the values of hyperlink variables that are not
/// expected to change for the lifetime of a program, i.e., values that are
/// invariant.
///
/// Currently, this includes the hostname and a WSL distro prefix.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct HyperlinkEnvironment {
    /// Value substituted for `{host}`, if any.
    host: Option<String>,
    /// Value substituted for `{wslprefix}`, if any.
    wsl_prefix: Option<String>,
}

impl HyperlinkEnvironment {
    /// Returns an environment in which no variable has a value.
    pub fn new() -> HyperlinkEnvironment {
        Self { host: None, wsl_prefix: None }
    }

    /// Sets the value of the `{host}` variable, which fills in any hostname
    /// components of a hyperlink. Passing `None` clears it.
    ///
    /// The hostname of the current environment can be obtained via the
    /// `hostname` function in the `grep-cli` crate.
    pub fn host(&mut self, host: Option<String>) -> &mut HyperlinkEnvironment {
        let _previous = std::mem::replace(&mut self.host, host);
        self
    }

    /// Sets the value of the `{wslprefix}` variable, which holds the WSL
    /// distro prefix. Passing `None` clears it.
    ///
    /// An example value is `wsl$/Ubuntu`. The distro name can typically be
    /// discovered from the `WSL_DISTRO_NAME` environment variable.
    pub fn wsl_prefix(
        &mut self,
        wsl_prefix: Option<String>,
    ) -> &mut HyperlinkEnvironment {
        let _previous = std::mem::replace(&mut self.wsl_prefix, wsl_prefix);
        self
    }
}
283
/// An error that can occur when parsing a hyperlink format.
///
/// The specific cause is kept private; use the `Display` implementation to
/// obtain a human readable message.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HyperlinkFormatError {
    /// What went wrong while parsing or validating the format.
    kind: HyperlinkFormatErrorKind,
}
289
/// The distinct ways in which parsing a hyperlink format can fail.
#[derive(Clone, Debug, Eq, PartialEq)]
enum HyperlinkFormatErrorKind {
    /// This occurs when there are zero variables in the format, i.e., the
    /// format is non-empty but consists only of literal text.
    NoVariables,
    /// This occurs when the {path} variable is missing.
    NoPathVariable,
    /// This occurs when the {line} variable is missing, while the {column}
    /// variable is present.
    NoLineVariable,
    /// This occurs when an unknown variable is used. The payload is the
    /// variable name as written in the format.
    InvalidVariable(String),
    /// The format doesn't start with a valid scheme.
    InvalidScheme,
    /// This occurs when an unescaped `}` is found without a corresponding
    /// `{` preceding it.
    InvalidCloseVariable,
    /// This occurs when a `{` is found without a corresponding `}` following
    /// it.
    UnclosedVariable,
}
310
// No methods are overridden: this error wraps no underlying source error.
impl std::error::Error for HyperlinkFormatError {}
312
313impl std::fmt::Display for HyperlinkFormatError {
314    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
315        use self::HyperlinkFormatErrorKind::*;
316
317        match self.kind {
318            NoVariables => {
319                let mut aliases = hyperlink_aliases();
320                aliases.sort_by_key(|alias| {
321                    alias.display_priority().unwrap_or(i16::MAX)
322                });
323                let names: Vec<&str> =
324                    aliases.iter().map(|alias| alias.name()).collect();
325                write!(
326                    f,
327                    "at least a {{path}} variable is required in a \
328                     hyperlink format, or otherwise use a valid alias: \
329                     {aliases}",
330                    aliases = names.join(", "),
331                )
332            }
333            NoPathVariable => {
334                write!(
335                    f,
336                    "the {{path}} variable is required in a hyperlink format",
337                )
338            }
339            NoLineVariable => {
340                write!(
341                    f,
342                    "the hyperlink format contains a {{column}} variable, \
343                     but no {{line}} variable is present",
344                )
345            }
346            InvalidVariable(ref name) => {
347                write!(
348                    f,
349                    "invalid hyperlink format variable: '{name}', choose \
350                     from: path, line, column, host, wslprefix",
351                )
352            }
353            InvalidScheme => {
354                write!(
355                    f,
356                    "the hyperlink format must start with a valid URL scheme, \
357                     i.e., [0-9A-Za-z+-.]+:",
358                )
359            }
360            InvalidCloseVariable => {
361                write!(
362                    f,
363                    "unopened variable: found '}}' without a \
364                     corresponding '{{' preceding it",
365                )
366            }
367            UnclosedVariable => {
368                write!(
369                    f,
370                    "unclosed variable: found '{{' without a \
371                     corresponding '}}' following it",
372                )
373            }
374        }
375    }
376}
377
/// A builder for `HyperlinkFormat`.
///
/// Parts are appended one at a time and the finished sequence is validated
/// when the format is built.
///
/// Once a `HyperlinkFormat` is built, it is immutable.
#[derive(Debug)]
struct FormatBuilder {
    /// The parts appended so far, in order.
    parts: Vec<Part>,
}
385
386impl FormatBuilder {
387    /// Creates a new hyperlink format builder.
388    fn new() -> FormatBuilder {
389        FormatBuilder { parts: vec![] }
390    }
391
392    /// Appends static text.
393    fn append_slice(&mut self, text: &[u8]) -> &mut FormatBuilder {
394        if let Some(Part::Text(contents)) = self.parts.last_mut() {
395            contents.extend_from_slice(text);
396        } else if !text.is_empty() {
397            self.parts.push(Part::Text(text.to_vec()));
398        }
399        self
400    }
401
402    /// Appends a single character.
403    fn append_char(&mut self, ch: char) -> &mut FormatBuilder {
404        self.append_slice(ch.encode_utf8(&mut [0; 4]).as_bytes())
405    }
406
407    /// Appends a variable with the given name. If the name isn't recognized,
408    /// then this returns an error.
409    fn append_var(
410        &mut self,
411        name: &str,
412    ) -> Result<&mut FormatBuilder, HyperlinkFormatError> {
413        let part = match name {
414            "host" => Part::Host,
415            "wslprefix" => Part::WSLPrefix,
416            "path" => Part::Path,
417            "line" => Part::Line,
418            "column" => Part::Column,
419            unknown => {
420                let err = HyperlinkFormatError {
421                    kind: HyperlinkFormatErrorKind::InvalidVariable(
422                        unknown.to_string(),
423                    ),
424                };
425                return Err(err);
426            }
427        };
428        self.parts.push(part);
429        Ok(self)
430    }
431
432    /// Builds the format.
433    fn build(&self) -> Result<HyperlinkFormat, HyperlinkFormatError> {
434        self.validate()?;
435        Ok(HyperlinkFormat {
436            parts: self.parts.clone(),
437            is_line_dependent: self.parts.contains(&Part::Line),
438        })
439    }
440
441    /// Validate that the format is well-formed.
442    fn validate(&self) -> Result<(), HyperlinkFormatError> {
443        use self::HyperlinkFormatErrorKind::*;
444
445        let err = |kind| HyperlinkFormatError { kind };
446        // An empty format is fine. It just means hyperlink support is
447        // disabled.
448        if self.parts.is_empty() {
449            return Ok(());
450        }
451        // If all parts are just text, then there are no variables. It's
452        // likely a reference to an invalid alias.
453        if self.parts.iter().all(|p| matches!(*p, Part::Text(_))) {
454            return Err(err(NoVariables));
455        }
456        // Even if we have other variables, no path variable means the
457        // hyperlink can't possibly work the way it is intended.
458        if !self.parts.contains(&Part::Path) {
459            return Err(err(NoPathVariable));
460        }
461        // If the {column} variable is used, then we also need a {line}
462        // variable or else {column} can't possibly work.
463        if self.parts.contains(&Part::Column)
464            && !self.parts.contains(&Part::Line)
465        {
466            return Err(err(NoLineVariable));
467        }
468        self.validate_scheme()
469    }
470
471    /// Validate that the format starts with a valid scheme. Validation is done
472    /// according to how a scheme is defined in RFC 1738 sections 2.1[1] and
473    /// 5[2]. In short, a scheme is this:
474    ///
475    /// scheme = 1*[ lowalpha | digit | "+" | "-" | "." ]
476    ///
477    /// but is case insensitive.
478    ///
479    /// [1]: https://datatracker.ietf.org/doc/html/rfc1738#section-2.1
480    /// [2]: https://datatracker.ietf.org/doc/html/rfc1738#section-5
481    fn validate_scheme(&self) -> Result<(), HyperlinkFormatError> {
482        let err_invalid_scheme = HyperlinkFormatError {
483            kind: HyperlinkFormatErrorKind::InvalidScheme,
484        };
485        let Some(Part::Text(part)) = self.parts.first() else {
486            return Err(err_invalid_scheme);
487        };
488        let Some(colon) = part.find_byte(b':') else {
489            return Err(err_invalid_scheme);
490        };
491        let scheme = &part[..colon];
492        if scheme.is_empty() {
493            return Err(err_invalid_scheme);
494        }
495        let is_valid_scheme_char = |byte| match byte {
496            b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'+' | b'-' | b'.' => {
497                true
498            }
499            _ => false,
500        };
501        if !scheme.iter().all(|&b| is_valid_scheme_char(b)) {
502            return Err(err_invalid_scheme);
503        }
504        Ok(())
505    }
506}
507
/// A hyperlink format part.
///
/// A sequence of these corresponds to a complete format. (Not all sequences
/// are valid.)
#[derive(Clone, Debug, Eq, PartialEq)]
enum Part {
    /// Static text, copied verbatim into the hyperlink.
    ///
    /// We use `Vec<u8>` here (and more generally treat a format string as a
    /// sequence of bytes) because file paths may be arbitrary bytes. A rare
    /// case, but one for which there is no good reason to choke on.
    Text(Vec<u8>),
    /// Variable for the hostname, written `{host}`.
    Host,
    /// Variable for a WSL path prefix, written `{wslprefix}`.
    WSLPrefix,
    /// Variable for the file path, written `{path}`.
    Path,
    /// Variable for the line number, written `{line}`.
    Line,
    /// Variable for the column number, written `{column}`.
    Column,
}
531
532impl Part {
533    /// Interpolate this part using the given `env` and `values`, and write
534    /// the result of interpolation to the buffer provided.
535    fn interpolate_to(
536        &self,
537        env: &HyperlinkEnvironment,
538        values: &Values,
539        dest: &mut Vec<u8>,
540    ) {
541        match *self {
542            Part::Text(ref text) => dest.extend_from_slice(text),
543            Part::Host => dest.extend_from_slice(
544                env.host.as_ref().map(|s| s.as_bytes()).unwrap_or(b""),
545            ),
546            Part::WSLPrefix => dest.extend_from_slice(
547                env.wsl_prefix.as_ref().map(|s| s.as_bytes()).unwrap_or(b""),
548            ),
549            Part::Path => dest.extend_from_slice(&values.path.0),
550            Part::Line => {
551                let line = DecimalFormatter::new(values.line.unwrap_or(1));
552                dest.extend_from_slice(line.as_bytes());
553            }
554            Part::Column => {
555                let column = DecimalFormatter::new(values.column.unwrap_or(1));
556                dest.extend_from_slice(column.as_bytes());
557            }
558        }
559    }
560}
561
562impl std::fmt::Display for Part {
563    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
564        match self {
565            Part::Text(text) => write!(f, "{}", String::from_utf8_lossy(text)),
566            Part::Host => write!(f, "{{host}}"),
567            Part::WSLPrefix => write!(f, "{{wslprefix}}"),
568            Part::Path => write!(f, "{{path}}"),
569            Part::Line => write!(f, "{{line}}"),
570            Part::Column => write!(f, "{{column}}"),
571        }
572    }
573}
574
/// The values to replace the format variables with.
///
/// This only consists of values that depend on each path or match printed.
/// Values that are invariant throughout the lifetime of the process are set
/// via a [`HyperlinkEnvironment`].
#[derive(Clone, Debug)]
pub(crate) struct Values<'a> {
    /// The value for the `{path}` variable.
    path: &'a HyperlinkPath,
    /// The value for the `{line}` variable, if one has been set.
    line: Option<u64>,
    /// The value for the `{column}` variable, if one has been set.
    column: Option<u64>,
}
586
587impl<'a> Values<'a> {
588    /// Creates a new set of values, starting with the path given.
589    ///
590    /// Callers may also set the line and column number using the mutator
591    /// methods.
592    pub(crate) fn new(path: &'a HyperlinkPath) -> Values<'a> {
593        Values { path, line: None, column: None }
594    }
595
596    /// Sets the line number for these values.
597    ///
598    /// If a line number is not set and a hyperlink format contains a `{line}`
599    /// variable, then it is interpolated with the value of `1` automatically.
600    pub(crate) fn line(mut self, line: Option<u64>) -> Values<'a> {
601        self.line = line;
602        self
603    }
604
605    /// Sets the column number for these values.
606    ///
607    /// If a column number is not set and a hyperlink format contains a
608    /// `{column}` variable, then it is interpolated with the value of `1`
609    /// automatically.
610    pub(crate) fn column(mut self, column: Option<u64>) -> Values<'a> {
611        self.column = column;
612        self
613    }
614}
615
/// An abstraction for interpolating a hyperlink format with values for every
/// variable.
///
/// Interpolation of variables occurs through two different sources. The
/// first is via a `HyperlinkEnvironment` for values that are expected to
/// be invariant. This comes from the `HyperlinkConfig` used to build this
/// interpolator. The second source is via `Values`, which is provided to
/// `Interpolator::begin`. The `Values` contains things like the file path,
/// line number and column number.
#[derive(Clone, Debug)]
pub(crate) struct Interpolator {
    /// The format and environment to interpolate with.
    config: HyperlinkConfig,
    /// Scratch space that `begin` clears and refills with the interpolated
    /// hyperlink on each call. It sits in a `RefCell` because `begin` only
    /// takes `&self`.
    buf: RefCell<Vec<u8>>,
}
630
631impl Interpolator {
632    /// Create a new interpolator for the given hyperlink format configuration.
633    pub(crate) fn new(config: &HyperlinkConfig) -> Interpolator {
634        Interpolator { config: config.clone(), buf: RefCell::new(vec![]) }
635    }
636
637    /// Start interpolation with the given values by writing a hyperlink
638    /// to `wtr`. Subsequent writes to `wtr`, until `Interpolator::end` is
639    /// called, are the label for the hyperlink.
640    ///
641    /// This returns an interpolator status which indicates whether the
642    /// hyperlink was written. It might not be written, for example, if the
643    /// underlying writer doesn't support hyperlinks or if the hyperlink
644    /// format is empty. The status should be provided to `Interpolator::end`
645    /// as an instruction for whether to close the hyperlink or not.
646    pub(crate) fn begin<W: WriteColor>(
647        &self,
648        values: &Values,
649        mut wtr: W,
650    ) -> io::Result<InterpolatorStatus> {
651        if self.config.format().is_empty()
652            || !wtr.supports_hyperlinks()
653            || !wtr.supports_color()
654        {
655            return Ok(InterpolatorStatus::inactive());
656        }
657        let mut buf = self.buf.borrow_mut();
658        buf.clear();
659        for part in self.config.format().parts.iter() {
660            part.interpolate_to(self.config.environment(), values, &mut buf);
661        }
662        let spec = HyperlinkSpec::open(&buf);
663        wtr.set_hyperlink(&spec)?;
664        Ok(InterpolatorStatus { active: true })
665    }
666
667    /// Writes the correct escape sequences to `wtr` to close any extant
668    /// hyperlink, marking the end of a hyperlink's label.
669    ///
670    /// The status given should be returned from a corresponding
671    /// `Interpolator::begin` call. Since `begin` may not write a hyperlink
672    /// (e.g., if the underlying writer doesn't support hyperlinks), it follows
673    /// that `finish` must not close a hyperlink that was never opened. The
674    /// status indicates whether the hyperlink was opened or not.
675    pub(crate) fn finish<W: WriteColor>(
676        &self,
677        status: InterpolatorStatus,
678        mut wtr: W,
679    ) -> io::Result<()> {
680        if !status.active {
681            return Ok(());
682        }
683        wtr.set_hyperlink(&HyperlinkSpec::close())
684    }
685}
686
/// Records whether a hyperlink was written or not.
///
/// `Interpolator::begin` produces this value and `Interpolator::finish`
/// consumes it to learn whether a hyperlink was actually opened. When none
/// was opened, finishing interpolation does nothing.
#[derive(Debug)]
pub(crate) struct InterpolatorStatus {
    /// True when a hyperlink has been opened and still needs closing.
    active: bool,
}

impl InterpolatorStatus {
    /// Returns the status that means "no hyperlink was opened".
    #[inline]
    pub(crate) fn inactive() -> InterpolatorStatus {
        let active = false;
        Self { active }
    }
}
704
/// Represents the `{path}` part of a hyperlink.
///
/// This is the value to use as-is in the hyperlink, converted from an OS file
/// path. The wrapped bytes are copied verbatim into the hyperlink wherever
/// the format contains `{path}`.
#[derive(Clone, Debug)]
pub(crate) struct HyperlinkPath(Vec<u8>);
711
712impl HyperlinkPath {
713    /// Returns a hyperlink path from an OS path.
714    #[cfg(unix)]
715    pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
716        use std::os::unix::ffi::OsStrExt;
717
718        // We canonicalize the path in order to get an absolute version of it
719        // without any `.` or `..` or superfluous separators. Unfortunately,
720        // this does also remove symlinks, and in theory, it would be nice to
721        // retain them. Perhaps even simpler, we could just join the current
722        // working directory with the path and be done with it. There was
723        // some discussion about this on PR#2483, and there generally appears
724        // to be some uncertainty about the extent to which hyperlinks with
725        // things like `..` in them actually work. So for now, we do the safest
726        // thing possible even though I think it can result in worse user
727        // experience. (Because it means the path you click on and the actual
728        // path that gets followed are different, even though they ostensibly
729        // refer to the same file.)
730        //
731        // There's also the potential issue that path canonicalization is
732        // expensive since it can touch the file system. That is probably
733        // less of an issue since hyperlinks are only created when they're
734        // supported, i.e., when writing to a tty.
735        //
736        // [1]: https://github.com/BurntSushi/ripgrep/pull/2483
737        let path = match original_path.canonicalize() {
738            Ok(path) => path,
739            Err(err) => {
740                log::debug!(
741                    "hyperlink creation for {:?} failed, error occurred \
742                     during path canonicalization: {}",
743                    original_path,
744                    err,
745                );
746                return None;
747            }
748        };
749        let bytes = path.as_os_str().as_bytes();
750        // This should not be possible since one imagines that canonicalization
751        // should always return an absolute path. But it doesn't actually
752        // appear guaranteed by POSIX, so we check whether it's true or not and
753        // refuse to create a hyperlink from a relative path if it isn't.
754        if !bytes.starts_with(b"/") {
755            log::debug!(
756                "hyperlink creation for {:?} failed, canonicalization \
757                 returned {:?}, which does not start with a slash",
758                original_path,
759                path,
760            );
761            return None;
762        }
763        Some(HyperlinkPath::encode(bytes))
764    }
765
766    /// Returns a hyperlink path from an OS path.
767    #[cfg(windows)]
768    pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
769        // On Windows, we use `std::path::absolute` instead of `Path::canonicalize`
770        // as it can be much faster since it does not touch the file system.
771        // It wraps the [`GetFullPathNameW`][1] API, except for verbatim paths
772        // (those which start with `\\?\`, see [the documentation][2] for details).
773        //
774        // Here, we strip any verbatim path prefixes since we cannot use them
775        // in hyperlinks anyway. This can only happen if the user explicitly
776        // supplies a verbatim path as input, which already needs to be absolute:
777        //
778        //   \\?\C:\dir\file.txt           (local path)
779        //   \\?\UNC\server\dir\file.txt   (network share)
780        //
781        // The `\\?\` prefix is constant for verbatim paths, and can be followed
782        // by `UNC\` (universal naming convention), which denotes a network share.
783        //
784        // Given that the default URL format on Windows is file://{path}
785        // we need to return the following from this function:
786        //
787        //   /C:/dir/file.txt        (local path)
788        //   //server/dir/file.txt   (network share)
789        //
790        // Which produces the following links:
791        //
792        //   file:///C:/dir/file.txt        (local path)
793        //   file:////server/dir/file.txt   (network share)
794        //
795        // This substitutes the {path} variable with the expected value for
796        // the most common DOS paths, but on the other hand, network paths
797        // start with a single slash, which may be unexpected. It seems to work
798        // though?
799        //
800        // Note that the following URL syntax also seems to be valid?
801        //
802        //   file://server/dir/file.txt
803        //
804        // But the initial implementation of this routine went for the format
805        // above.
806        //
807        // Also note that the file://C:/dir/file.txt syntax is not correct,
808        // even though it often works in practice.
809        //
810        // In the end, this choice was confirmed by VSCode, whose format is
811        //
812        //   vscode://file{path}:{line}:{column}
813        //
814        // and which correctly understands the following URL format for network
815        // drives:
816        //
817        //   vscode://file//server/dir/file.txt:1:1
818        //
819        // It doesn't parse any other number of slashes in "file//server" as a
820        // network path.
821        //
822        // [1]: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew
823        // [2]: https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
824
825        const WIN32_NAMESPACE_PREFIX: &str = r"\\?\";
826        const UNC_PREFIX: &str = r"UNC\";
827
828        let path = match std::path::absolute(original_path) {
829            Ok(path) => path,
830            Err(err) => {
831                log::debug!(
832                    "hyperlink creation for {:?} failed, error occurred \
833                     during conversion to absolute path: {}",
834                    original_path,
835                    err,
836                );
837                return None;
838            }
839        };
840        // We convert the path to a string for easier manipulation. If it
841        // wasn't valid UTF-16 (and thus could not be non-lossily transcoded
842        // to UTF-8), then we just give up. It's not clear we could make
843        // a meaningful hyperlink from it anyway. And this should be an
844        // exceptionally rare case.
845        let mut string = match path.to_str() {
846            Some(string) => string,
847            None => {
848                log::debug!(
849                    "hyperlink creation for {:?} failed, path is not \
850                     valid UTF-8",
851                    original_path,
852                );
853                return None;
854            }
855        };
856
857        // Strip verbatim path prefixes (see the comment above for details).
858        if string.starts_with(WIN32_NAMESPACE_PREFIX) {
859            string = &string[WIN32_NAMESPACE_PREFIX.len()..];
860
861            // Drop the UNC prefix if there is one, but keep the leading slash.
862            if string.starts_with(UNC_PREFIX) {
863                string = &string[(UNC_PREFIX.len() - 1)..];
864            }
865        } else if string.starts_with(r"\\") || string.starts_with(r"//") {
866            // Drop one of the two leading slashes of network paths, it will be added back.
867            string = &string[1..];
868        }
869
870        // Finally, add a leading slash. In the local file case, this turns
871        // C:\foo\bar into /C:\foo\bar (and then percent encoding turns it into
872        // /C:/foo/bar). In the network share case, this turns \share\foo\bar
        // into /\share\foo\bar (and then percent encoding turns it into
874        // //share/foo/bar).
875        let with_slash = format!("/{string}");
876        Some(HyperlinkPath::encode(with_slash.as_bytes()))
877    }
878
879    /// For other platforms (not windows, not unix), return None and log a debug message.
880    #[cfg(not(any(windows, unix)))]
881    pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
882        log::debug!("hyperlinks are not supported on this platform");
883        None
884    }
885
886    /// Percent-encodes a path.
887    ///
888    /// The alphanumeric ASCII characters and "-", ".", "_", "~" are unreserved
889    /// as per section 2.3 of RFC 3986 (Uniform Resource Identifier (URI):
890    /// Generic Syntax), and are not encoded. The other ASCII characters except
891    /// "/" and ":" are percent-encoded, and "\" is replaced by "/" on Windows.
892    ///
893    /// Section 4 of RFC 8089 (The "file" URI Scheme) does not mandate precise
894    /// encoding requirements for non-ASCII characters, and this implementation
895    /// leaves them unencoded. On Windows, the UrlCreateFromPathW function does
896    /// not encode non-ASCII characters. Doing so with UTF-8 encoded paths
897    /// creates invalid file:// URLs on that platform.
898    fn encode(input: &[u8]) -> HyperlinkPath {
899        let mut result = Vec::with_capacity(input.len());
900        for &byte in input.iter() {
901            match byte {
902                b'0'..=b'9'
903                | b'A'..=b'Z'
904                | b'a'..=b'z'
905                | b'/'
906                | b':'
907                | b'-'
908                | b'.'
909                | b'_'
910                | b'~'
911                | 128.. => {
912                    result.push(byte);
913                }
914                #[cfg(windows)]
915                b'\\' => {
916                    result.push(b'/');
917                }
918                _ => {
919                    const HEX: &[u8] = b"0123456789ABCDEF";
920                    result.push(b'%');
921                    result.push(HEX[(byte >> 4) as usize]);
922                    result.push(HEX[(byte & 0xF) as usize]);
923                }
924            }
925        }
926        HyperlinkPath(result)
927    }
928}
929
930/// Returns the set of hyperlink aliases supported by this crate.
931///
932/// Aliases are supported by the `FromStr` trait implementation of a
933/// [`HyperlinkFormat`]. That is, if an alias is seen, then it is automatically
934/// replaced with the corresponding format. For example, the `vscode` alias
935/// maps to `vscode://file{path}:{line}:{column}`.
936///
937/// This is exposed to allow callers to include hyperlink aliases in
938/// documentation in a way that is guaranteed to match what is actually
939/// supported.
940///
941/// The list returned is guaranteed to be sorted lexicographically
942/// by the alias name. Callers may want to re-sort the list using
943/// [`HyperlinkAlias::display_priority`] via a stable sort when showing the
944/// list to users. This will cause special aliases like `none` and `default` to
945/// appear first.
946pub fn hyperlink_aliases() -> Vec<HyperlinkAlias> {
947    HYPERLINK_PATTERN_ALIASES.iter().cloned().collect()
948}
949
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use super::*;

    /// Consecutive text slices followed by a variable should display as one
    /// format string, with the text collected into the first part.
    #[test]
    fn build_format() {
        let built = FormatBuilder::new()
            .append_slice(b"foo://")
            .append_slice(b"bar-")
            .append_slice(b"baz")
            .append_var("path")
            .unwrap()
            .build()
            .unwrap();

        assert!(!built.is_empty());
        assert_eq!(built.to_string(), "foo://bar-baz{path}");
        assert_eq!(built.parts[0], Part::Text(b"foo://bar-baz".to_vec()));
    }

    /// A builder with nothing appended yields the empty (default) format.
    #[test]
    fn build_empty_format() {
        let built = FormatBuilder::new().build().unwrap();

        assert!(built.is_empty());
        for expected in [HyperlinkFormat::empty(), HyperlinkFormat::default()]
        {
            assert_eq!(built, expected);
        }
    }

    /// Alias names parse successfully, and `none` parses to an empty format.
    #[test]
    fn handle_alias() {
        for alias in ["file", "none"] {
            assert!(HyperlinkFormat::from_str(alias).is_ok());
        }
        assert!(HyperlinkFormat::from_str("none").unwrap().is_empty());
    }

    /// A format using every variable round-trips and splits into the
    /// expected number of parts.
    #[test]
    fn parse_format() {
        const INPUT: &str = "foo://{host}/bar/{path}:{line}:{column}";

        let parsed = HyperlinkFormat::from_str(INPUT).unwrap();

        assert_eq!(parsed.to_string(), INPUT);
        assert_eq!(parsed.parts.len(), 8);
        for expected in [Part::Path, Part::Line, Part::Column] {
            assert!(parsed.parts.contains(&expected));
        }
    }

    /// Formats that must be accepted by the parser.
    #[test]
    fn parse_valid() {
        assert!(HyperlinkFormat::from_str("").unwrap().is_empty());

        // These must also display exactly as they were written.
        for input in ["foo://{path}", "foo://{path}/bar"] {
            assert_eq!(
                HyperlinkFormat::from_str(input).unwrap().to_string(),
                input
            );
        }

        // These only need to parse without error.
        let accepted = [
            "f://{path}",
            "f:{path}",
            "f-+.:{path}",
            "f42:{path}",
            "42:{path}",
            "+:{path}",
            "F42:{path}",
            "F42://foo{{bar}}{path}",
        ];
        for input in accepted {
            HyperlinkFormat::from_str(input).unwrap();
        }
    }

    /// Formats that must be rejected, each with its specific error kind.
    #[test]
    fn parse_invalid() {
        use super::HyperlinkFormatErrorKind::*;

        let cases = [
            ("foo://bar", NoVariables),
            ("foo://{line}", NoPathVariable),
            ("foo://{path", UnclosedVariable),
            ("foo://{path}:{column}", NoLineVariable),
            ("{path}", InvalidScheme),
            (":{path}", InvalidScheme),
            ("f*:{path}", InvalidScheme),
            ("foo://{bar}", InvalidVariable("bar".to_string())),
            ("foo://{}}bar}", InvalidVariable("".to_string())),
            ("foo://{b}}ar}", InvalidVariable("b".to_string())),
            ("foo://{bar}}}", InvalidVariable("bar".to_string())),
            ("foo://{{bar}", InvalidCloseVariable),
            ("foo://{{{bar}", InvalidVariable("bar".to_string())),
            ("foo://{b{{ar}", InvalidVariable("b{{ar".to_string())),
            ("foo://{bar{{}", InvalidVariable("bar{{".to_string())),
        ];
        for (input, kind) in cases {
            assert_eq!(
                HyperlinkFormat::from_str(input).unwrap_err(),
                HyperlinkFormatError { kind },
                "unexpected error for {input:?}",
            );
        }
    }

    /// Windows paths (local, network share and verbatim) map to the
    /// expected URL path.
    #[test]
    #[cfg(windows)]
    fn convert_to_hyperlink_path() {
        fn convert(path: &str) -> String {
            let converted =
                HyperlinkPath::from_path(Path::new(path)).unwrap();
            String::from_utf8(converted.0).unwrap()
        }

        let cases = [
            // Local paths.
            (r"C:\dir\file.txt", "/C:/dir/file.txt"),
            (r"C:\foo\bar\..\other\baz.txt", "/C:/foo/other/baz.txt"),
            // Network shares.
            (r"\\server\dir\file.txt", "//server/dir/file.txt"),
            (
                r"\\server\dir\foo\..\other\file.txt",
                "//server/dir/other/file.txt",
            ),
            // Verbatim prefixes.
            (r"\\?\C:\dir\file.txt", "/C:/dir/file.txt"),
            (r"\\?\UNC\server\dir\file.txt", "//server/dir/file.txt"),
        ];
        for (input, expected) in cases {
            assert_eq!(convert(input), expected);
        }
    }

    /// The alias list is non-empty and strictly increasing by name.
    #[test]
    fn aliases_are_sorted() {
        let aliases = hyperlink_aliases();
        assert!(!aliases.is_empty(), "aliases should be non-empty");
        for pair in aliases.windows(2) {
            let (prev, name) = (pair[0].name(), pair[1].name());
            assert!(
                prev < name,
                "'{prev}' should come before '{name}' in \
                 HYPERLINK_PATTERN_ALIASES",
            );
        }
    }

    /// Alias names are limited to alphanumerics, `+`, `-` and `.`.
    #[test]
    fn alias_names_are_reasonable() {
        // There's no hard rule here, but if we want to define an alias
        // with a name that doesn't pass this check, then we should
        // probably flag it as worthy of consideration. For example, we
        // really do not want to define an alias that contains `{` or `}`,
        // which might confuse it for a variable.
        fn is_reasonable(c: char) -> bool {
            c.is_alphanumeric() || matches!(c, '+' | '-' | '.')
        }

        for alias in hyperlink_aliases() {
            assert!(alias.name().chars().all(is_reasonable));
        }
    }

    /// Every alias expands to a format that the parser accepts.
    #[test]
    fn aliases_are_valid_formats() {
        for alias in hyperlink_aliases() {
            let name = alias.name();
            let format = alias.format();
            let parsed: Result<HyperlinkFormat, _> = format.parse();
            assert!(
                parsed.is_ok(),
                "invalid hyperlink alias '{name}': {format}",
            );
        }
    }
}