grep_printer/
hyperlink.rs

1use std::{cell::RefCell, io, path::Path, sync::Arc};
2
3use {
4    bstr::ByteSlice,
5    termcolor::{HyperlinkSpec, WriteColor},
6};
7
8use crate::{hyperlink_aliases, util::DecimalFormatter};
9
10/// Hyperlink configuration.
11///
12/// This configuration specifies both the [hyperlink format](HyperlinkFormat)
13/// and an [environment](HyperlinkConfig) for interpolating a subset of
14/// variables. The specific subset includes variables that are intended to
15/// be invariant throughout the lifetime of a process, such as a machine's
16/// hostname.
17///
18/// A hyperlink configuration can be provided to printer builders such as
19/// [`StandardBuilder::hyperlink`](crate::StandardBuilder::hyperlink).
20#[derive(Clone, Debug, Default, Eq, PartialEq)]
21pub struct HyperlinkConfig(Arc<HyperlinkConfigInner>);
22
23#[derive(Clone, Debug, Default, Eq, PartialEq)]
24struct HyperlinkConfigInner {
25    env: HyperlinkEnvironment,
26    format: HyperlinkFormat,
27}
28
29impl HyperlinkConfig {
30    /// Create a new configuration from an environment and a format.
31    pub fn new(
32        env: HyperlinkEnvironment,
33        format: HyperlinkFormat,
34    ) -> HyperlinkConfig {
35        HyperlinkConfig(Arc::new(HyperlinkConfigInner { env, format }))
36    }
37
38    /// Returns the hyperlink environment in this configuration.
39    pub(crate) fn environment(&self) -> &HyperlinkEnvironment {
40        &self.0.env
41    }
42
43    /// Returns the hyperlink format in this configuration.
44    pub(crate) fn format(&self) -> &HyperlinkFormat {
45        &self.0.format
46    }
47}
48
49/// A hyperlink format with variables.
50///
51/// This can be created by parsing a string using `HyperlinkFormat::from_str`.
52///
53/// The default format is empty. An empty format is valid and effectively
54/// disables hyperlinks.
55///
56/// # Example
57///
58/// ```
59/// use grep_printer::HyperlinkFormat;
60///
61/// let fmt = "vscode".parse::<HyperlinkFormat>()?;
62/// assert_eq!(fmt.to_string(), "vscode://file{path}:{line}:{column}");
63///
64/// # Ok::<(), Box<dyn std::error::Error>>(())
65/// ```
66#[derive(Clone, Debug, Default, Eq, PartialEq)]
67pub struct HyperlinkFormat {
68    parts: Vec<Part>,
69    is_line_dependent: bool,
70}
71
72impl HyperlinkFormat {
73    /// Creates an empty hyperlink format.
74    pub fn empty() -> HyperlinkFormat {
75        HyperlinkFormat::default()
76    }
77
78    /// Returns true if this format is empty.
79    pub fn is_empty(&self) -> bool {
80        self.parts.is_empty()
81    }
82
83    /// Creates a [`HyperlinkConfig`] from this format and the environment
84    /// given.
85    pub fn into_config(self, env: HyperlinkEnvironment) -> HyperlinkConfig {
86        HyperlinkConfig::new(env, self)
87    }
88
89    /// Returns true if the format can produce line-dependent hyperlinks.
90    pub(crate) fn is_line_dependent(&self) -> bool {
91        self.is_line_dependent
92    }
93}
94
95impl std::str::FromStr for HyperlinkFormat {
96    type Err = HyperlinkFormatError;
97
98    fn from_str(s: &str) -> Result<HyperlinkFormat, HyperlinkFormatError> {
99        use self::HyperlinkFormatErrorKind::*;
100
101        #[derive(Debug)]
102        enum State {
103            Verbatim,
104            VerbatimCloseVariable,
105            OpenVariable,
106            InVariable,
107        }
108
109        let mut builder = FormatBuilder::new();
110        let input = match hyperlink_aliases::find(s) {
111            Some(format) => format,
112            None => s,
113        };
114        let mut name = String::new();
115        let mut state = State::Verbatim;
116        let err = |kind| HyperlinkFormatError { kind };
117        for ch in input.chars() {
118            state = match state {
119                State::Verbatim => {
120                    if ch == '{' {
121                        State::OpenVariable
122                    } else if ch == '}' {
123                        State::VerbatimCloseVariable
124                    } else {
125                        builder.append_char(ch);
126                        State::Verbatim
127                    }
128                }
129                State::VerbatimCloseVariable => {
130                    if ch == '}' {
131                        builder.append_char('}');
132                        State::Verbatim
133                    } else {
134                        return Err(err(InvalidCloseVariable));
135                    }
136                }
137                State::OpenVariable => {
138                    if ch == '{' {
139                        builder.append_char('{');
140                        State::Verbatim
141                    } else {
142                        name.clear();
143                        if ch == '}' {
144                            builder.append_var(&name)?;
145                            State::Verbatim
146                        } else {
147                            name.push(ch);
148                            State::InVariable
149                        }
150                    }
151                }
152                State::InVariable => {
153                    if ch == '}' {
154                        builder.append_var(&name)?;
155                        State::Verbatim
156                    } else {
157                        name.push(ch);
158                        State::InVariable
159                    }
160                }
161            };
162        }
163        match state {
164            State::Verbatim => builder.build(),
165            State::VerbatimCloseVariable => Err(err(InvalidCloseVariable)),
166            State::OpenVariable | State::InVariable => {
167                Err(err(UnclosedVariable))
168            }
169        }
170    }
171}
172
173impl std::fmt::Display for HyperlinkFormat {
174    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
175        for part in self.parts.iter() {
176            part.fmt(f)?;
177        }
178        Ok(())
179    }
180}
181
/// A static environment for hyperlink interpolation.
///
/// The environment holds the values of hyperlink variables that are not
/// expected to change for the lifetime of a program. In other words, these
/// values are invariant.
///
/// Currently, this includes the hostname and a WSL distro prefix.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct HyperlinkEnvironment {
    /// The value of `{host}`, if one was set.
    host: Option<String>,
    /// The value of `{wslprefix}`, if one was set.
    wsl_prefix: Option<String>,
}

impl HyperlinkEnvironment {
    /// Create a new hyperlink environment in which no variable is set.
    pub fn new() -> HyperlinkEnvironment {
        HyperlinkEnvironment { host: None, wsl_prefix: None }
    }

    /// Set the `{host}` variable, which fills in any hostname components of
    /// a hyperlink.
    ///
    /// One can get the hostname in the current environment via the `hostname`
    /// function in the `grep-cli` crate.
    pub fn host(&mut self, host: Option<String>) -> &mut Self {
        self.host = host;
        self
    }

    /// Set the `{wslprefix}` variable, which contains the WSL distro prefix.
    /// An example value is `wsl$/Ubuntu`. The distro name can typically be
    /// discovered from the `WSL_DISTRO_NAME` environment variable.
    pub fn wsl_prefix(&mut self, wsl_prefix: Option<String>) -> &mut Self {
        self.wsl_prefix = wsl_prefix;
        self
    }
}
222
223/// An error that can occur when parsing a hyperlink format.
224#[derive(Clone, Debug, Eq, PartialEq)]
225pub struct HyperlinkFormatError {
226    kind: HyperlinkFormatErrorKind,
227}
228
229#[derive(Clone, Debug, Eq, PartialEq)]
230enum HyperlinkFormatErrorKind {
231    /// This occurs when there are zero variables in the format.
232    NoVariables,
233    /// This occurs when the {path} variable is missing.
234    NoPathVariable,
235    /// This occurs when the {line} variable is missing, while the {column}
236    /// variable is present.
237    NoLineVariable,
238    /// This occurs when an unknown variable is used.
239    InvalidVariable(String),
240    /// The format doesn't start with a valid scheme.
241    InvalidScheme,
242    /// This occurs when an unescaped `}` is found without a corresponding
243    /// `{` preceding it.
244    InvalidCloseVariable,
245    /// This occurs when a `{` is found without a corresponding `}` following
246    /// it.
247    UnclosedVariable,
248}
249
250impl std::error::Error for HyperlinkFormatError {}
251
252impl std::fmt::Display for HyperlinkFormatError {
253    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
254        use self::HyperlinkFormatErrorKind::*;
255
256        match self.kind {
257            NoVariables => {
258                let aliases = hyperlink_aliases::iter()
259                    .map(|(name, _)| name)
260                    .collect::<Vec<&str>>()
261                    .join(", ");
262                write!(
263                    f,
264                    "at least a {{path}} variable is required in a \
265                     hyperlink format, or otherwise use a valid alias: {}",
266                    aliases,
267                )
268            }
269            NoPathVariable => {
270                write!(
271                    f,
272                    "the {{path}} variable is required in a hyperlink format",
273                )
274            }
275            NoLineVariable => {
276                write!(
277                    f,
278                    "the hyperlink format contains a {{column}} variable, \
279                     but no {{line}} variable is present",
280                )
281            }
282            InvalidVariable(ref name) => {
283                write!(
284                    f,
285                    "invalid hyperlink format variable: '{name}', choose \
286                     from: path, line, column, host, wslprefix",
287                )
288            }
289            InvalidScheme => {
290                write!(
291                    f,
292                    "the hyperlink format must start with a valid URL scheme, \
293                     i.e., [0-9A-Za-z+-.]+:",
294                )
295            }
296            InvalidCloseVariable => {
297                write!(
298                    f,
299                    "unopened variable: found '}}' without a \
300                     corresponding '{{' preceding it",
301                )
302            }
303            UnclosedVariable => {
304                write!(
305                    f,
306                    "unclosed variable: found '{{' without a \
307                     corresponding '}}' following it",
308                )
309            }
310        }
311    }
312}
313
314/// A builder for `HyperlinkFormat`.
315///
316/// Once a `HyperlinkFormat` is built, it is immutable.
317#[derive(Debug)]
318struct FormatBuilder {
319    parts: Vec<Part>,
320}
321
322impl FormatBuilder {
323    /// Creates a new hyperlink format builder.
324    fn new() -> FormatBuilder {
325        FormatBuilder { parts: vec![] }
326    }
327
328    /// Appends static text.
329    fn append_slice(&mut self, text: &[u8]) -> &mut FormatBuilder {
330        if let Some(Part::Text(contents)) = self.parts.last_mut() {
331            contents.extend_from_slice(text);
332        } else if !text.is_empty() {
333            self.parts.push(Part::Text(text.to_vec()));
334        }
335        self
336    }
337
338    /// Appends a single character.
339    fn append_char(&mut self, ch: char) -> &mut FormatBuilder {
340        self.append_slice(ch.encode_utf8(&mut [0; 4]).as_bytes())
341    }
342
343    /// Appends a variable with the given name. If the name isn't recognized,
344    /// then this returns an error.
345    fn append_var(
346        &mut self,
347        name: &str,
348    ) -> Result<&mut FormatBuilder, HyperlinkFormatError> {
349        let part = match name {
350            "host" => Part::Host,
351            "wslprefix" => Part::WSLPrefix,
352            "path" => Part::Path,
353            "line" => Part::Line,
354            "column" => Part::Column,
355            unknown => {
356                let err = HyperlinkFormatError {
357                    kind: HyperlinkFormatErrorKind::InvalidVariable(
358                        unknown.to_string(),
359                    ),
360                };
361                return Err(err);
362            }
363        };
364        self.parts.push(part);
365        Ok(self)
366    }
367
368    /// Builds the format.
369    fn build(&self) -> Result<HyperlinkFormat, HyperlinkFormatError> {
370        self.validate()?;
371        Ok(HyperlinkFormat {
372            parts: self.parts.clone(),
373            is_line_dependent: self.parts.contains(&Part::Line),
374        })
375    }
376
377    /// Validate that the format is well-formed.
378    fn validate(&self) -> Result<(), HyperlinkFormatError> {
379        use self::HyperlinkFormatErrorKind::*;
380
381        let err = |kind| HyperlinkFormatError { kind };
382        // An empty format is fine. It just means hyperlink support is
383        // disabled.
384        if self.parts.is_empty() {
385            return Ok(());
386        }
387        // If all parts are just text, then there are no variables. It's
388        // likely a reference to an invalid alias.
389        if self.parts.iter().all(|p| matches!(*p, Part::Text(_))) {
390            return Err(err(NoVariables));
391        }
392        // Even if we have other variables, no path variable means the
393        // hyperlink can't possibly work the way it is intended.
394        if !self.parts.contains(&Part::Path) {
395            return Err(err(NoPathVariable));
396        }
397        // If the {column} variable is used, then we also need a {line}
398        // variable or else {column} can't possibly work.
399        if self.parts.contains(&Part::Column)
400            && !self.parts.contains(&Part::Line)
401        {
402            return Err(err(NoLineVariable));
403        }
404        self.validate_scheme()
405    }
406
407    /// Validate that the format starts with a valid scheme. Validation is done
408    /// according to how a scheme is defined in RFC 1738 sections 2.1[1] and
409    /// 5[2]. In short, a scheme is this:
410    ///
411    /// scheme = 1*[ lowalpha | digit | "+" | "-" | "." ]
412    ///
413    /// but is case insensitive.
414    ///
415    /// [1]: https://datatracker.ietf.org/doc/html/rfc1738#section-2.1
416    /// [2]: https://datatracker.ietf.org/doc/html/rfc1738#section-5
417    fn validate_scheme(&self) -> Result<(), HyperlinkFormatError> {
418        let err_invalid_scheme = HyperlinkFormatError {
419            kind: HyperlinkFormatErrorKind::InvalidScheme,
420        };
421        let Some(Part::Text(ref part)) = self.parts.first() else {
422            return Err(err_invalid_scheme);
423        };
424        let Some(colon) = part.find_byte(b':') else {
425            return Err(err_invalid_scheme);
426        };
427        let scheme = &part[..colon];
428        if scheme.is_empty() {
429            return Err(err_invalid_scheme);
430        }
431        let is_valid_scheme_char = |byte| match byte {
432            b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'+' | b'-' | b'.' => {
433                true
434            }
435            _ => false,
436        };
437        if !scheme.iter().all(|&b| is_valid_scheme_char(b)) {
438            return Err(err_invalid_scheme);
439        }
440        Ok(())
441    }
442}
443
444/// A hyperlink format part.
445///
446/// A sequence of these corresponds to a complete format. (Not all sequences
447/// are valid.)
448#[derive(Clone, Debug, Eq, PartialEq)]
449enum Part {
450    /// Static text.
451    ///
452    /// We use `Vec<u8>` here (and more generally treat a format string as a
453    /// sequence of bytes) because file paths may be arbitrary bytes. A rare
454    /// case, but one for which there is no good reason to choke on.
455    Text(Vec<u8>),
456    /// Variable for the hostname.
457    Host,
458    /// Variable for a WSL path prefix.
459    WSLPrefix,
460    /// Variable for the file path.
461    Path,
462    /// Variable for the line number.
463    Line,
464    /// Variable for the column number.
465    Column,
466}
467
468impl Part {
469    /// Interpolate this part using the given `env` and `values`, and write
470    /// the result of interpolation to the buffer provided.
471    fn interpolate_to(
472        &self,
473        env: &HyperlinkEnvironment,
474        values: &Values,
475        dest: &mut Vec<u8>,
476    ) {
477        match self {
478            Part::Text(ref text) => dest.extend_from_slice(text),
479            Part::Host => dest.extend_from_slice(
480                env.host.as_ref().map(|s| s.as_bytes()).unwrap_or(b""),
481            ),
482            Part::WSLPrefix => dest.extend_from_slice(
483                env.wsl_prefix.as_ref().map(|s| s.as_bytes()).unwrap_or(b""),
484            ),
485            Part::Path => dest.extend_from_slice(&values.path.0),
486            Part::Line => {
487                let line = DecimalFormatter::new(values.line.unwrap_or(1));
488                dest.extend_from_slice(line.as_bytes());
489            }
490            Part::Column => {
491                let column = DecimalFormatter::new(values.column.unwrap_or(1));
492                dest.extend_from_slice(column.as_bytes());
493            }
494        }
495    }
496}
497
498impl std::fmt::Display for Part {
499    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
500        match self {
501            Part::Text(text) => write!(f, "{}", String::from_utf8_lossy(text)),
502            Part::Host => write!(f, "{{host}}"),
503            Part::WSLPrefix => write!(f, "{{wslprefix}}"),
504            Part::Path => write!(f, "{{path}}"),
505            Part::Line => write!(f, "{{line}}"),
506            Part::Column => write!(f, "{{column}}"),
507        }
508    }
509}
510
511/// The values to replace the format variables with.
512///
513/// This only consists of values that depend on each path or match printed.
514/// Values that are invariant throughout the lifetime of the process are set
515/// via a [`HyperlinkEnvironment`].
516#[derive(Clone, Debug)]
517pub(crate) struct Values<'a> {
518    path: &'a HyperlinkPath,
519    line: Option<u64>,
520    column: Option<u64>,
521}
522
523impl<'a> Values<'a> {
524    /// Creates a new set of values, starting with the path given.
525    ///
526    /// Callers may also set the line and column number using the mutator
527    /// methods.
528    pub(crate) fn new(path: &'a HyperlinkPath) -> Values<'a> {
529        Values { path, line: None, column: None }
530    }
531
532    /// Sets the line number for these values.
533    ///
534    /// If a line number is not set and a hyperlink format contains a `{line}`
535    /// variable, then it is interpolated with the value of `1` automatically.
536    pub(crate) fn line(mut self, line: Option<u64>) -> Values<'a> {
537        self.line = line;
538        self
539    }
540
541    /// Sets the column number for these values.
542    ///
543    /// If a column number is not set and a hyperlink format contains a
544    /// `{column}` variable, then it is interpolated with the value of `1`
545    /// automatically.
546    pub(crate) fn column(mut self, column: Option<u64>) -> Values<'a> {
547        self.column = column;
548        self
549    }
550}
551
552/// An abstraction for interpolating a hyperlink format with values for every
553/// variable.
554///
555/// Interpolation of variables occurs through two different sources. The
556/// first is via a `HyperlinkEnvironment` for values that are expected to
557/// be invariant. This comes from the `HyperlinkConfig` used to build this
558/// interpolator. The second source is via `Values`, which is provided to
559/// `Interpolator::begin`. The `Values` contains things like the file path,
560/// line number and column number.
561#[derive(Clone, Debug)]
562pub(crate) struct Interpolator {
563    config: HyperlinkConfig,
564    buf: RefCell<Vec<u8>>,
565}
566
567impl Interpolator {
568    /// Create a new interpolator for the given hyperlink format configuration.
569    pub(crate) fn new(config: &HyperlinkConfig) -> Interpolator {
570        Interpolator { config: config.clone(), buf: RefCell::new(vec![]) }
571    }
572
573    /// Start interpolation with the given values by writing a hyperlink
574    /// to `wtr`. Subsequent writes to `wtr`, until `Interpolator::end` is
575    /// called, are the label for the hyperlink.
576    ///
577    /// This returns an interpolator status which indicates whether the
578    /// hyperlink was written. It might not be written, for example, if the
579    /// underlying writer doesn't support hyperlinks or if the hyperlink
580    /// format is empty. The status should be provided to `Interpolator::end`
581    /// as an instruction for whether to close the hyperlink or not.
582    pub(crate) fn begin<W: WriteColor>(
583        &self,
584        values: &Values,
585        mut wtr: W,
586    ) -> io::Result<InterpolatorStatus> {
587        if self.config.format().is_empty()
588            || !wtr.supports_hyperlinks()
589            || !wtr.supports_color()
590        {
591            return Ok(InterpolatorStatus::inactive());
592        }
593        let mut buf = self.buf.borrow_mut();
594        buf.clear();
595        for part in self.config.format().parts.iter() {
596            part.interpolate_to(self.config.environment(), values, &mut buf);
597        }
598        let spec = HyperlinkSpec::open(&buf);
599        wtr.set_hyperlink(&spec)?;
600        Ok(InterpolatorStatus { active: true })
601    }
602
603    /// Writes the correct escape sequences to `wtr` to close any extant
604    /// hyperlink, marking the end of a hyperlink's label.
605    ///
606    /// The status given should be returned from a corresponding
607    /// `Interpolator::begin` call. Since `begin` may not write a hyperlink
608    /// (e.g., if the underlying writer doesn't support hyperlinks), it follows
609    /// that `finish` must not close a hyperlink that was never opened. The
610    /// status indicates whether the hyperlink was opened or not.
611    pub(crate) fn finish<W: WriteColor>(
612        &self,
613        status: InterpolatorStatus,
614        mut wtr: W,
615    ) -> io::Result<()> {
616        if !status.active {
617            return Ok(());
618        }
619        wtr.set_hyperlink(&HyperlinkSpec::close())
620    }
621}
622
/// A status indicating whether a hyperlink was written or not.
///
/// `Interpolator::begin` produces a status and `Interpolator::finish`
/// consumes it in order to learn whether a hyperlink was actually opened.
/// When no hyperlink was opened, finishing interpolation is a no-op.
#[derive(Debug)]
pub(crate) struct InterpolatorStatus {
    /// True when a hyperlink was opened and therefore needs to be closed.
    active: bool,
}

impl InterpolatorStatus {
    /// Create a status for which no hyperlink has been opened.
    #[inline]
    pub(crate) fn inactive() -> Self {
        Self { active: false }
    }
}
640
641/// Represents the `{path}` part of a hyperlink.
642///
643/// This is the value to use as-is in the hyperlink, converted from an OS file
644/// path.
645#[derive(Clone, Debug)]
646pub(crate) struct HyperlinkPath(Vec<u8>);
647
648impl HyperlinkPath {
649    /// Returns a hyperlink path from an OS path.
650    #[cfg(unix)]
651    pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
652        use std::os::unix::ffi::OsStrExt;
653
654        // We canonicalize the path in order to get an absolute version of it
655        // without any `.` or `..` or superfluous separators. Unfortunately,
656        // this does also remove symlinks, and in theory, it would be nice to
657        // retain them. Perhaps even simpler, we could just join the current
658        // working directory with the path and be done with it. There was
659        // some discussion about this on PR#2483, and there generally appears
660        // to be some uncertainty about the extent to which hyperlinks with
661        // things like `..` in them actually work. So for now, we do the safest
662        // thing possible even though I think it can result in worse user
663        // experience. (Because it means the path you click on and the actual
664        // path that gets followed are different, even though they ostensibly
665        // refer to the same file.)
666        //
667        // There's also the potential issue that path canonicalization is
668        // expensive since it can touch the file system. That is probably
669        // less of an issue since hyperlinks are only created when they're
670        // supported, i.e., when writing to a tty.
671        //
672        // [1]: https://github.com/BurntSushi/ripgrep/pull/2483
673        let path = match original_path.canonicalize() {
674            Ok(path) => path,
675            Err(err) => {
676                log::debug!(
677                    "hyperlink creation for {:?} failed, error occurred \
678                     during path canonicalization: {}",
679                    original_path,
680                    err,
681                );
682                return None;
683            }
684        };
685        let bytes = path.as_os_str().as_bytes();
686        // This should not be possible since one imagines that canonicalization
687        // should always return an absolute path. But it doesn't actually
688        // appear guaranteed by POSIX, so we check whether it's true or not and
689        // refuse to create a hyperlink from a relative path if it isn't.
690        if !bytes.starts_with(b"/") {
691            log::debug!(
692                "hyperlink creation for {:?} failed, canonicalization \
693                 returned {:?}, which does not start with a slash",
694                original_path,
695                path,
696            );
697            return None;
698        }
699        Some(HyperlinkPath::encode(bytes))
700    }
701
702    /// Returns a hyperlink path from an OS path.
703    #[cfg(windows)]
704    pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
705        // On Windows, Path::canonicalize returns the result of
706        // GetFinalPathNameByHandleW with VOLUME_NAME_DOS,
707        // which produces paths such as the following:
708        //
709        //   \\?\C:\dir\file.txt           (local path)
710        //   \\?\UNC\server\dir\file.txt   (network share)
711        //
712        // The \\?\ prefix comes from VOLUME_NAME_DOS and is constant.
713        // It is followed either by the drive letter, or by UNC\
714        // (universal naming convention), which denotes a network share.
715        //
716        // Given that the default URL format on Windows is file://{path}
717        // we need to return the following from this function:
718        //
719        //   /C:/dir/file.txt        (local path)
720        //   //server/dir/file.txt   (network share)
721        //
722        // Which produces the following links:
723        //
724        //   file:///C:/dir/file.txt        (local path)
725        //   file:////server/dir/file.txt   (network share)
726        //
727        // This substitutes the {path} variable with the expected value for
728        // the most common DOS paths, but on the other hand, network paths
729        // start with a single slash, which may be unexpected. It seems to work
730        // though?
731        //
732        // Note that the following URL syntax also seems to be valid?
733        //
734        //   file://server/dir/file.txt
735        //
736        // But the initial implementation of this routine went for the format
737        // above.
738        //
739        // Also note that the file://C:/dir/file.txt syntax is not correct,
740        // even though it often works in practice.
741        //
742        // In the end, this choice was confirmed by VSCode, whose format is
743        //
744        //   vscode://file{path}:{line}:{column}
745        //
746        // and which correctly understands the following URL format for network
747        // drives:
748        //
749        //   vscode://file//server/dir/file.txt:1:1
750        //
751        // It doesn't parse any other number of slashes in "file//server" as a
752        // network path.
753
754        const WIN32_NAMESPACE_PREFIX: &str = r"\\?\";
755        const UNC_PREFIX: &str = r"UNC\";
756
757        // As for Unix, we canonicalize the path to make sure we have an
758        // absolute path.
759        let path = match original_path.canonicalize() {
760            Ok(path) => path,
761            Err(err) => {
762                log::debug!(
763                    "hyperlink creation for {:?} failed, error occurred \
764                     during path canonicalization: {}",
765                    original_path,
766                    err,
767                );
768                return None;
769            }
770        };
771        // We convert the path to a string for easier manipulation. If it
772        // wasn't valid UTF-16 (and thus could not be non-lossily transcoded
773        // to UTF-8), then we just give up. It's not clear we could make
774        // a meaningful hyperlink from it anyway. And this should be an
775        // exceptionally rare case.
776        let mut string = match path.to_str() {
777            Some(string) => string,
778            None => {
779                log::debug!(
780                    "hyperlink creation for {:?} failed, path is not \
781                     valid UTF-8",
782                    original_path,
783                );
784                return None;
785            }
786        };
787        // As the comment above says, we expect all canonicalized paths to
788        // begin with a \\?\. If it doesn't, then something weird is happening
789        // and we should just give up.
790        if !string.starts_with(WIN32_NAMESPACE_PREFIX) {
791            log::debug!(
792                "hyperlink creation for {:?} failed, canonicalization \
793                 returned {:?}, which does not start with \\\\?\\",
794                original_path,
795                path,
796            );
797            return None;
798        }
799        string = &string[WIN32_NAMESPACE_PREFIX.len()..];
800
801        // And as above, drop the UNC prefix too, but keep the leading slash.
802        if string.starts_with(UNC_PREFIX) {
803            string = &string[(UNC_PREFIX.len() - 1)..];
804        }
805        // Finally, add a leading slash. In the local file case, this turns
806        // C:\foo\bar into /C:\foo\bar (and then percent encoding turns it into
807        // /C:/foo/bar). In the network share case, this turns \share\foo\bar
808        // into /\share/foo/bar (and then percent encoding turns it into
809        // //share/foo/bar).
810        let with_slash = format!("/{string}");
811        Some(HyperlinkPath::encode(with_slash.as_bytes()))
812    }
813
814    /// For other platforms (not windows, not unix), return None and log a debug message.
815    #[cfg(not(any(windows, unix)))]
816    pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
817        log::debug!("hyperlinks are not supported on this platform");
818        None
819    }
820
821    /// Percent-encodes a path.
822    ///
823    /// The alphanumeric ASCII characters and "-", ".", "_", "~" are unreserved
824    /// as per section 2.3 of RFC 3986 (Uniform Resource Identifier (URI):
825    /// Generic Syntax), and are not encoded. The other ASCII characters except
826    /// "/" and ":" are percent-encoded, and "\" is replaced by "/" on Windows.
827    ///
828    /// Section 4 of RFC 8089 (The "file" URI Scheme) does not mandate precise
829    /// encoding requirements for non-ASCII characters, and this implementation
830    /// leaves them unencoded. On Windows, the UrlCreateFromPathW function does
831    /// not encode non-ASCII characters. Doing so with UTF-8 encoded paths
832    /// creates invalid file:// URLs on that platform.
833    fn encode(input: &[u8]) -> HyperlinkPath {
834        let mut result = Vec::with_capacity(input.len());
835        for &byte in input.iter() {
836            match byte {
837                b'0'..=b'9'
838                | b'A'..=b'Z'
839                | b'a'..=b'z'
840                | b'/'
841                | b':'
842                | b'-'
843                | b'.'
844                | b'_'
845                | b'~'
846                | 128.. => {
847                    result.push(byte);
848                }
849                #[cfg(windows)]
850                b'\\' => {
851                    result.push(b'/');
852                }
853                _ => {
854                    const HEX: &[u8] = b"0123456789ABCDEF";
855                    result.push(b'%');
856                    result.push(HEX[(byte >> 4) as usize]);
857                    result.push(HEX[(byte & 0xF) as usize]);
858                }
859            }
860        }
861        HyperlinkPath(result)
862    }
863}
864
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use super::*;

    /// Builds a format from three text slices followed by the `path`
    /// variable, and expects the slices to show up as one leading text part.
    #[test]
    fn build_format() {
        let format = FormatBuilder::new()
            .append_slice(b"foo://")
            .append_slice(b"bar-")
            .append_slice(b"baz")
            .append_var("path")
            .unwrap()
            .build()
            .unwrap();

        assert!(!format.is_empty());
        assert_eq!(format.parts[0], Part::Text(b"foo://bar-baz".to_vec()));
        assert_eq!(format.to_string(), "foo://bar-baz{path}");
    }

    /// A builder with nothing appended yields a format equal to both the
    /// explicit empty format and the default one.
    #[test]
    fn build_empty_format() {
        let built = FormatBuilder::new().build().unwrap();

        assert!(built.is_empty());
        for expected in [HyperlinkFormat::empty(), HyperlinkFormat::default()]
        {
            assert_eq!(built, expected);
        }
    }

    /// The `file` and `none` aliases both parse, and `none` gives an empty
    /// format.
    #[test]
    fn handle_alias() {
        let file = HyperlinkFormat::from_str("file");
        assert!(file.is_ok());

        let none = HyperlinkFormat::from_str("none");
        assert!(none.is_ok());
        assert!(none.unwrap().is_empty());
    }

    /// A format using several variables round-trips through `to_string` and
    /// is split into the expected number of parts.
    #[test]
    fn parse_format() {
        let text = "foo://{host}/bar/{path}:{line}:{column}";
        let format = HyperlinkFormat::from_str(text).unwrap();

        assert_eq!(format.to_string(), text);
        assert_eq!(format.parts.len(), 8);
        for part in [Part::Path, Part::Line, Part::Column] {
            assert!(format.parts.contains(&part));
        }
    }

    /// Inputs that must be accepted by the parser.
    #[test]
    fn parse_valid() {
        assert!(HyperlinkFormat::from_str("").unwrap().is_empty());

        // These must also print back exactly as they were written.
        for text in ["foo://{path}", "foo://{path}/bar"] {
            assert_eq!(
                HyperlinkFormat::from_str(text).unwrap().to_string(),
                text
            );
        }

        // These only need to parse successfully.
        let accepted = [
            "f://{path}",
            "f:{path}",
            "f-+.:{path}",
            "f42:{path}",
            "42:{path}",
            "+:{path}",
            "F42:{path}",
            "F42://foo{{bar}}{path}",
        ];
        for text in accepted {
            assert!(
                HyperlinkFormat::from_str(text).is_ok(),
                "expected {text:?} to parse",
            );
        }
    }

    /// Inputs that must be rejected, each paired with the error kind the
    /// parser is expected to report.
    #[test]
    fn parse_invalid() {
        use super::HyperlinkFormatErrorKind::*;

        let cases = [
            ("foo://bar", NoVariables),
            ("foo://{line}", NoPathVariable),
            ("foo://{path", UnclosedVariable),
            ("foo://{path}:{column}", NoLineVariable),
            ("{path}", InvalidScheme),
            (":{path}", InvalidScheme),
            ("f*:{path}", InvalidScheme),
            ("foo://{bar}", InvalidVariable("bar".to_string())),
            ("foo://{}}bar}", InvalidVariable("".to_string())),
            ("foo://{b}}ar}", InvalidVariable("b".to_string())),
            ("foo://{bar}}}", InvalidVariable("bar".to_string())),
            ("foo://{{bar}", InvalidCloseVariable),
            ("foo://{{{bar}", InvalidVariable("bar".to_string())),
            ("foo://{b{{ar}", InvalidVariable("b{{ar".to_string())),
            ("foo://{bar{{}", InvalidVariable("bar{{".to_string())),
        ];
        for (input, kind) in cases {
            assert_eq!(
                HyperlinkFormat::from_str(input).unwrap_err(),
                HyperlinkFormatError { kind },
                "input: {input:?}",
            );
        }
    }
}