grep_printer/hyperlink.rs
1use std::{cell::RefCell, io, path::Path, sync::Arc};
2
3use {
4 bstr::ByteSlice,
5 termcolor::{HyperlinkSpec, WriteColor},
6};
7
8use crate::{hyperlink_aliases, util::DecimalFormatter};
9
/// Hyperlink configuration.
///
/// This configuration specifies both the [hyperlink format](HyperlinkFormat)
/// and an [environment](HyperlinkEnvironment) for interpolating a subset of
/// variables. The specific subset includes variables that are intended to
/// be invariant throughout the lifetime of a process, such as a machine's
/// hostname.
///
/// A hyperlink configuration can be provided to printer builders such as
/// [`StandardBuilder::hyperlink`](crate::StandardBuilder::hyperlink).
///
/// The configuration data is held behind an [`Arc`], so cloning a
/// `HyperlinkConfig` clones the `Arc` rather than the format and environment.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct HyperlinkConfig(Arc<HyperlinkConfigInner>);
22
/// The data stored behind the `Arc` inside a [`HyperlinkConfig`].
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct HyperlinkConfigInner {
    /// Values for the process-invariant variables (`{host}`, `{wslprefix}`).
    env: HyperlinkEnvironment,
    /// The parsed hyperlink format that gets interpolated.
    format: HyperlinkFormat,
}
28
29impl HyperlinkConfig {
30 /// Create a new configuration from an environment and a format.
31 pub fn new(
32 env: HyperlinkEnvironment,
33 format: HyperlinkFormat,
34 ) -> HyperlinkConfig {
35 HyperlinkConfig(Arc::new(HyperlinkConfigInner { env, format }))
36 }
37
38 /// Returns the hyperlink environment in this configuration.
39 pub(crate) fn environment(&self) -> &HyperlinkEnvironment {
40 &self.0.env
41 }
42
43 /// Returns the hyperlink format in this configuration.
44 pub(crate) fn format(&self) -> &HyperlinkFormat {
45 &self.0.format
46 }
47}
48
/// A hyperlink format with variables.
///
/// This can be created by parsing a string using `HyperlinkFormat::from_str`.
///
/// The default format is empty. An empty format is valid and effectively
/// disables hyperlinks.
///
/// # Example
///
/// ```
/// use grep_printer::HyperlinkFormat;
///
/// let fmt = "vscode".parse::<HyperlinkFormat>()?;
/// assert_eq!(fmt.to_string(), "vscode://file{path}:{line}:{column}");
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct HyperlinkFormat {
    /// The sequence of static text and variables making up the format.
    parts: Vec<Part>,
    /// Whether `parts` contains a `{line}` variable.
    is_line_dependent: bool,
}
71
72impl HyperlinkFormat {
73 /// Creates an empty hyperlink format.
74 pub fn empty() -> HyperlinkFormat {
75 HyperlinkFormat::default()
76 }
77
78 /// Returns true if this format is empty.
79 pub fn is_empty(&self) -> bool {
80 self.parts.is_empty()
81 }
82
83 /// Creates a [`HyperlinkConfig`] from this format and the environment
84 /// given.
85 pub fn into_config(self, env: HyperlinkEnvironment) -> HyperlinkConfig {
86 HyperlinkConfig::new(env, self)
87 }
88
89 /// Returns true if the format can produce line-dependent hyperlinks.
90 pub(crate) fn is_line_dependent(&self) -> bool {
91 self.is_line_dependent
92 }
93}
94
95impl std::str::FromStr for HyperlinkFormat {
96 type Err = HyperlinkFormatError;
97
98 fn from_str(s: &str) -> Result<HyperlinkFormat, HyperlinkFormatError> {
99 use self::HyperlinkFormatErrorKind::*;
100
101 #[derive(Debug)]
102 enum State {
103 Verbatim,
104 VerbatimCloseVariable,
105 OpenVariable,
106 InVariable,
107 }
108
109 let mut builder = FormatBuilder::new();
110 let input = match hyperlink_aliases::find(s) {
111 Some(format) => format,
112 None => s,
113 };
114 let mut name = String::new();
115 let mut state = State::Verbatim;
116 let err = |kind| HyperlinkFormatError { kind };
117 for ch in input.chars() {
118 state = match state {
119 State::Verbatim => {
120 if ch == '{' {
121 State::OpenVariable
122 } else if ch == '}' {
123 State::VerbatimCloseVariable
124 } else {
125 builder.append_char(ch);
126 State::Verbatim
127 }
128 }
129 State::VerbatimCloseVariable => {
130 if ch == '}' {
131 builder.append_char('}');
132 State::Verbatim
133 } else {
134 return Err(err(InvalidCloseVariable));
135 }
136 }
137 State::OpenVariable => {
138 if ch == '{' {
139 builder.append_char('{');
140 State::Verbatim
141 } else {
142 name.clear();
143 if ch == '}' {
144 builder.append_var(&name)?;
145 State::Verbatim
146 } else {
147 name.push(ch);
148 State::InVariable
149 }
150 }
151 }
152 State::InVariable => {
153 if ch == '}' {
154 builder.append_var(&name)?;
155 State::Verbatim
156 } else {
157 name.push(ch);
158 State::InVariable
159 }
160 }
161 };
162 }
163 match state {
164 State::Verbatim => builder.build(),
165 State::VerbatimCloseVariable => Err(err(InvalidCloseVariable)),
166 State::OpenVariable | State::InVariable => {
167 Err(err(UnclosedVariable))
168 }
169 }
170 }
171}
172
173impl std::fmt::Display for HyperlinkFormat {
174 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
175 for part in self.parts.iter() {
176 part.fmt(f)?;
177 }
178 Ok(())
179 }
180}
181
/// A static environment for hyperlink interpolation.
///
/// This environment permits setting the values of variables used in hyperlink
/// interpolation that are not expected to change for the lifetime of a program.
/// That is, these values are invariant.
///
/// Currently, this includes the hostname and a WSL distro prefix.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct HyperlinkEnvironment {
    /// The value of the `{host}` variable, if one was set.
    host: Option<String>,
    /// The value of the `{wslprefix}` variable, if one was set.
    wsl_prefix: Option<String>,
}
194
195impl HyperlinkEnvironment {
196 /// Create a new empty hyperlink environment.
197 pub fn new() -> HyperlinkEnvironment {
198 HyperlinkEnvironment::default()
199 }
200
201 /// Set the `{host}` variable, which fills in any hostname components of
202 /// a hyperlink.
203 ///
204 /// One can get the hostname in the current environment via the `hostname`
205 /// function in the `grep-cli` crate.
206 pub fn host(&mut self, host: Option<String>) -> &mut HyperlinkEnvironment {
207 self.host = host;
208 self
209 }
210
211 /// Set the `{wslprefix}` variable, which contains the WSL distro prefix.
212 /// An example value is `wsl$/Ubuntu`. The distro name can typically be
213 /// discovered from the `WSL_DISTRO_NAME` environment variable.
214 pub fn wsl_prefix(
215 &mut self,
216 wsl_prefix: Option<String>,
217 ) -> &mut HyperlinkEnvironment {
218 self.wsl_prefix = wsl_prefix;
219 self
220 }
221}
222
/// An error that can occur when parsing a hyperlink format.
///
/// The specific cause is kept in a private kind; the human readable
/// description is available through this type's `Display` implementation.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HyperlinkFormatError {
    /// The specific reason the format was rejected.
    kind: HyperlinkFormatErrorKind,
}
228
/// The specific reasons for which parsing a hyperlink format can fail.
#[derive(Clone, Debug, Eq, PartialEq)]
enum HyperlinkFormatErrorKind {
    /// This occurs when there are zero variables in the format.
    NoVariables,
    /// This occurs when the {path} variable is missing.
    NoPathVariable,
    /// This occurs when the {line} variable is missing, while the {column}
    /// variable is present.
    NoLineVariable,
    /// This occurs when an unknown variable is used. The payload is the
    /// unrecognized variable name.
    InvalidVariable(String),
    /// The format doesn't start with a valid scheme.
    InvalidScheme,
    /// This occurs when an unescaped `}` is found without a corresponding
    /// `{` preceding it.
    InvalidCloseVariable,
    /// This occurs when a `{` is found without a corresponding `}` following
    /// it.
    UnclosedVariable,
}
249
// No methods are overridden: the error relies on the trait's default
// methods and on its `Debug` and `Display` implementations.
impl std::error::Error for HyperlinkFormatError {}
251
252impl std::fmt::Display for HyperlinkFormatError {
253 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
254 use self::HyperlinkFormatErrorKind::*;
255
256 match self.kind {
257 NoVariables => {
258 let aliases = hyperlink_aliases::iter()
259 .map(|(name, _)| name)
260 .collect::<Vec<&str>>()
261 .join(", ");
262 write!(
263 f,
264 "at least a {{path}} variable is required in a \
265 hyperlink format, or otherwise use a valid alias: {}",
266 aliases,
267 )
268 }
269 NoPathVariable => {
270 write!(
271 f,
272 "the {{path}} variable is required in a hyperlink format",
273 )
274 }
275 NoLineVariable => {
276 write!(
277 f,
278 "the hyperlink format contains a {{column}} variable, \
279 but no {{line}} variable is present",
280 )
281 }
282 InvalidVariable(ref name) => {
283 write!(
284 f,
285 "invalid hyperlink format variable: '{name}', choose \
286 from: path, line, column, host, wslprefix",
287 )
288 }
289 InvalidScheme => {
290 write!(
291 f,
292 "the hyperlink format must start with a valid URL scheme, \
293 i.e., [0-9A-Za-z+-.]+:",
294 )
295 }
296 InvalidCloseVariable => {
297 write!(
298 f,
299 "unopened variable: found '}}' without a \
300 corresponding '{{' preceding it",
301 )
302 }
303 UnclosedVariable => {
304 write!(
305 f,
306 "unclosed variable: found '{{' without a \
307 corresponding '}}' following it",
308 )
309 }
310 }
311 }
312}
313
/// A builder for `HyperlinkFormat`.
///
/// Once a `HyperlinkFormat` is built, it is immutable.
#[derive(Debug)]
struct FormatBuilder {
    /// The parts accumulated so far, in the order they were appended.
    parts: Vec<Part>,
}
321
322impl FormatBuilder {
323 /// Creates a new hyperlink format builder.
324 fn new() -> FormatBuilder {
325 FormatBuilder { parts: vec![] }
326 }
327
328 /// Appends static text.
329 fn append_slice(&mut self, text: &[u8]) -> &mut FormatBuilder {
330 if let Some(Part::Text(contents)) = self.parts.last_mut() {
331 contents.extend_from_slice(text);
332 } else if !text.is_empty() {
333 self.parts.push(Part::Text(text.to_vec()));
334 }
335 self
336 }
337
338 /// Appends a single character.
339 fn append_char(&mut self, ch: char) -> &mut FormatBuilder {
340 self.append_slice(ch.encode_utf8(&mut [0; 4]).as_bytes())
341 }
342
343 /// Appends a variable with the given name. If the name isn't recognized,
344 /// then this returns an error.
345 fn append_var(
346 &mut self,
347 name: &str,
348 ) -> Result<&mut FormatBuilder, HyperlinkFormatError> {
349 let part = match name {
350 "host" => Part::Host,
351 "wslprefix" => Part::WSLPrefix,
352 "path" => Part::Path,
353 "line" => Part::Line,
354 "column" => Part::Column,
355 unknown => {
356 let err = HyperlinkFormatError {
357 kind: HyperlinkFormatErrorKind::InvalidVariable(
358 unknown.to_string(),
359 ),
360 };
361 return Err(err);
362 }
363 };
364 self.parts.push(part);
365 Ok(self)
366 }
367
368 /// Builds the format.
369 fn build(&self) -> Result<HyperlinkFormat, HyperlinkFormatError> {
370 self.validate()?;
371 Ok(HyperlinkFormat {
372 parts: self.parts.clone(),
373 is_line_dependent: self.parts.contains(&Part::Line),
374 })
375 }
376
377 /// Validate that the format is well-formed.
378 fn validate(&self) -> Result<(), HyperlinkFormatError> {
379 use self::HyperlinkFormatErrorKind::*;
380
381 let err = |kind| HyperlinkFormatError { kind };
382 // An empty format is fine. It just means hyperlink support is
383 // disabled.
384 if self.parts.is_empty() {
385 return Ok(());
386 }
387 // If all parts are just text, then there are no variables. It's
388 // likely a reference to an invalid alias.
389 if self.parts.iter().all(|p| matches!(*p, Part::Text(_))) {
390 return Err(err(NoVariables));
391 }
392 // Even if we have other variables, no path variable means the
393 // hyperlink can't possibly work the way it is intended.
394 if !self.parts.contains(&Part::Path) {
395 return Err(err(NoPathVariable));
396 }
397 // If the {column} variable is used, then we also need a {line}
398 // variable or else {column} can't possibly work.
399 if self.parts.contains(&Part::Column)
400 && !self.parts.contains(&Part::Line)
401 {
402 return Err(err(NoLineVariable));
403 }
404 self.validate_scheme()
405 }
406
407 /// Validate that the format starts with a valid scheme. Validation is done
408 /// according to how a scheme is defined in RFC 1738 sections 2.1[1] and
409 /// 5[2]. In short, a scheme is this:
410 ///
411 /// scheme = 1*[ lowalpha | digit | "+" | "-" | "." ]
412 ///
413 /// but is case insensitive.
414 ///
415 /// [1]: https://datatracker.ietf.org/doc/html/rfc1738#section-2.1
416 /// [2]: https://datatracker.ietf.org/doc/html/rfc1738#section-5
417 fn validate_scheme(&self) -> Result<(), HyperlinkFormatError> {
418 let err_invalid_scheme = HyperlinkFormatError {
419 kind: HyperlinkFormatErrorKind::InvalidScheme,
420 };
421 let Some(Part::Text(ref part)) = self.parts.first() else {
422 return Err(err_invalid_scheme);
423 };
424 let Some(colon) = part.find_byte(b':') else {
425 return Err(err_invalid_scheme);
426 };
427 let scheme = &part[..colon];
428 if scheme.is_empty() {
429 return Err(err_invalid_scheme);
430 }
431 let is_valid_scheme_char = |byte| match byte {
432 b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'+' | b'-' | b'.' => {
433 true
434 }
435 _ => false,
436 };
437 if !scheme.iter().all(|&b| is_valid_scheme_char(b)) {
438 return Err(err_invalid_scheme);
439 }
440 Ok(())
441 }
442}
443
/// A hyperlink format part.
///
/// A sequence of these corresponds to a complete format. (Not all sequences
/// are valid.)
#[derive(Clone, Debug, Eq, PartialEq)]
enum Part {
    /// Static text.
    ///
    /// We use `Vec<u8>` here (and more generally treat a format string as a
    /// sequence of bytes) because file paths may be arbitrary bytes. A rare
    /// case, but one for which there is no good reason to choke on.
    Text(Vec<u8>),
    /// Variable for the hostname. Written as `{host}` in a format string.
    Host,
    /// Variable for a WSL path prefix. Written as `{wslprefix}` in a format
    /// string.
    WSLPrefix,
    /// Variable for the file path. Written as `{path}` in a format string.
    Path,
    /// Variable for the line number. Written as `{line}` in a format string.
    Line,
    /// Variable for the column number. Written as `{column}` in a format
    /// string.
    Column,
}
467
468impl Part {
469 /// Interpolate this part using the given `env` and `values`, and write
470 /// the result of interpolation to the buffer provided.
471 fn interpolate_to(
472 &self,
473 env: &HyperlinkEnvironment,
474 values: &Values,
475 dest: &mut Vec<u8>,
476 ) {
477 match self {
478 Part::Text(ref text) => dest.extend_from_slice(text),
479 Part::Host => dest.extend_from_slice(
480 env.host.as_ref().map(|s| s.as_bytes()).unwrap_or(b""),
481 ),
482 Part::WSLPrefix => dest.extend_from_slice(
483 env.wsl_prefix.as_ref().map(|s| s.as_bytes()).unwrap_or(b""),
484 ),
485 Part::Path => dest.extend_from_slice(&values.path.0),
486 Part::Line => {
487 let line = DecimalFormatter::new(values.line.unwrap_or(1));
488 dest.extend_from_slice(line.as_bytes());
489 }
490 Part::Column => {
491 let column = DecimalFormatter::new(values.column.unwrap_or(1));
492 dest.extend_from_slice(column.as_bytes());
493 }
494 }
495 }
496}
497
498impl std::fmt::Display for Part {
499 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
500 match self {
501 Part::Text(text) => write!(f, "{}", String::from_utf8_lossy(text)),
502 Part::Host => write!(f, "{{host}}"),
503 Part::WSLPrefix => write!(f, "{{wslprefix}}"),
504 Part::Path => write!(f, "{{path}}"),
505 Part::Line => write!(f, "{{line}}"),
506 Part::Column => write!(f, "{{column}}"),
507 }
508 }
509}
510
/// The values to replace the format variables with.
///
/// This only consists of values that depend on each path or match printed.
/// Values that are invariant throughout the lifetime of the process are set
/// via a [`HyperlinkEnvironment`].
#[derive(Clone, Debug)]
pub(crate) struct Values<'a> {
    /// The value of the `{path}` variable.
    path: &'a HyperlinkPath,
    /// The value of the `{line}` variable, if one was set.
    line: Option<u64>,
    /// The value of the `{column}` variable, if one was set.
    column: Option<u64>,
}
522
523impl<'a> Values<'a> {
524 /// Creates a new set of values, starting with the path given.
525 ///
526 /// Callers may also set the line and column number using the mutator
527 /// methods.
528 pub(crate) fn new(path: &'a HyperlinkPath) -> Values<'a> {
529 Values { path, line: None, column: None }
530 }
531
532 /// Sets the line number for these values.
533 ///
534 /// If a line number is not set and a hyperlink format contains a `{line}`
535 /// variable, then it is interpolated with the value of `1` automatically.
536 pub(crate) fn line(mut self, line: Option<u64>) -> Values<'a> {
537 self.line = line;
538 self
539 }
540
541 /// Sets the column number for these values.
542 ///
543 /// If a column number is not set and a hyperlink format contains a
544 /// `{column}` variable, then it is interpolated with the value of `1`
545 /// automatically.
546 pub(crate) fn column(mut self, column: Option<u64>) -> Values<'a> {
547 self.column = column;
548 self
549 }
550}
551
/// An abstraction for interpolating a hyperlink format with values for every
/// variable.
///
/// Interpolation of variables occurs through two different sources. The
/// first is via a `HyperlinkEnvironment` for values that are expected to
/// be invariant. This comes from the `HyperlinkConfig` used to build this
/// interpolator. The second source is via `Values`, which is provided to
/// `Interpolator::begin`. The `Values` contains things like the file path,
/// line number and column number.
#[derive(Clone, Debug)]
pub(crate) struct Interpolator {
    /// The format to interpolate, along with its environment.
    config: HyperlinkConfig,
    /// Scratch space for the interpolated hyperlink. It is cleared and
    /// refilled by every `Interpolator::begin` call that writes a hyperlink.
    buf: RefCell<Vec<u8>>,
}
566
567impl Interpolator {
568 /// Create a new interpolator for the given hyperlink format configuration.
569 pub(crate) fn new(config: &HyperlinkConfig) -> Interpolator {
570 Interpolator { config: config.clone(), buf: RefCell::new(vec![]) }
571 }
572
573 /// Start interpolation with the given values by writing a hyperlink
574 /// to `wtr`. Subsequent writes to `wtr`, until `Interpolator::end` is
575 /// called, are the label for the hyperlink.
576 ///
577 /// This returns an interpolator status which indicates whether the
578 /// hyperlink was written. It might not be written, for example, if the
579 /// underlying writer doesn't support hyperlinks or if the hyperlink
580 /// format is empty. The status should be provided to `Interpolator::end`
581 /// as an instruction for whether to close the hyperlink or not.
582 pub(crate) fn begin<W: WriteColor>(
583 &self,
584 values: &Values,
585 mut wtr: W,
586 ) -> io::Result<InterpolatorStatus> {
587 if self.config.format().is_empty()
588 || !wtr.supports_hyperlinks()
589 || !wtr.supports_color()
590 {
591 return Ok(InterpolatorStatus::inactive());
592 }
593 let mut buf = self.buf.borrow_mut();
594 buf.clear();
595 for part in self.config.format().parts.iter() {
596 part.interpolate_to(self.config.environment(), values, &mut buf);
597 }
598 let spec = HyperlinkSpec::open(&buf);
599 wtr.set_hyperlink(&spec)?;
600 Ok(InterpolatorStatus { active: true })
601 }
602
603 /// Writes the correct escape sequences to `wtr` to close any extant
604 /// hyperlink, marking the end of a hyperlink's label.
605 ///
606 /// The status given should be returned from a corresponding
607 /// `Interpolator::begin` call. Since `begin` may not write a hyperlink
608 /// (e.g., if the underlying writer doesn't support hyperlinks), it follows
609 /// that `finish` must not close a hyperlink that was never opened. The
610 /// status indicates whether the hyperlink was opened or not.
611 pub(crate) fn finish<W: WriteColor>(
612 &self,
613 status: InterpolatorStatus,
614 mut wtr: W,
615 ) -> io::Result<()> {
616 if !status.active {
617 return Ok(());
618 }
619 wtr.set_hyperlink(&HyperlinkSpec::close())
620 }
621}
622
/// A status indicating whether a hyperlink was written or not.
///
/// This is created by `Interpolator::begin` and used by `Interpolator::finish`
/// to determine whether a hyperlink was actually opened or not. If it wasn't
/// opened, then finishing interpolation is a no-op.
#[derive(Debug)]
pub(crate) struct InterpolatorStatus {
    /// True when a hyperlink was opened and therefore needs to be closed.
    active: bool,
}
632
633impl InterpolatorStatus {
634 /// Create an inactive interpolator status.
635 #[inline]
636 pub(crate) fn inactive() -> InterpolatorStatus {
637 InterpolatorStatus { active: false }
638 }
639}
640
/// Represents the `{path}` part of a hyperlink.
///
/// This is the value to use as-is in the hyperlink, converted from an OS file
/// path. The wrapped bytes are the already percent-encoded path.
#[derive(Clone, Debug)]
pub(crate) struct HyperlinkPath(Vec<u8>);
647
648impl HyperlinkPath {
649 /// Returns a hyperlink path from an OS path.
650 #[cfg(unix)]
651 pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
652 use std::os::unix::ffi::OsStrExt;
653
654 // We canonicalize the path in order to get an absolute version of it
655 // without any `.` or `..` or superfluous separators. Unfortunately,
656 // this does also remove symlinks, and in theory, it would be nice to
657 // retain them. Perhaps even simpler, we could just join the current
658 // working directory with the path and be done with it. There was
659 // some discussion about this on PR#2483, and there generally appears
660 // to be some uncertainty about the extent to which hyperlinks with
661 // things like `..` in them actually work. So for now, we do the safest
662 // thing possible even though I think it can result in worse user
663 // experience. (Because it means the path you click on and the actual
664 // path that gets followed are different, even though they ostensibly
665 // refer to the same file.)
666 //
667 // There's also the potential issue that path canonicalization is
668 // expensive since it can touch the file system. That is probably
669 // less of an issue since hyperlinks are only created when they're
670 // supported, i.e., when writing to a tty.
671 //
672 // [1]: https://github.com/BurntSushi/ripgrep/pull/2483
673 let path = match original_path.canonicalize() {
674 Ok(path) => path,
675 Err(err) => {
676 log::debug!(
677 "hyperlink creation for {:?} failed, error occurred \
678 during path canonicalization: {}",
679 original_path,
680 err,
681 );
682 return None;
683 }
684 };
685 let bytes = path.as_os_str().as_bytes();
686 // This should not be possible since one imagines that canonicalization
687 // should always return an absolute path. But it doesn't actually
688 // appear guaranteed by POSIX, so we check whether it's true or not and
689 // refuse to create a hyperlink from a relative path if it isn't.
690 if !bytes.starts_with(b"/") {
691 log::debug!(
692 "hyperlink creation for {:?} failed, canonicalization \
693 returned {:?}, which does not start with a slash",
694 original_path,
695 path,
696 );
697 return None;
698 }
699 Some(HyperlinkPath::encode(bytes))
700 }
701
702 /// Returns a hyperlink path from an OS path.
703 #[cfg(windows)]
704 pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
705 // On Windows, Path::canonicalize returns the result of
706 // GetFinalPathNameByHandleW with VOLUME_NAME_DOS,
707 // which produces paths such as the following:
708 //
709 // \\?\C:\dir\file.txt (local path)
710 // \\?\UNC\server\dir\file.txt (network share)
711 //
712 // The \\?\ prefix comes from VOLUME_NAME_DOS and is constant.
713 // It is followed either by the drive letter, or by UNC\
714 // (universal naming convention), which denotes a network share.
715 //
716 // Given that the default URL format on Windows is file://{path}
717 // we need to return the following from this function:
718 //
719 // /C:/dir/file.txt (local path)
720 // //server/dir/file.txt (network share)
721 //
722 // Which produces the following links:
723 //
724 // file:///C:/dir/file.txt (local path)
725 // file:////server/dir/file.txt (network share)
726 //
727 // This substitutes the {path} variable with the expected value for
728 // the most common DOS paths, but on the other hand, network paths
729 // start with a single slash, which may be unexpected. It seems to work
730 // though?
731 //
732 // Note that the following URL syntax also seems to be valid?
733 //
734 // file://server/dir/file.txt
735 //
736 // But the initial implementation of this routine went for the format
737 // above.
738 //
739 // Also note that the file://C:/dir/file.txt syntax is not correct,
740 // even though it often works in practice.
741 //
742 // In the end, this choice was confirmed by VSCode, whose format is
743 //
744 // vscode://file{path}:{line}:{column}
745 //
746 // and which correctly understands the following URL format for network
747 // drives:
748 //
749 // vscode://file//server/dir/file.txt:1:1
750 //
751 // It doesn't parse any other number of slashes in "file//server" as a
752 // network path.
753
754 const WIN32_NAMESPACE_PREFIX: &str = r"\\?\";
755 const UNC_PREFIX: &str = r"UNC\";
756
757 // As for Unix, we canonicalize the path to make sure we have an
758 // absolute path.
759 let path = match original_path.canonicalize() {
760 Ok(path) => path,
761 Err(err) => {
762 log::debug!(
763 "hyperlink creation for {:?} failed, error occurred \
764 during path canonicalization: {}",
765 original_path,
766 err,
767 );
768 return None;
769 }
770 };
771 // We convert the path to a string for easier manipulation. If it
772 // wasn't valid UTF-16 (and thus could not be non-lossily transcoded
773 // to UTF-8), then we just give up. It's not clear we could make
774 // a meaningful hyperlink from it anyway. And this should be an
775 // exceptionally rare case.
776 let mut string = match path.to_str() {
777 Some(string) => string,
778 None => {
779 log::debug!(
780 "hyperlink creation for {:?} failed, path is not \
781 valid UTF-8",
782 original_path,
783 );
784 return None;
785 }
786 };
787 // As the comment above says, we expect all canonicalized paths to
788 // begin with a \\?\. If it doesn't, then something weird is happening
789 // and we should just give up.
790 if !string.starts_with(WIN32_NAMESPACE_PREFIX) {
791 log::debug!(
792 "hyperlink creation for {:?} failed, canonicalization \
793 returned {:?}, which does not start with \\\\?\\",
794 original_path,
795 path,
796 );
797 return None;
798 }
799 string = &string[WIN32_NAMESPACE_PREFIX.len()..];
800
801 // And as above, drop the UNC prefix too, but keep the leading slash.
802 if string.starts_with(UNC_PREFIX) {
803 string = &string[(UNC_PREFIX.len() - 1)..];
804 }
805 // Finally, add a leading slash. In the local file case, this turns
806 // C:\foo\bar into /C:\foo\bar (and then percent encoding turns it into
807 // /C:/foo/bar). In the network share case, this turns \share\foo\bar
808 // into /\share/foo/bar (and then percent encoding turns it into
809 // //share/foo/bar).
810 let with_slash = format!("/{string}");
811 Some(HyperlinkPath::encode(with_slash.as_bytes()))
812 }
813
814 /// For other platforms (not windows, not unix), return None and log a debug message.
815 #[cfg(not(any(windows, unix)))]
816 pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
817 log::debug!("hyperlinks are not supported on this platform");
818 None
819 }
820
821 /// Percent-encodes a path.
822 ///
823 /// The alphanumeric ASCII characters and "-", ".", "_", "~" are unreserved
824 /// as per section 2.3 of RFC 3986 (Uniform Resource Identifier (URI):
825 /// Generic Syntax), and are not encoded. The other ASCII characters except
826 /// "/" and ":" are percent-encoded, and "\" is replaced by "/" on Windows.
827 ///
828 /// Section 4 of RFC 8089 (The "file" URI Scheme) does not mandate precise
829 /// encoding requirements for non-ASCII characters, and this implementation
830 /// leaves them unencoded. On Windows, the UrlCreateFromPathW function does
831 /// not encode non-ASCII characters. Doing so with UTF-8 encoded paths
832 /// creates invalid file:// URLs on that platform.
833 fn encode(input: &[u8]) -> HyperlinkPath {
834 let mut result = Vec::with_capacity(input.len());
835 for &byte in input.iter() {
836 match byte {
837 b'0'..=b'9'
838 | b'A'..=b'Z'
839 | b'a'..=b'z'
840 | b'/'
841 | b':'
842 | b'-'
843 | b'.'
844 | b'_'
845 | b'~'
846 | 128.. => {
847 result.push(byte);
848 }
849 #[cfg(windows)]
850 b'\\' => {
851 result.push(b'/');
852 }
853 _ => {
854 const HEX: &[u8] = b"0123456789ABCDEF";
855 result.push(b'%');
856 result.push(HEX[(byte >> 4) as usize]);
857 result.push(HEX[(byte & 0xF) as usize]);
858 }
859 }
860 }
861 HyperlinkPath(result)
862 }
863}
864
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use super::*;

    /// Parses `input`, panicking when it is not a valid hyperlink format.
    fn parse(input: &str) -> HyperlinkFormat {
        HyperlinkFormat::from_str(input).unwrap()
    }

    /// Adjacent text appended to a builder is merged into a single part.
    #[test]
    fn build_format() {
        let chunks: [&[u8]; 3] = [b"foo://", b"bar-", b"baz"];
        let mut builder = FormatBuilder::new();
        for chunk in chunks {
            builder.append_slice(chunk);
        }
        builder.append_var("path").unwrap();
        let format = builder.build().unwrap();

        assert_eq!(format.to_string(), "foo://bar-baz{path}");
        assert_eq!(format.parts[0], Part::Text(b"foo://bar-baz".to_vec()));
        assert!(!format.is_empty());
    }

    /// A builder without any parts produces the empty (default) format.
    #[test]
    fn build_empty_format() {
        let format = FormatBuilder::new().build().unwrap();

        assert!(format.is_empty());
        for expected in [HyperlinkFormat::empty(), HyperlinkFormat::default()]
        {
            assert_eq!(format, expected);
        }
    }

    /// Aliases are accepted in place of a format, and `none` is empty.
    #[test]
    fn handle_alias() {
        for alias in ["file", "none"] {
            assert!(HyperlinkFormat::from_str(alias).is_ok());
        }
        assert!(parse("none").is_empty());
    }

    /// A format using every kind of part survives a parse/display round
    /// trip.
    #[test]
    fn parse_format() {
        let input = "foo://{host}/bar/{path}:{line}:{column}";
        let format = parse(input);

        assert_eq!(format.to_string(), input);
        assert_eq!(format.parts.len(), 8);
        for variable in [Part::Path, Part::Line, Part::Column] {
            assert!(format.parts.contains(&variable));
        }
    }

    /// Formats that must be accepted by the parser.
    #[test]
    fn parse_valid() {
        assert!(parse("").is_empty());
        for input in ["foo://{path}", "foo://{path}/bar"] {
            assert_eq!(parse(input).to_string(), input);
        }

        let accepted = [
            "f://{path}",
            "f:{path}",
            "f-+.:{path}",
            "f42:{path}",
            "42:{path}",
            "+:{path}",
            "F42:{path}",
            "F42://foo{{bar}}{path}",
        ];
        for input in accepted {
            parse(input);
        }
    }

    /// Formats that must be rejected, along with the reason for rejection.
    #[test]
    fn parse_invalid() {
        use super::HyperlinkFormatErrorKind::*;

        let invalid = |name: &str| InvalidVariable(name.to_string());
        let rejected = [
            ("foo://bar", NoVariables),
            ("foo://{line}", NoPathVariable),
            ("foo://{path", UnclosedVariable),
            ("foo://{path}:{column}", NoLineVariable),
            ("{path}", InvalidScheme),
            (":{path}", InvalidScheme),
            ("f*:{path}", InvalidScheme),
            ("foo://{bar}", invalid("bar")),
            ("foo://{}}bar}", invalid("")),
            ("foo://{b}}ar}", invalid("b")),
            ("foo://{bar}}}", invalid("bar")),
            ("foo://{{bar}", InvalidCloseVariable),
            ("foo://{{{bar}", invalid("bar")),
            ("foo://{b{{ar}", invalid("b{{ar")),
            ("foo://{bar{{}", invalid("bar{{")),
        ];
        for (input, kind) in rejected {
            assert_eq!(
                HyperlinkFormat::from_str(input).unwrap_err(),
                HyperlinkFormatError { kind },
                "unexpected error for format {input:?}",
            );
        }
    }
}