grep_printer/hyperlink/mod.rs
1use std::{cell::RefCell, io, path::Path, sync::Arc};
2
3use {
4 bstr::ByteSlice,
5 termcolor::{HyperlinkSpec, WriteColor},
6};
7
8use crate::util::DecimalFormatter;
9
10use self::aliases::HYPERLINK_PATTERN_ALIASES;
11
12mod aliases;
13
14/// Hyperlink configuration.
15///
16/// This configuration specifies both the [hyperlink format](HyperlinkFormat)
17/// and an [environment](HyperlinkConfig) for interpolating a subset of
18/// variables. The specific subset includes variables that are intended to
19/// be invariant throughout the lifetime of a process, such as a machine's
20/// hostname.
21///
22/// A hyperlink configuration can be provided to printer builders such as
23/// [`StandardBuilder::hyperlink`](crate::StandardBuilder::hyperlink).
24#[derive(Clone, Debug, Default, Eq, PartialEq)]
25pub struct HyperlinkConfig(Arc<HyperlinkConfigInner>);
26
27#[derive(Clone, Debug, Default, Eq, PartialEq)]
28struct HyperlinkConfigInner {
29 env: HyperlinkEnvironment,
30 format: HyperlinkFormat,
31}
32
33impl HyperlinkConfig {
34 /// Create a new configuration from an environment and a format.
35 pub fn new(
36 env: HyperlinkEnvironment,
37 format: HyperlinkFormat,
38 ) -> HyperlinkConfig {
39 HyperlinkConfig(Arc::new(HyperlinkConfigInner { env, format }))
40 }
41
42 /// Returns the hyperlink environment in this configuration.
43 pub(crate) fn environment(&self) -> &HyperlinkEnvironment {
44 &self.0.env
45 }
46
47 /// Returns the hyperlink format in this configuration.
48 pub(crate) fn format(&self) -> &HyperlinkFormat {
49 &self.0.format
50 }
51}
52
53/// A hyperlink format with variables.
54///
55/// This can be created by parsing a string using `HyperlinkFormat::from_str`.
56///
57/// The default format is empty. An empty format is valid and effectively
58/// disables hyperlinks.
59///
60/// # Example
61///
62/// ```
63/// use grep_printer::HyperlinkFormat;
64///
65/// let fmt = "vscode".parse::<HyperlinkFormat>()?;
66/// assert_eq!(fmt.to_string(), "vscode://file{path}:{line}:{column}");
67///
68/// # Ok::<(), Box<dyn std::error::Error>>(())
69/// ```
70#[derive(Clone, Debug, Default, Eq, PartialEq)]
71pub struct HyperlinkFormat {
72 parts: Vec<Part>,
73 is_line_dependent: bool,
74}
75
76impl HyperlinkFormat {
77 /// Creates an empty hyperlink format.
78 pub fn empty() -> HyperlinkFormat {
79 HyperlinkFormat::default()
80 }
81
82 /// Returns true if this format is empty.
83 pub fn is_empty(&self) -> bool {
84 self.parts.is_empty()
85 }
86
87 /// Creates a [`HyperlinkConfig`] from this format and the environment
88 /// given.
89 pub fn into_config(self, env: HyperlinkEnvironment) -> HyperlinkConfig {
90 HyperlinkConfig::new(env, self)
91 }
92
93 /// Returns true if the format can produce line-dependent hyperlinks.
94 pub(crate) fn is_line_dependent(&self) -> bool {
95 self.is_line_dependent
96 }
97}
98
99impl std::str::FromStr for HyperlinkFormat {
100 type Err = HyperlinkFormatError;
101
102 fn from_str(s: &str) -> Result<HyperlinkFormat, HyperlinkFormatError> {
103 use self::HyperlinkFormatErrorKind::*;
104
105 #[derive(Debug)]
106 enum State {
107 Verbatim,
108 VerbatimCloseVariable,
109 OpenVariable,
110 InVariable,
111 }
112
113 let mut builder = FormatBuilder::new();
114 let input = match HyperlinkAlias::find(s) {
115 Some(alias) => alias.format(),
116 None => s,
117 };
118 let mut name = String::new();
119 let mut state = State::Verbatim;
120 let err = |kind| HyperlinkFormatError { kind };
121 for ch in input.chars() {
122 state = match state {
123 State::Verbatim => {
124 if ch == '{' {
125 State::OpenVariable
126 } else if ch == '}' {
127 State::VerbatimCloseVariable
128 } else {
129 builder.append_char(ch);
130 State::Verbatim
131 }
132 }
133 State::VerbatimCloseVariable => {
134 if ch == '}' {
135 builder.append_char('}');
136 State::Verbatim
137 } else {
138 return Err(err(InvalidCloseVariable));
139 }
140 }
141 State::OpenVariable => {
142 if ch == '{' {
143 builder.append_char('{');
144 State::Verbatim
145 } else {
146 name.clear();
147 if ch == '}' {
148 builder.append_var(&name)?;
149 State::Verbatim
150 } else {
151 name.push(ch);
152 State::InVariable
153 }
154 }
155 }
156 State::InVariable => {
157 if ch == '}' {
158 builder.append_var(&name)?;
159 State::Verbatim
160 } else {
161 name.push(ch);
162 State::InVariable
163 }
164 }
165 };
166 }
167 match state {
168 State::Verbatim => builder.build(),
169 State::VerbatimCloseVariable => Err(err(InvalidCloseVariable)),
170 State::OpenVariable | State::InVariable => {
171 Err(err(UnclosedVariable))
172 }
173 }
174 }
175}
176
177impl std::fmt::Display for HyperlinkFormat {
178 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
179 for part in self.parts.iter() {
180 part.fmt(f)?;
181 }
182 Ok(())
183 }
184}
185
186/// An alias for a hyperlink format.
187///
188/// Hyperlink aliases are built-in formats, therefore they hold static values.
189/// Some of their features are usable in const blocks.
190#[derive(Clone, Debug)]
191pub struct HyperlinkAlias {
192 name: &'static str,
193 description: &'static str,
194 format: &'static str,
195 display_priority: Option<i16>,
196}
197
198impl HyperlinkAlias {
199 /// Returns the name of the alias.
200 pub const fn name(&self) -> &str {
201 self.name
202 }
203
204 /// Returns a very short description of this hyperlink alias.
205 pub const fn description(&self) -> &str {
206 self.description
207 }
208
209 /// Returns the display priority of this alias.
210 ///
211 /// If no priority is set, then `None` is returned.
212 ///
213 /// The display priority is meant to reflect some special status associated
214 /// with an alias. For example, the `default` and `none` aliases have a
215 /// display priority. This is meant to encourage listing them first in
216 /// documentation.
217 ///
218 /// A lower display priority implies the alias should be shown before
219 /// aliases with a higher (or absent) display priority.
220 ///
221 /// Callers cannot rely on any specific display priority value to remain
222 /// stable across semver compatible releases of this crate.
223 pub const fn display_priority(&self) -> Option<i16> {
224 self.display_priority
225 }
226
227 /// Returns the format string of the alias.
228 const fn format(&self) -> &'static str {
229 self.format
230 }
231
232 /// Looks for the hyperlink alias defined by the given name.
233 ///
234 /// If one does not exist, `None` is returned.
235 fn find(name: &str) -> Option<&HyperlinkAlias> {
236 HYPERLINK_PATTERN_ALIASES
237 .binary_search_by_key(&name, |alias| alias.name())
238 .map(|i| &HYPERLINK_PATTERN_ALIASES[i])
239 .ok()
240 }
241}
242
/// A static environment for hyperlink interpolation.
///
/// The environment carries the values of interpolation variables that are
/// expected to stay the same for the whole lifetime of a program. That is,
/// these values are invariant.
///
/// Currently, this includes the hostname and a WSL distro prefix.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct HyperlinkEnvironment {
    /// Value substituted for `{host}`, if any.
    host: Option<String>,
    /// Value substituted for `{wslprefix}`, if any.
    wsl_prefix: Option<String>,
}

impl HyperlinkEnvironment {
    /// Create a new empty hyperlink environment, in which no variable has a
    /// value.
    pub fn new() -> HyperlinkEnvironment {
        HyperlinkEnvironment { host: None, wsl_prefix: None }
    }

    /// Set the `{host}` variable, which fills in any hostname components of
    /// a hyperlink. Passing `None` clears a previously set value.
    ///
    /// One can get the hostname in the current environment via the `hostname`
    /// function in the `grep-cli` crate.
    pub fn host(&mut self, host: Option<String>) -> &mut HyperlinkEnvironment {
        let env = self;
        env.host = host;
        env
    }

    /// Set the `{wslprefix}` variable, which contains the WSL distro prefix.
    /// An example value is `wsl$/Ubuntu`. The distro name can typically be
    /// discovered from the `WSL_DISTRO_NAME` environment variable. Passing
    /// `None` clears a previously set value.
    pub fn wsl_prefix(
        &mut self,
        wsl_prefix: Option<String>,
    ) -> &mut HyperlinkEnvironment {
        let env = self;
        env.wsl_prefix = wsl_prefix;
        env
    }
}
283
/// An error that can occur when parsing a hyperlink format.
///
/// The specific cause is kept private; it is reported through this type's
/// `Display` implementation.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HyperlinkFormatError {
    /// The specific reason the format was rejected.
    kind: HyperlinkFormatErrorKind,
}

/// The reasons for which a hyperlink format can be rejected, either while
/// scanning the format string or while validating the assembled parts.
#[derive(Clone, Debug, Eq, PartialEq)]
enum HyperlinkFormatErrorKind {
    /// This occurs when there are zero variables in the format.
    NoVariables,
    /// This occurs when the {path} variable is missing.
    NoPathVariable,
    /// This occurs when the {line} variable is missing, while the {column}
    /// variable is present.
    NoLineVariable,
    /// This occurs when an unknown variable is used.
    InvalidVariable(String),
    /// The format doesn't start with a valid scheme.
    InvalidScheme,
    /// This occurs when an unescaped `}` is found without a corresponding
    /// `{` preceding it.
    InvalidCloseVariable,
    /// This occurs when a `{` is found without a corresponding `}` following
    /// it.
    UnclosedVariable,
}

// No underlying source error is exposed, so the trait's default methods are
// sufficient.
impl std::error::Error for HyperlinkFormatError {}
312
313impl std::fmt::Display for HyperlinkFormatError {
314 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
315 use self::HyperlinkFormatErrorKind::*;
316
317 match self.kind {
318 NoVariables => {
319 let mut aliases = hyperlink_aliases();
320 aliases.sort_by_key(|alias| {
321 alias.display_priority().unwrap_or(i16::MAX)
322 });
323 let names: Vec<&str> =
324 aliases.iter().map(|alias| alias.name()).collect();
325 write!(
326 f,
327 "at least a {{path}} variable is required in a \
328 hyperlink format, or otherwise use a valid alias: \
329 {aliases}",
330 aliases = names.join(", "),
331 )
332 }
333 NoPathVariable => {
334 write!(
335 f,
336 "the {{path}} variable is required in a hyperlink format",
337 )
338 }
339 NoLineVariable => {
340 write!(
341 f,
342 "the hyperlink format contains a {{column}} variable, \
343 but no {{line}} variable is present",
344 )
345 }
346 InvalidVariable(ref name) => {
347 write!(
348 f,
349 "invalid hyperlink format variable: '{name}', choose \
350 from: path, line, column, host, wslprefix",
351 )
352 }
353 InvalidScheme => {
354 write!(
355 f,
356 "the hyperlink format must start with a valid URL scheme, \
357 i.e., [0-9A-Za-z+-.]+:",
358 )
359 }
360 InvalidCloseVariable => {
361 write!(
362 f,
363 "unopened variable: found '}}' without a \
364 corresponding '{{' preceding it",
365 )
366 }
367 UnclosedVariable => {
368 write!(
369 f,
370 "unclosed variable: found '{{' without a \
371 corresponding '}}' following it",
372 )
373 }
374 }
375 }
376}
377
378/// A builder for `HyperlinkFormat`.
379///
380/// Once a `HyperlinkFormat` is built, it is immutable.
381#[derive(Debug)]
382struct FormatBuilder {
383 parts: Vec<Part>,
384}
385
386impl FormatBuilder {
387 /// Creates a new hyperlink format builder.
388 fn new() -> FormatBuilder {
389 FormatBuilder { parts: vec![] }
390 }
391
392 /// Appends static text.
393 fn append_slice(&mut self, text: &[u8]) -> &mut FormatBuilder {
394 if let Some(Part::Text(contents)) = self.parts.last_mut() {
395 contents.extend_from_slice(text);
396 } else if !text.is_empty() {
397 self.parts.push(Part::Text(text.to_vec()));
398 }
399 self
400 }
401
402 /// Appends a single character.
403 fn append_char(&mut self, ch: char) -> &mut FormatBuilder {
404 self.append_slice(ch.encode_utf8(&mut [0; 4]).as_bytes())
405 }
406
407 /// Appends a variable with the given name. If the name isn't recognized,
408 /// then this returns an error.
409 fn append_var(
410 &mut self,
411 name: &str,
412 ) -> Result<&mut FormatBuilder, HyperlinkFormatError> {
413 let part = match name {
414 "host" => Part::Host,
415 "wslprefix" => Part::WSLPrefix,
416 "path" => Part::Path,
417 "line" => Part::Line,
418 "column" => Part::Column,
419 unknown => {
420 let err = HyperlinkFormatError {
421 kind: HyperlinkFormatErrorKind::InvalidVariable(
422 unknown.to_string(),
423 ),
424 };
425 return Err(err);
426 }
427 };
428 self.parts.push(part);
429 Ok(self)
430 }
431
432 /// Builds the format.
433 fn build(&self) -> Result<HyperlinkFormat, HyperlinkFormatError> {
434 self.validate()?;
435 Ok(HyperlinkFormat {
436 parts: self.parts.clone(),
437 is_line_dependent: self.parts.contains(&Part::Line),
438 })
439 }
440
441 /// Validate that the format is well-formed.
442 fn validate(&self) -> Result<(), HyperlinkFormatError> {
443 use self::HyperlinkFormatErrorKind::*;
444
445 let err = |kind| HyperlinkFormatError { kind };
446 // An empty format is fine. It just means hyperlink support is
447 // disabled.
448 if self.parts.is_empty() {
449 return Ok(());
450 }
451 // If all parts are just text, then there are no variables. It's
452 // likely a reference to an invalid alias.
453 if self.parts.iter().all(|p| matches!(*p, Part::Text(_))) {
454 return Err(err(NoVariables));
455 }
456 // Even if we have other variables, no path variable means the
457 // hyperlink can't possibly work the way it is intended.
458 if !self.parts.contains(&Part::Path) {
459 return Err(err(NoPathVariable));
460 }
461 // If the {column} variable is used, then we also need a {line}
462 // variable or else {column} can't possibly work.
463 if self.parts.contains(&Part::Column)
464 && !self.parts.contains(&Part::Line)
465 {
466 return Err(err(NoLineVariable));
467 }
468 self.validate_scheme()
469 }
470
471 /// Validate that the format starts with a valid scheme. Validation is done
472 /// according to how a scheme is defined in RFC 1738 sections 2.1[1] and
473 /// 5[2]. In short, a scheme is this:
474 ///
475 /// scheme = 1*[ lowalpha | digit | "+" | "-" | "." ]
476 ///
477 /// but is case insensitive.
478 ///
479 /// [1]: https://datatracker.ietf.org/doc/html/rfc1738#section-2.1
480 /// [2]: https://datatracker.ietf.org/doc/html/rfc1738#section-5
481 fn validate_scheme(&self) -> Result<(), HyperlinkFormatError> {
482 let err_invalid_scheme = HyperlinkFormatError {
483 kind: HyperlinkFormatErrorKind::InvalidScheme,
484 };
485 let Some(Part::Text(part)) = self.parts.first() else {
486 return Err(err_invalid_scheme);
487 };
488 let Some(colon) = part.find_byte(b':') else {
489 return Err(err_invalid_scheme);
490 };
491 let scheme = &part[..colon];
492 if scheme.is_empty() {
493 return Err(err_invalid_scheme);
494 }
495 let is_valid_scheme_char = |byte| match byte {
496 b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'+' | b'-' | b'.' => {
497 true
498 }
499 _ => false,
500 };
501 if !scheme.iter().all(|&b| is_valid_scheme_char(b)) {
502 return Err(err_invalid_scheme);
503 }
504 Ok(())
505 }
506}
507
508/// A hyperlink format part.
509///
510/// A sequence of these corresponds to a complete format. (Not all sequences
511/// are valid.)
512#[derive(Clone, Debug, Eq, PartialEq)]
513enum Part {
514 /// Static text.
515 ///
516 /// We use `Vec<u8>` here (and more generally treat a format string as a
517 /// sequence of bytes) because file paths may be arbitrary bytes. A rare
518 /// case, but one for which there is no good reason to choke on.
519 Text(Vec<u8>),
520 /// Variable for the hostname.
521 Host,
522 /// Variable for a WSL path prefix.
523 WSLPrefix,
524 /// Variable for the file path.
525 Path,
526 /// Variable for the line number.
527 Line,
528 /// Variable for the column number.
529 Column,
530}
531
532impl Part {
533 /// Interpolate this part using the given `env` and `values`, and write
534 /// the result of interpolation to the buffer provided.
535 fn interpolate_to(
536 &self,
537 env: &HyperlinkEnvironment,
538 values: &Values,
539 dest: &mut Vec<u8>,
540 ) {
541 match *self {
542 Part::Text(ref text) => dest.extend_from_slice(text),
543 Part::Host => dest.extend_from_slice(
544 env.host.as_ref().map(|s| s.as_bytes()).unwrap_or(b""),
545 ),
546 Part::WSLPrefix => dest.extend_from_slice(
547 env.wsl_prefix.as_ref().map(|s| s.as_bytes()).unwrap_or(b""),
548 ),
549 Part::Path => dest.extend_from_slice(&values.path.0),
550 Part::Line => {
551 let line = DecimalFormatter::new(values.line.unwrap_or(1));
552 dest.extend_from_slice(line.as_bytes());
553 }
554 Part::Column => {
555 let column = DecimalFormatter::new(values.column.unwrap_or(1));
556 dest.extend_from_slice(column.as_bytes());
557 }
558 }
559 }
560}
561
562impl std::fmt::Display for Part {
563 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
564 match self {
565 Part::Text(text) => write!(f, "{}", String::from_utf8_lossy(text)),
566 Part::Host => write!(f, "{{host}}"),
567 Part::WSLPrefix => write!(f, "{{wslprefix}}"),
568 Part::Path => write!(f, "{{path}}"),
569 Part::Line => write!(f, "{{line}}"),
570 Part::Column => write!(f, "{{column}}"),
571 }
572 }
573}
574
575/// The values to replace the format variables with.
576///
577/// This only consists of values that depend on each path or match printed.
578/// Values that are invariant throughout the lifetime of the process are set
579/// via a [`HyperlinkEnvironment`].
580#[derive(Clone, Debug)]
581pub(crate) struct Values<'a> {
582 path: &'a HyperlinkPath,
583 line: Option<u64>,
584 column: Option<u64>,
585}
586
587impl<'a> Values<'a> {
588 /// Creates a new set of values, starting with the path given.
589 ///
590 /// Callers may also set the line and column number using the mutator
591 /// methods.
592 pub(crate) fn new(path: &'a HyperlinkPath) -> Values<'a> {
593 Values { path, line: None, column: None }
594 }
595
596 /// Sets the line number for these values.
597 ///
598 /// If a line number is not set and a hyperlink format contains a `{line}`
599 /// variable, then it is interpolated with the value of `1` automatically.
600 pub(crate) fn line(mut self, line: Option<u64>) -> Values<'a> {
601 self.line = line;
602 self
603 }
604
605 /// Sets the column number for these values.
606 ///
607 /// If a column number is not set and a hyperlink format contains a
608 /// `{column}` variable, then it is interpolated with the value of `1`
609 /// automatically.
610 pub(crate) fn column(mut self, column: Option<u64>) -> Values<'a> {
611 self.column = column;
612 self
613 }
614}
615
616/// An abstraction for interpolating a hyperlink format with values for every
617/// variable.
618///
619/// Interpolation of variables occurs through two different sources. The
620/// first is via a `HyperlinkEnvironment` for values that are expected to
621/// be invariant. This comes from the `HyperlinkConfig` used to build this
622/// interpolator. The second source is via `Values`, which is provided to
623/// `Interpolator::begin`. The `Values` contains things like the file path,
624/// line number and column number.
625#[derive(Clone, Debug)]
626pub(crate) struct Interpolator {
627 config: HyperlinkConfig,
628 buf: RefCell<Vec<u8>>,
629}
630
631impl Interpolator {
632 /// Create a new interpolator for the given hyperlink format configuration.
633 pub(crate) fn new(config: &HyperlinkConfig) -> Interpolator {
634 Interpolator { config: config.clone(), buf: RefCell::new(vec![]) }
635 }
636
637 /// Start interpolation with the given values by writing a hyperlink
638 /// to `wtr`. Subsequent writes to `wtr`, until `Interpolator::end` is
639 /// called, are the label for the hyperlink.
640 ///
641 /// This returns an interpolator status which indicates whether the
642 /// hyperlink was written. It might not be written, for example, if the
643 /// underlying writer doesn't support hyperlinks or if the hyperlink
644 /// format is empty. The status should be provided to `Interpolator::end`
645 /// as an instruction for whether to close the hyperlink or not.
646 pub(crate) fn begin<W: WriteColor>(
647 &self,
648 values: &Values,
649 mut wtr: W,
650 ) -> io::Result<InterpolatorStatus> {
651 if self.config.format().is_empty()
652 || !wtr.supports_hyperlinks()
653 || !wtr.supports_color()
654 {
655 return Ok(InterpolatorStatus::inactive());
656 }
657 let mut buf = self.buf.borrow_mut();
658 buf.clear();
659 for part in self.config.format().parts.iter() {
660 part.interpolate_to(self.config.environment(), values, &mut buf);
661 }
662 let spec = HyperlinkSpec::open(&buf);
663 wtr.set_hyperlink(&spec)?;
664 Ok(InterpolatorStatus { active: true })
665 }
666
667 /// Writes the correct escape sequences to `wtr` to close any extant
668 /// hyperlink, marking the end of a hyperlink's label.
669 ///
670 /// The status given should be returned from a corresponding
671 /// `Interpolator::begin` call. Since `begin` may not write a hyperlink
672 /// (e.g., if the underlying writer doesn't support hyperlinks), it follows
673 /// that `finish` must not close a hyperlink that was never opened. The
674 /// status indicates whether the hyperlink was opened or not.
675 pub(crate) fn finish<W: WriteColor>(
676 &self,
677 status: InterpolatorStatus,
678 mut wtr: W,
679 ) -> io::Result<()> {
680 if !status.active {
681 return Ok(());
682 }
683 wtr.set_hyperlink(&HyperlinkSpec::close())
684 }
685}
686
/// A status indicating whether a hyperlink was written or not.
///
/// `Interpolator::begin` produces this and `Interpolator::finish` consumes
/// it to decide whether there is an open hyperlink to close. If none was
/// opened, then finishing interpolation is a no-op.
#[derive(Debug)]
pub(crate) struct InterpolatorStatus {
    /// True when a hyperlink was opened and still needs to be closed.
    active: bool,
}

impl InterpolatorStatus {
    /// Create an inactive interpolator status, i.e., one recording that no
    /// hyperlink was opened.
    #[inline]
    pub(crate) fn inactive() -> InterpolatorStatus {
        let active = false;
        InterpolatorStatus { active }
    }
}
704
705/// Represents the `{path}` part of a hyperlink.
706///
707/// This is the value to use as-is in the hyperlink, converted from an OS file
708/// path.
709#[derive(Clone, Debug)]
710pub(crate) struct HyperlinkPath(Vec<u8>);
711
712impl HyperlinkPath {
713 /// Returns a hyperlink path from an OS path.
714 #[cfg(unix)]
715 pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
716 use std::os::unix::ffi::OsStrExt;
717
718 // We canonicalize the path in order to get an absolute version of it
719 // without any `.` or `..` or superfluous separators. Unfortunately,
720 // this does also remove symlinks, and in theory, it would be nice to
721 // retain them. Perhaps even simpler, we could just join the current
722 // working directory with the path and be done with it. There was
723 // some discussion about this on PR#2483, and there generally appears
724 // to be some uncertainty about the extent to which hyperlinks with
725 // things like `..` in them actually work. So for now, we do the safest
726 // thing possible even though I think it can result in worse user
727 // experience. (Because it means the path you click on and the actual
728 // path that gets followed are different, even though they ostensibly
729 // refer to the same file.)
730 //
731 // There's also the potential issue that path canonicalization is
732 // expensive since it can touch the file system. That is probably
733 // less of an issue since hyperlinks are only created when they're
734 // supported, i.e., when writing to a tty.
735 //
736 // [1]: https://github.com/BurntSushi/ripgrep/pull/2483
737 let path = match original_path.canonicalize() {
738 Ok(path) => path,
739 Err(err) => {
740 log::debug!(
741 "hyperlink creation for {:?} failed, error occurred \
742 during path canonicalization: {}",
743 original_path,
744 err,
745 );
746 return None;
747 }
748 };
749 let bytes = path.as_os_str().as_bytes();
750 // This should not be possible since one imagines that canonicalization
751 // should always return an absolute path. But it doesn't actually
752 // appear guaranteed by POSIX, so we check whether it's true or not and
753 // refuse to create a hyperlink from a relative path if it isn't.
754 if !bytes.starts_with(b"/") {
755 log::debug!(
756 "hyperlink creation for {:?} failed, canonicalization \
757 returned {:?}, which does not start with a slash",
758 original_path,
759 path,
760 );
761 return None;
762 }
763 Some(HyperlinkPath::encode(bytes))
764 }
765
766 /// Returns a hyperlink path from an OS path.
767 #[cfg(windows)]
768 pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
769 // On Windows, we use `std::path::absolute` instead of `Path::canonicalize`
770 // as it can be much faster since it does not touch the file system.
771 // It wraps the [`GetFullPathNameW`][1] API, except for verbatim paths
772 // (those which start with `\\?\`, see [the documentation][2] for details).
773 //
774 // Here, we strip any verbatim path prefixes since we cannot use them
775 // in hyperlinks anyway. This can only happen if the user explicitly
776 // supplies a verbatim path as input, which already needs to be absolute:
777 //
778 // \\?\C:\dir\file.txt (local path)
779 // \\?\UNC\server\dir\file.txt (network share)
780 //
781 // The `\\?\` prefix is constant for verbatim paths, and can be followed
782 // by `UNC\` (universal naming convention), which denotes a network share.
783 //
784 // Given that the default URL format on Windows is file://{path}
785 // we need to return the following from this function:
786 //
787 // /C:/dir/file.txt (local path)
788 // //server/dir/file.txt (network share)
789 //
790 // Which produces the following links:
791 //
792 // file:///C:/dir/file.txt (local path)
793 // file:////server/dir/file.txt (network share)
794 //
795 // This substitutes the {path} variable with the expected value for
796 // the most common DOS paths, but on the other hand, network paths
797 // start with a single slash, which may be unexpected. It seems to work
798 // though?
799 //
800 // Note that the following URL syntax also seems to be valid?
801 //
802 // file://server/dir/file.txt
803 //
804 // But the initial implementation of this routine went for the format
805 // above.
806 //
807 // Also note that the file://C:/dir/file.txt syntax is not correct,
808 // even though it often works in practice.
809 //
810 // In the end, this choice was confirmed by VSCode, whose format is
811 //
812 // vscode://file{path}:{line}:{column}
813 //
814 // and which correctly understands the following URL format for network
815 // drives:
816 //
817 // vscode://file//server/dir/file.txt:1:1
818 //
819 // It doesn't parse any other number of slashes in "file//server" as a
820 // network path.
821 //
822 // [1]: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew
823 // [2]: https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
824
825 const WIN32_NAMESPACE_PREFIX: &str = r"\\?\";
826 const UNC_PREFIX: &str = r"UNC\";
827
828 let path = match std::path::absolute(original_path) {
829 Ok(path) => path,
830 Err(err) => {
831 log::debug!(
832 "hyperlink creation for {:?} failed, error occurred \
833 during conversion to absolute path: {}",
834 original_path,
835 err,
836 );
837 return None;
838 }
839 };
840 // We convert the path to a string for easier manipulation. If it
841 // wasn't valid UTF-16 (and thus could not be non-lossily transcoded
842 // to UTF-8), then we just give up. It's not clear we could make
843 // a meaningful hyperlink from it anyway. And this should be an
844 // exceptionally rare case.
845 let mut string = match path.to_str() {
846 Some(string) => string,
847 None => {
848 log::debug!(
849 "hyperlink creation for {:?} failed, path is not \
850 valid UTF-8",
851 original_path,
852 );
853 return None;
854 }
855 };
856
857 // Strip verbatim path prefixes (see the comment above for details).
858 if string.starts_with(WIN32_NAMESPACE_PREFIX) {
859 string = &string[WIN32_NAMESPACE_PREFIX.len()..];
860
861 // Drop the UNC prefix if there is one, but keep the leading slash.
862 if string.starts_with(UNC_PREFIX) {
863 string = &string[(UNC_PREFIX.len() - 1)..];
864 }
865 } else if string.starts_with(r"\\") || string.starts_with(r"//") {
866 // Drop one of the two leading slashes of network paths, it will be added back.
867 string = &string[1..];
868 }
869
870 // Finally, add a leading slash. In the local file case, this turns
871 // C:\foo\bar into /C:\foo\bar (and then percent encoding turns it into
872 // /C:/foo/bar). In the network share case, this turns \share\foo\bar
873 // into /\share/foo/bar (and then percent encoding turns it into
874 // //share/foo/bar).
875 let with_slash = format!("/{string}");
876 Some(HyperlinkPath::encode(with_slash.as_bytes()))
877 }
878
879 /// For other platforms (not windows, not unix), return None and log a debug message.
880 #[cfg(not(any(windows, unix)))]
881 pub(crate) fn from_path(original_path: &Path) -> Option<HyperlinkPath> {
882 log::debug!("hyperlinks are not supported on this platform");
883 None
884 }
885
886 /// Percent-encodes a path.
887 ///
888 /// The alphanumeric ASCII characters and "-", ".", "_", "~" are unreserved
889 /// as per section 2.3 of RFC 3986 (Uniform Resource Identifier (URI):
890 /// Generic Syntax), and are not encoded. The other ASCII characters except
891 /// "/" and ":" are percent-encoded, and "\" is replaced by "/" on Windows.
892 ///
893 /// Section 4 of RFC 8089 (The "file" URI Scheme) does not mandate precise
894 /// encoding requirements for non-ASCII characters, and this implementation
895 /// leaves them unencoded. On Windows, the UrlCreateFromPathW function does
896 /// not encode non-ASCII characters. Doing so with UTF-8 encoded paths
897 /// creates invalid file:// URLs on that platform.
898 fn encode(input: &[u8]) -> HyperlinkPath {
899 let mut result = Vec::with_capacity(input.len());
900 for &byte in input.iter() {
901 match byte {
902 b'0'..=b'9'
903 | b'A'..=b'Z'
904 | b'a'..=b'z'
905 | b'/'
906 | b':'
907 | b'-'
908 | b'.'
909 | b'_'
910 | b'~'
911 | 128.. => {
912 result.push(byte);
913 }
914 #[cfg(windows)]
915 b'\\' => {
916 result.push(b'/');
917 }
918 _ => {
919 const HEX: &[u8] = b"0123456789ABCDEF";
920 result.push(b'%');
921 result.push(HEX[(byte >> 4) as usize]);
922 result.push(HEX[(byte & 0xF) as usize]);
923 }
924 }
925 }
926 HyperlinkPath(result)
927 }
928}
929
930/// Returns the set of hyperlink aliases supported by this crate.
931///
932/// Aliases are supported by the `FromStr` trait implementation of a
933/// [`HyperlinkFormat`]. That is, if an alias is seen, then it is automatically
934/// replaced with the corresponding format. For example, the `vscode` alias
935/// maps to `vscode://file{path}:{line}:{column}`.
936///
937/// This is exposed to allow callers to include hyperlink aliases in
938/// documentation in a way that is guaranteed to match what is actually
939/// supported.
940///
941/// The list returned is guaranteed to be sorted lexicographically
942/// by the alias name. Callers may want to re-sort the list using
943/// [`HyperlinkAlias::display_priority`] via a stable sort when showing the
944/// list to users. This will cause special aliases like `none` and `default` to
945/// appear first.
946pub fn hyperlink_aliases() -> Vec<HyperlinkAlias> {
947 HYPERLINK_PATTERN_ALIASES.iter().cloned().collect()
948}
949
950#[cfg(test)]
951mod tests {
952 use std::str::FromStr;
953
954 use super::*;
955
    /// Consecutive text appends are merged into a single leading text part,
    /// and a format with a scheme and a `{path}` variable builds
    /// successfully.
    #[test]
    fn build_format() {
        let format = FormatBuilder::new()
            .append_slice(b"foo://")
            .append_slice(b"bar-")
            .append_slice(b"baz")
            .append_var("path")
            .unwrap()
            .build()
            .unwrap();

        assert_eq!(format.to_string(), "foo://bar-baz{path}");
        // The three slices end up in one `Part::Text`.
        assert_eq!(format.parts[0], Part::Text(b"foo://bar-baz".to_vec()));
        assert!(!format.is_empty());
    }
971
    /// A builder without any parts yields the empty format, which is also
    /// what `HyperlinkFormat::empty` and `HyperlinkFormat::default` return.
    #[test]
    fn build_empty_format() {
        let format = FormatBuilder::new().build().unwrap();

        assert!(format.is_empty());
        assert_eq!(format, HyperlinkFormat::empty());
        assert_eq!(format, HyperlinkFormat::default());
    }
980
    /// Alias names are accepted by the parser, and the `none` alias parses
    /// to the empty format.
    #[test]
    fn handle_alias() {
        assert!(HyperlinkFormat::from_str("file").is_ok());
        assert!(HyperlinkFormat::from_str("none").is_ok());
        assert!(HyperlinkFormat::from_str("none").unwrap().is_empty());
    }
987
    /// A format using several variables is displayed exactly as written and
    /// is split into alternating text and variable parts.
    #[test]
    fn parse_format() {
        let format = HyperlinkFormat::from_str(
            "foo://{host}/bar/{path}:{line}:{column}",
        )
        .unwrap();

        assert_eq!(
            format.to_string(),
            "foo://{host}/bar/{path}:{line}:{column}"
        );
        // Four text parts ("foo://", "/bar/", ":", ":") and four variables.
        assert_eq!(format.parts.len(), 8);
        assert!(format.parts.contains(&Part::Path));
        assert!(format.parts.contains(&Part::Line));
        assert!(format.parts.contains(&Part::Column));
    }
1004
1005 #[test]
1006 fn parse_valid() {
1007 assert!(HyperlinkFormat::from_str("").unwrap().is_empty());
1008 assert_eq!(
1009 HyperlinkFormat::from_str("foo://{path}").unwrap().to_string(),
1010 "foo://{path}"
1011 );
1012 assert_eq!(
1013 HyperlinkFormat::from_str("foo://{path}/bar").unwrap().to_string(),
1014 "foo://{path}/bar"
1015 );
1016
1017 HyperlinkFormat::from_str("f://{path}").unwrap();
1018 HyperlinkFormat::from_str("f:{path}").unwrap();
1019 HyperlinkFormat::from_str("f-+.:{path}").unwrap();
1020 HyperlinkFormat::from_str("f42:{path}").unwrap();
1021 HyperlinkFormat::from_str("42:{path}").unwrap();
1022 HyperlinkFormat::from_str("+:{path}").unwrap();
1023 HyperlinkFormat::from_str("F42:{path}").unwrap();
1024 HyperlinkFormat::from_str("F42://foo{{bar}}{path}").unwrap();
1025 }
1026
1027 #[test]
1028 fn parse_invalid() {
1029 use super::HyperlinkFormatErrorKind::*;
1030
1031 let err = |kind| HyperlinkFormatError { kind };
1032 assert_eq!(
1033 HyperlinkFormat::from_str("foo://bar").unwrap_err(),
1034 err(NoVariables),
1035 );
1036 assert_eq!(
1037 HyperlinkFormat::from_str("foo://{line}").unwrap_err(),
1038 err(NoPathVariable),
1039 );
1040 assert_eq!(
1041 HyperlinkFormat::from_str("foo://{path").unwrap_err(),
1042 err(UnclosedVariable),
1043 );
1044 assert_eq!(
1045 HyperlinkFormat::from_str("foo://{path}:{column}").unwrap_err(),
1046 err(NoLineVariable),
1047 );
1048 assert_eq!(
1049 HyperlinkFormat::from_str("{path}").unwrap_err(),
1050 err(InvalidScheme),
1051 );
1052 assert_eq!(
1053 HyperlinkFormat::from_str(":{path}").unwrap_err(),
1054 err(InvalidScheme),
1055 );
1056 assert_eq!(
1057 HyperlinkFormat::from_str("f*:{path}").unwrap_err(),
1058 err(InvalidScheme),
1059 );
1060
1061 assert_eq!(
1062 HyperlinkFormat::from_str("foo://{bar}").unwrap_err(),
1063 err(InvalidVariable("bar".to_string())),
1064 );
1065 assert_eq!(
1066 HyperlinkFormat::from_str("foo://{}}bar}").unwrap_err(),
1067 err(InvalidVariable("".to_string())),
1068 );
1069 assert_eq!(
1070 HyperlinkFormat::from_str("foo://{b}}ar}").unwrap_err(),
1071 err(InvalidVariable("b".to_string())),
1072 );
1073 assert_eq!(
1074 HyperlinkFormat::from_str("foo://{bar}}}").unwrap_err(),
1075 err(InvalidVariable("bar".to_string())),
1076 );
1077 assert_eq!(
1078 HyperlinkFormat::from_str("foo://{{bar}").unwrap_err(),
1079 err(InvalidCloseVariable),
1080 );
1081 assert_eq!(
1082 HyperlinkFormat::from_str("foo://{{{bar}").unwrap_err(),
1083 err(InvalidVariable("bar".to_string())),
1084 );
1085 assert_eq!(
1086 HyperlinkFormat::from_str("foo://{b{{ar}").unwrap_err(),
1087 err(InvalidVariable("b{{ar".to_string())),
1088 );
1089 assert_eq!(
1090 HyperlinkFormat::from_str("foo://{bar{{}").unwrap_err(),
1091 err(InvalidVariable("bar{{".to_string())),
1092 );
1093 }
1094
1095 #[test]
1096 #[cfg(windows)]
1097 fn convert_to_hyperlink_path() {
1098 let convert = |path| {
1099 String::from_utf8(
1100 HyperlinkPath::from_path(Path::new(path)).unwrap().0,
1101 )
1102 .unwrap()
1103 };
1104
1105 assert_eq!(convert(r"C:\dir\file.txt"), "/C:/dir/file.txt");
1106 assert_eq!(
1107 convert(r"C:\foo\bar\..\other\baz.txt"),
1108 "/C:/foo/other/baz.txt"
1109 );
1110
1111 assert_eq!(convert(r"\\server\dir\file.txt"), "//server/dir/file.txt");
1112 assert_eq!(
1113 convert(r"\\server\dir\foo\..\other\file.txt"),
1114 "//server/dir/other/file.txt"
1115 );
1116
1117 assert_eq!(convert(r"\\?\C:\dir\file.txt"), "/C:/dir/file.txt");
1118 assert_eq!(
1119 convert(r"\\?\UNC\server\dir\file.txt"),
1120 "//server/dir/file.txt"
1121 );
1122 }
1123
1124 #[test]
1125 fn aliases_are_sorted() {
1126 let aliases = hyperlink_aliases();
1127 let mut prev =
1128 aliases.first().expect("aliases should be non-empty").name();
1129 for alias in aliases.iter().skip(1) {
1130 let name = alias.name();
1131 assert!(
1132 name > prev,
1133 "'{prev}' should come before '{name}' in \
1134 HYPERLINK_PATTERN_ALIASES",
1135 );
1136 prev = name;
1137 }
1138 }
1139
1140 #[test]
1141 fn alias_names_are_reasonable() {
1142 for alias in hyperlink_aliases() {
1143 // There's no hard rule here, but if we want to define an alias
1144 // with a name that doesn't pass this assert, then we should
1145 // probably flag it as worthy of consideration. For example, we
1146 // really do not want to define an alias that contains `{` or `}`,
1147 // which might confuse it for a variable.
1148 assert!(alias.name().chars().all(|c| c.is_alphanumeric()
1149 || c == '+'
1150 || c == '-'
1151 || c == '.'));
1152 }
1153 }
1154
1155 #[test]
1156 fn aliases_are_valid_formats() {
1157 for alias in hyperlink_aliases() {
1158 let (name, format) = (alias.name(), alias.format());
1159 assert!(
1160 format.parse::<HyperlinkFormat>().is_ok(),
1161 "invalid hyperlink alias '{name}': {format}",
1162 );
1163 }
1164 }
1165}