1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
use crate::{ascii::Char, quoter::QuoterSealed, Quotable, Quoter};
/// Quote byte strings for use with `/bin/sh`.
///
/// Input that needs no quoting is passed through unchanged; anything else
/// (including the empty string) is wrapped in single quotes, with backslash
/// escapes standing in for troublesome bytes.
///
/// # Notes
///
/// The following escapes seem to be "okay":
///
/// ```text
/// \a alert (bell)
/// \b backspace
/// \f form feed
/// \n new line
/// \r carriage return
/// \t horizontal tab
/// \v vertical tab
/// \\ backslash
/// \nnn the eight-bit character whose value is the octal value nnn
/// ```
///
/// I wasn't able to find any definitive statement of exactly how Bourne Shell
/// strings should be quoted, mainly because "Bourne Shell" or `/bin/sh` can
/// refer to many different pieces of software: Bash has a Bourne Shell mode,
/// `/bin/sh` on Ubuntu is actually Dash, and on macOS 12.3 (and later, and
/// possibly earlier) all bets are off:
///
/// > `sh` is a POSIX-compliant command interpreter (shell). It is implemented
/// > by re-execing as either `bash(1)`, `dash(1)`, or `zsh(1)` as determined by
/// > the symbolic link located at `/private/var/select/sh`. If
/// > `/private/var/select/sh` does not exist or does not point to a valid
/// > shell, `sh` will use one of the supported shells.
///
/// ⚠️ In practice, however, bytes between 0x80 and 0xff inclusive **cannot** be
/// escaped with `\nnn` notation. The shell simply ignores these escapes and
/// treats `\nnn` as a literal string of 4 characters. Hence, in this module,
/// these bytes are reproduced as-is within the quoted string output, with no
/// special escaping.
///
/// The code in this module sticks to escape sequences that I consider
/// "standard" by a heuristic known only to me. It operates byte by byte, making
/// no special allowances for multi-byte character sets. In other words, it's up
/// to the caller to figure out encoding for non-ASCII characters. A significant
/// use case for this code is to quote filenames into scripts, and on *nix
/// variants I understand that filenames are essentially arrays of bytes, even
/// if the OS adds some normalisation and case-insensitivity on top.
///
/// If you have some expertise in this area I would love to hear from you.
#[derive(Debug, Clone, Copy)]
pub struct Sh;
// Empty impl: nothing is defined or overridden here.
// NOTE(review): presumably `Quoter`'s behaviour is supplied through the
// `QuoterSealed` impl for `Sh` — confirm against the trait definitions.
impl Quoter for Sh {}
/// Expose [`Quoter`] implementation as default impl too, for convenience.
///
/// Both trait functions forward straight to the inherent associated functions
/// of the same name on [`Sh`].
impl QuoterSealed for Sh {
// `Self::quote` names the inherent `Sh::quote`: for a path like `Type::name`,
// inherent associated functions are found before trait ones, so this is a
// forward to the real implementation and not a recursive call.
fn quote<'a, S: ?Sized + Into<Quotable<'a>>>(s: S) -> Vec<u8> {
Self::quote(s)
}
// Same forwarding as above, for the variant that appends to a caller-supplied
// buffer.
fn quote_into<'a, S: ?Sized + Into<Quotable<'a>>>(s: S, sout: &mut Vec<u8>) {
Self::quote_into(s, sout)
}
}
impl Sh {
/// Quote a string of bytes into a new `Vec<u8>`.
///
/// This will return one of the following:
/// - The string as-is, if no quoting is necessary.
/// - A quoted string containing ANSI-C-like escapes, like `'foo\nbar'`.
///
/// See [`quote_into`](#method.quote_into) for a variant that extends an
/// existing `Vec` instead of allocating a new one.
///
/// # Examples
///
/// ```
/// # use shell_quote::{Sh, Quoter};
/// assert_eq!(Sh::quote("foobar"), b"foobar");
/// assert_eq!(Sh::quote("foo bar"), b"'foo bar'");
/// ```
///
pub fn quote<'a, S: ?Sized + Into<Quotable<'a>>>(s: S) -> Vec<u8> {
let sin: Quotable<'a> = s.into();
if let Some(esc) = escape_prepare(sin.bytes) {
// This may be a pointless optimisation, but calculate the memory
// needed to avoid reallocations as we construct the output. Since
// we know we're going to use single quotes, we also add 2 bytes.
let size: usize = esc.iter().map(escape_size).sum();
let mut sout = Vec::with_capacity(size + 2);
escape_chars(esc, &mut sout); // Do the work.
sout
} else {
sin.bytes.into()
}
}
/// Quote a string of bytes into an existing `Vec<u8>`.
///
/// See [`quote`](#method.quote) for more details.
///
/// # Examples
///
/// ```
/// # use shell_quote::{Sh, Quoter};
/// let mut buf = Vec::with_capacity(128);
/// Sh::quote_into("foobar", &mut buf);
/// buf.push(b' '); // Add a space.
/// Sh::quote_into("foo bar", &mut buf);
/// assert_eq!(buf, b"foobar 'foo bar'");
/// ```
///
pub fn quote_into<'a, S: ?Sized + Into<Quotable<'a>>>(s: S, sout: &mut Vec<u8>) {
let sin: Quotable<'a> = s.into();
if let Some(esc) = escape_prepare(sin.bytes) {
// This may be a pointless optimisation, but calculate the memory
// needed to avoid reallocations as we construct the output. Since
// we know we're going to use single quotes, we also add 2 bytes.
let size: usize = esc.iter().map(escape_size).sum();
sout.reserve(size + 2);
escape_chars(esc, sout); // Do the work.
} else {
sout.extend(sin.bytes);
}
}
}
// ----------------------------------------------------------------------------
/// Classify every input byte and decide whether quoting is required.
///
/// Returns `None` when the input is non-empty and every byte is inert, which
/// tells the caller it can emit the input untouched. Otherwise returns the
/// classified characters; this includes the empty input, which therefore
/// always goes through quoting.
fn escape_prepare(sin: &[u8]) -> Option<Vec<Char>> {
    let chars: Vec<Char> = sin.iter().map(Char::from).collect();
    // An optimisation: a non-empty string made only of "safe" characters
    // needs no further work.
    let passthrough = !chars.is_empty() && chars.iter().all(Char::is_inert);
    if passthrough {
        None
    } else {
        Some(chars)
    }
}
/// Append the Bourne-style `'...'` quoted form of `esc` to `sout`.
///
/// Output is an opening single quote, one encoded unit per character, and a
/// closing single quote:
///
/// - Bell, backspace, form feed, new line, carriage return, tabs and
///   backslash use their two-byte backslash escapes.
/// - Escape, single quote, delete and all other control characters use a
///   three-digit octal escape (`\033`, `\047`, `\177`, `\nnn`).
/// - Double quotes, printable characters and extended bytes are written
///   verbatim.
fn escape_chars(esc: Vec<Char>, sout: &mut Vec<u8>) {
    use Char::*;

    sout.push(b'\'');
    for ch in esc {
        match ch {
            Bell => sout.extend_from_slice(b"\\a"),
            Backspace => sout.extend_from_slice(b"\\b"),
            Escape => sout.extend_from_slice(b"\\033"),
            FormFeed => sout.extend_from_slice(b"\\f"),
            NewLine => sout.extend_from_slice(b"\\n"),
            CarriageReturn => sout.extend_from_slice(b"\\r"),
            HorizontalTab => sout.extend_from_slice(b"\\t"),
            VerticalTab => sout.extend_from_slice(b"\\v"),
            Control(ch) => sout.extend_from_slice(format!("\\{:03o}", ch).as_bytes()),
            Backslash => sout.extend_from_slice(b"\\\\"),
            // A literal single quote would end the quoted string, so it is
            // written in octal instead.
            SingleQuote => sout.extend_from_slice(b"\\047"),
            DoubleQuote => sout.push(b'"'),
            // DEL is a control character: escape it in octal like the others
            // rather than embedding a raw 0x7F byte in the output.
            Delete => sout.extend_from_slice(b"\\177"),
            PrintableInert(ch) | Printable(ch) | Extended(ch) => sout.push(ch),
        }
    }
    sout.push(b'\'');
}
/// Number of output bytes to budget for one character when sizing the quoted
/// buffer.
///
/// The value is used only to reserve capacity, so it needs to be at least as
/// large as what is actually written for the character:
///
/// - 2 for the two-byte backslash escapes,
/// - 4 for anything budgeted as a three-digit octal escape (`\nnn`),
/// - 1 for bytes that are copied through verbatim. This includes extended
///   bytes (0x80–0xff), which are written as a single unescaped byte.
fn escape_size(char: &Char) -> usize {
    use Char::*;
    match char {
        // `\a`, `\b`, `\f`, `\n`, `\r`, `\t`, `\v`, `\\`.
        Bell | Backspace | FormFeed | NewLine | CarriageReturn | HorizontalTab | VerticalTab
        | Backslash => 2,
        // Room for a backslash plus three octal digits.
        Escape | Control(_) | SingleQuote | Delete => 4,
        // Copied through as a single byte.
        DoubleQuote | PrintableInert(_) | Printable(_) | Extended(_) => 1,
    }
}