shell_quote/sh.rs
1#![cfg(feature = "sh")]
2
3use crate::{ascii::Char, Quotable, QuoteInto};
4
5/// Quote byte strings for use with `/bin/sh`.
6///
7/// # ⚠️ Warning
8///
9/// There is no escape sequence for bytes between 0x80 and 0xFF – these must be
10/// reproduced exactly in the quoted output – hence **it is not possible to
11/// safely create or quote into an existing [`String`]** with [`Sh`] because
12/// these bytes would be misinterpreted as a second or subsequent byte of a
13/// [multi-byte UTF-8 code point representation][utf-8-encoding].
14///
15/// [utf-8-encoding]: https://en.wikipedia.org/wiki/UTF-8#Encoding
16///
17/// If you're not using bytes between 0x80 and 0xFF, a workaround is to instead
18/// quote into a [`Vec<u8>`] and convert that into a string using
19/// [`String::from_utf8`]. The key difference is that `from_utf8` returns a
20/// [`Result`] which the caller must deal with.
21///
22/// # Compatibility
23///
24/// Quoted/escaped strings produced by [`Sh`] also work in Bash, Dash, and Z
25/// Shell.
26///
27/// The quoted/escaped strings it produces are different to those coming from
28/// [`Bash`][`crate::Bash`] or its alias [`Zsh`][`crate::Zsh`]. Those strings
29/// won't work in a pure `/bin/sh` shell like Dash, but they are better for
30/// humans to read, to copy and paste. For example, [`Sh`] does not (and cannot)
31/// escape control characters, but characters like `BEL` and `TAB` (and others)
/// are represented by `\a` and `\t` respectively by [`Bash`][`crate::Bash`].
33///
34/// # Notes
35///
36/// I wasn't able to find any definitive statement of exactly how Bourne Shell
37/// strings should be quoted, mainly because "Bourne Shell" or `/bin/sh` can
38/// refer to many different pieces of software: Bash has a Bourne Shell mode,
39/// `/bin/sh` on Ubuntu is actually Dash, and on macOS 12.3 (and later, and
40/// possibly earlier) all bets are off:
41///
42/// > `sh` is a POSIX-compliant command interpreter (shell). It is implemented
43/// > by re-execing as either `bash(1)`, `dash(1)`, or `zsh(1)` as determined by
44/// > the symbolic link located at `/private/var/select/sh`. If
45/// > `/private/var/select/sh` does not exist or does not point to a valid
46/// > shell, `sh` will use one of the supported shells.
47///
48/// However, [dash](https://en.wikipedia.org/wiki/Almquist_shell#dash) appears
49/// to be the de facto `/bin/sh` these days, having been formally adopted in
50/// Ubuntu and Debian, and also available as `/bin/dash` on macOS.
51///
52/// From dash(1):
53///
54/// > ## Quoting
55/// >
56/// > Quoting is used to remove the special meaning of certain characters or
57/// > words to the shell, such as operators, whitespace, or keywords. There
58/// > are three types of quoting: matched single quotes, matched double
59/// > quotes, and backslash.
60/// >
61/// > ## Backslash
62/// >
63/// > A backslash preserves the literal meaning of the following character,
64/// > with the exception of ⟨newline⟩. A backslash preceding a ⟨newline⟩ is
65/// > treated as a line continuation.
66/// >
67/// > ## Single Quotes
68/// >
69/// > Enclosing characters in single quotes preserves the literal meaning of
70/// > all the characters (except single quotes, making it impossible to put
71/// > single-quotes in a single-quoted string).
72/// >
73/// > ## Double Quotes
74/// >
75/// > Enclosing characters within double quotes preserves the literal meaning
76/// > of all characters except dollarsign ($), backquote (`), and backslash
77/// > (\). The backslash inside double quotes is historically weird, and
78/// > serves to quote only the following characters:
79/// >
80/// > ```text
81/// > $ ` " \ <newline>.
82/// > ```
83/// >
84/// > Otherwise it remains literal.
85///
86/// The code in this module operates byte by byte, making no special allowances
87/// for multi-byte character sets. In other words, it's up to the caller to
88/// figure out encoding for non-ASCII characters. A significant use case for
89/// this code is to quote filenames into scripts, and on *nix variants I
90/// understand that filenames are essentially arrays of bytes, even if the OS
91/// adds some normalisation and case-insensitivity on top.
92///
#[derive(Clone, Copy, Debug)]
pub struct Sh;
95
96impl QuoteInto<Vec<u8>> for Sh {
97 fn quote_into<'q, S: Into<Quotable<'q>>>(s: S, out: &mut Vec<u8>) {
98 Self::quote_into_vec(s, out);
99 }
100}
101
102#[cfg(unix)]
103impl QuoteInto<std::ffi::OsString> for Sh {
104 fn quote_into<'q, S: Into<Quotable<'q>>>(s: S, out: &mut std::ffi::OsString) {
105 use std::os::unix::ffi::OsStringExt;
106 let s = Self::quote_vec(s);
107 let s = std::ffi::OsString::from_vec(s);
108 out.push(s);
109 }
110}
111
/// Quoting into a `bstr::BString`; only compiled with the `bstr` feature.
#[cfg(feature = "bstr")]
impl QuoteInto<bstr::BString> for Sh {
    /// Quote `s` into a fresh byte vector and append those bytes to `out`.
    fn quote_into<'q, S: Into<Quotable<'q>>>(s: S, out: &mut bstr::BString) {
        out.extend(Sh::quote_vec(s));
    }
}
119
120impl Sh {
121 /// Quote a string of bytes into a new `Vec<u8>`.
122 ///
123 /// This will return one of the following:
124 /// - The string as-is, if no quoting is necessary.
125 /// - A string containing single-quoted sections, like `foo' bar'`.
126 ///
127 /// See [`quote_into_vec`][`Self::quote_into_vec`] for a variant that
128 /// extends an existing `Vec` instead of allocating a new one.
129 ///
130 /// # Examples
131 ///
132 /// ```
133 /// # use shell_quote::Sh;
134 /// assert_eq!(Sh::quote_vec("foobar"), b"foobar");
135 /// assert_eq!(Sh::quote_vec("foo bar"), b"foo' bar'");
136 /// ```
137 ///
138 pub fn quote_vec<'a, S: Into<Quotable<'a>>>(s: S) -> Vec<u8> {
139 let bytes = match s.into() {
140 Quotable::Bytes(bytes) => bytes,
141 Quotable::Text(s) => s.as_bytes(),
142 };
143 match escape_prepare(bytes) {
144 Prepared::Empty => vec![b'\'', b'\''],
145 Prepared::Inert => bytes.into(),
146 Prepared::Escape(esc) => {
147 // Here, previously, an optimisation precalculated the required
148 // capacity of the output `Vec` to avoid reallocations later on,
149 // but benchmarks showed that it was slower. It _may_ have
150 // lowered maximum RAM required, but that was not measured.
151 let mut sout = Vec::new();
152 escape_chars(esc, &mut sout);
153 sout
154 }
155 }
156 }
157
158 /// Quote a string of bytes into an existing `Vec<u8>`.
159 ///
160 /// See [`quote_vec`][`Self::quote_vec`] for more details.
161 ///
162 /// # Examples
163 ///
164 /// ```
165 /// # use shell_quote::Sh;
166 /// let mut buf = Vec::with_capacity(128);
167 /// Sh::quote_into_vec("foobar", &mut buf);
168 /// buf.push(b' '); // Add a space.
169 /// Sh::quote_into_vec("foo bar", &mut buf);
170 /// assert_eq!(buf, b"foobar foo' bar'");
171 /// ```
172 ///
173 pub fn quote_into_vec<'a, S: Into<Quotable<'a>>>(s: S, sout: &mut Vec<u8>) {
174 let bytes = match s.into() {
175 Quotable::Bytes(bytes) => bytes,
176 Quotable::Text(s) => s.as_bytes(),
177 };
178 match escape_prepare(bytes) {
179 Prepared::Empty => sout.extend(b"''"),
180 Prepared::Inert => sout.extend(bytes),
181 Prepared::Escape(esc) => {
182 // Here, previously, an optimisation precalculated the required
183 // capacity of the output `Vec` to avoid reallocations later on,
184 // but benchmarks showed that it was slower. It _may_ have
185 // lowered maximum RAM required, but that was not measured.
186 escape_chars(esc, sout);
187 }
188 }
189 }
190}
191
192// ----------------------------------------------------------------------------
193
194enum Prepared {
195 Empty,
196 Inert,
197 Escape(Vec<Char>),
198}
199
200fn escape_prepare(sin: &[u8]) -> Prepared {
201 let esc: Vec<_> = sin.iter().map(Char::from).collect();
202 // An optimisation: if the string is not empty and contains only "safe"
203 // characters we can avoid further work.
204 if esc.is_empty() {
205 Prepared::Empty
206 } else if esc.iter().all(Char::is_inert) {
207 Prepared::Inert
208 } else {
209 Prepared::Escape(esc)
210 }
211}
212
213fn escape_chars(esc: Vec<Char>, sout: &mut Vec<u8>) {
214 let mut inside_quotes = false;
215 for mode in esc {
216 use Char::*;
217 match mode {
218 PrintableInert(ch) | Extended(ch) => sout.push(ch),
219 Control(ch) | Printable(ch) => {
220 if inside_quotes {
221 sout.push(ch);
222 } else {
223 sout.push(b'\'');
224 inside_quotes = true;
225 sout.push(ch);
226 }
227 }
228 SingleQuote => {
229 if inside_quotes {
230 sout.extend(b"'\\'");
231 inside_quotes = false;
232 } else {
233 sout.extend(b"\\'");
234 }
235 }
236 ch => {
237 if inside_quotes {
238 sout.push(ch.code());
239 } else {
240 sout.push(b'\'');
241 inside_quotes = true;
242 sout.push(ch.code());
243 }
244 }
245 }
246 }
247 if inside_quotes {
248 sout.push(b'\'');
249 }
250}