shell_quote/bash.rs
1#![cfg(feature = "bash")]
2
3use crate::{Quotable, QuoteInto};
4
5/// Quote byte strings for use with Bash, the GNU Bourne-Again Shell.
6///
7/// # Compatibility
8///
9/// Quoted/escaped strings produced by [`Bash`] work in both Bash and Z Shell.
10///
11/// # ⚠️ Warning
12///
13/// It is _possible_ to encode NUL in a Bash string, but Bash appears to then
14/// truncate the rest of the string after that point **or** sometimes it filters
15/// the NUL out. It's not yet clear to me when/why each behaviour is chosen.
16///
17/// If you're quoting UTF-8 content this may not be a problem since there is
18/// only one code point – the null character itself – that will ever produce a
19/// NUL byte. To avoid this problem entirely, consider using [Modified
20/// UTF-8][modified-utf-8] so that the NUL byte can never appear in a valid byte
21/// stream.
22///
23/// [modified-utf-8]: https://en.wikipedia.org/wiki/UTF-8#Modified_UTF-8
24///
25/// # Notes
26///
27/// From bash(1):
28///
29/// Words of the form $'string' are treated specially. The word expands to
30/// string, with backslash-escaped characters replaced as specified by the
31/// ANSI C standard. Backslash escape sequences, if present, are decoded as
32/// follows:
33///
34/// ```text
35/// \a alert (bell)
36/// \b backspace
37/// \e an escape character
38/// \f form feed
39/// \n new line
40/// \r carriage return
41/// \t horizontal tab
42/// \v vertical tab
43/// \\ backslash
44/// \' single quote
45/// \nnn the eight-bit character whose value is the
46/// octal value nnn (one to three digits)
47/// \xHH the eight-bit character whose value is the
48/// hexadecimal value HH (one or two hex digits)
49/// \cx a control-x character
50/// ```
51///
52/// Bash allows, in newer versions, for non-ASCII Unicode characters with
53/// `\uHHHH` and `\UXXXXXXXX` syntax inside these [ANSI C quoted
54/// strings][ansi-c-quoting], but we avoid this and work only with bytes. Part
55/// of the problem is that it's not clear how Bash then works with these
56/// strings. Does it encode these characters into bytes according to the user's
57/// current locale? Are strings in Bash now natively Unicode?
58///
59/// For now it's up to the caller to figure out encoding. A significant use case
60/// for this code is to quote filenames into scripts, and on *nix variants I
61/// understand that filenames are essentially arrays of bytes, even if the OS
62/// adds some normalisation and case-insensitivity on top.
63///
64/// [ansi-c-quoting]:
65/// https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html
66///
// Zero-sized marker type: all quoting functionality is exposed as associated
// functions, so no instance state is needed.
#[derive(Clone, Copy, Debug)]
pub struct Bash;
69
70// ----------------------------------------------------------------------------
71
72impl QuoteInto<Vec<u8>> for Bash {
73 fn quote_into<'q, S: Into<Quotable<'q>>>(s: S, out: &mut Vec<u8>) {
74 Self::quote_into_vec(s, out);
75 }
76}
77
78impl QuoteInto<String> for Bash {
79 fn quote_into<'q, S: Into<Quotable<'q>>>(s: S, out: &mut String) {
80 Self::quote_into_vec(s, unsafe { out.as_mut_vec() })
81 }
82}
83
84#[cfg(unix)]
85impl QuoteInto<std::ffi::OsString> for Bash {
86 fn quote_into<'q, S: Into<Quotable<'q>>>(s: S, out: &mut std::ffi::OsString) {
87 use std::os::unix::ffi::OsStringExt;
88 let s = Self::quote_vec(s);
89 let s = std::ffi::OsString::from_vec(s);
90 out.push(s);
91 }
92}
93
#[cfg(feature = "bstr")]
impl QuoteInto<bstr::BString> for Bash {
    /// Append the Bash-quoted form of `s` to the byte string `out`.
    ///
    /// The quoted bytes are built in a fresh `Vec<u8>` and then appended.
    fn quote_into<'q, S: Into<Quotable<'q>>>(s: S, out: &mut bstr::BString) {
        let quoted = Self::quote_vec(s);
        out.extend(quoted);
    }
}
101
102// ----------------------------------------------------------------------------
103
104impl Bash {
105 /// Quote a string of bytes into a new `Vec<u8>`.
106 ///
107 /// This will return one of the following:
108 /// - The string as-is, if no escaping is necessary.
109 /// - An [ANSI-C escaped string][ansi-c-quoting], like `$'foo\nbar'`.
110 ///
111 /// See [`quote_into_vec`][`Self::quote_into_vec`] for a variant that
112 /// extends an existing `Vec` instead of allocating a new one.
113 ///
114 /// # Examples
115 ///
116 /// ```
117 /// # use shell_quote::Bash;
118 /// assert_eq!(Bash::quote_vec("foobar"), b"foobar");
119 /// assert_eq!(Bash::quote_vec("foo bar"), b"$'foo bar'");
120 /// ```
121 ///
122 /// [ansi-c-quoting]:
123 /// https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html
124 ///
125 pub fn quote_vec<'a, S: Into<Quotable<'a>>>(s: S) -> Vec<u8> {
126 // Here, previously, in the `Escape` cases, an optimisation
127 // precalculated the required capacity of the output `Vec` to avoid
128 // reallocations later on, but benchmarks showed that it was slower. It
129 // _may_ have lowered maximum RAM required, but that was not measured.
130 match s.into() {
131 Quotable::Bytes(bytes) => match bytes::escape_prepare(bytes) {
132 bytes::Prepared::Empty => vec![b'\'', b'\''],
133 bytes::Prepared::Inert => bytes.into(),
134 bytes::Prepared::Escape(esc) => {
135 let mut sout = Vec::new();
136 bytes::escape_chars(esc, &mut sout);
137 sout
138 }
139 },
140 Quotable::Text(text) => match text::escape_prepare(text) {
141 text::Prepared::Empty => vec![b'\'', b'\''],
142 text::Prepared::Inert => text.into(),
143 text::Prepared::Escape(esc) => {
144 let mut sout = Vec::new();
145 text::escape_chars(esc, &mut sout);
146 sout
147 }
148 },
149 }
150 }
151
152 /// Quote a string of bytes into an existing `Vec<u8>`.
153 ///
154 /// See [`quote_vec`][`Self::quote_vec`] for more details.
155 ///
156 /// # Examples
157 ///
158 /// ```
159 /// # use shell_quote::Bash;
160 /// let mut buf = Vec::with_capacity(128);
161 /// Bash::quote_into_vec("foobar", &mut buf);
162 /// buf.push(b' '); // Add a space.
163 /// Bash::quote_into_vec("foo bar", &mut buf);
164 /// assert_eq!(buf, b"foobar $'foo bar'");
165 /// ```
166 ///
167 pub fn quote_into_vec<'a, S: Into<Quotable<'a>>>(s: S, sout: &mut Vec<u8>) {
168 // Here, previously, in the `Escape` cases, an optimisation
169 // precalculated the required capacity of the output `Vec` to avoid
170 // reallocations later on, but benchmarks showed that it was slower. It
171 // _may_ have lowered maximum RAM required, but that was not measured.
172 match s.into() {
173 Quotable::Bytes(bytes) => match bytes::escape_prepare(bytes) {
174 bytes::Prepared::Empty => sout.extend(b"''"),
175 bytes::Prepared::Inert => sout.extend(bytes),
176 bytes::Prepared::Escape(esc) => bytes::escape_chars(esc, sout),
177 },
178 Quotable::Text(text) => match text::escape_prepare(text) {
179 text::Prepared::Empty => sout.extend(b"''"),
180 text::Prepared::Inert => sout.extend(text.as_bytes()),
181 text::Prepared::Escape(esc) => text::escape_chars(esc, sout),
182 },
183 }
184 }
185}
186
187// ----------------------------------------------------------------------------
188
189mod bytes {
190 use super::u8_to_hex_escape;
191 use crate::ascii::Char;
192
193 pub enum Prepared {
194 Empty,
195 Inert,
196 Escape(Vec<Char>),
197 }
198
199 pub fn escape_prepare(sin: &[u8]) -> Prepared {
200 let esc: Vec<_> = sin.iter().map(Char::from).collect();
201 // An optimisation: if the string is not empty and contains only "safe"
202 // characters we can avoid further work.
203 if esc.is_empty() {
204 Prepared::Empty
205 } else if esc.iter().all(Char::is_inert) {
206 Prepared::Inert
207 } else {
208 Prepared::Escape(esc)
209 }
210 }
211
212 pub fn escape_chars(esc: Vec<Char>, sout: &mut Vec<u8>) {
213 // Push a Bash-style $'...' quoted string into `sout`.
214 sout.extend(b"$'");
215 for mode in esc {
216 use Char::*;
217 match mode {
218 Bell => sout.extend(b"\\a"),
219 Backspace => sout.extend(b"\\b"),
220 Escape => sout.extend(b"\\e"),
221 FormFeed => sout.extend(b"\\f"),
222 NewLine => sout.extend(b"\\n"),
223 CarriageReturn => sout.extend(b"\\r"),
224 HorizontalTab => sout.extend(b"\\t"),
225 VerticalTab => sout.extend(b"\\v"),
226 Control(ch) => sout.extend(&u8_to_hex_escape(ch)),
227 Backslash => sout.extend(b"\\\\"),
228 SingleQuote => sout.extend(b"\\'"),
229 DoubleQuote => sout.extend(b"\""),
230 Delete => sout.extend(b"\\x7F"),
231 PrintableInert(ch) => sout.push(ch),
232 Printable(ch) => sout.push(ch),
233 Extended(ch) => sout.extend(&u8_to_hex_escape(ch)),
234 }
235 }
236 sout.push(b'\'');
237 }
238}
239
240// ----------------------------------------------------------------------------
241
242mod text {
243 use super::u8_to_hex_escape;
244 use crate::utf8::Char;
245
246 pub enum Prepared {
247 Empty,
248 Inert,
249 Escape(Vec<Char>),
250 }
251
252 pub fn escape_prepare(sin: &str) -> Prepared {
253 let esc: Vec<_> = sin.chars().map(Char::from).collect();
254 // An optimisation: if the string is not empty and contains only "safe"
255 // characters we can avoid further work.
256 if esc.is_empty() {
257 Prepared::Empty
258 } else if esc.iter().all(Char::is_inert) {
259 Prepared::Inert
260 } else {
261 Prepared::Escape(esc)
262 }
263 }
264
265 pub fn escape_chars(esc: Vec<Char>, sout: &mut Vec<u8>) {
266 // Push a Bash-style $'...' quoted string into `sout`.
267 sout.extend(b"$'");
268 let buf = &mut [0u8; 4];
269 for mode in esc {
270 use Char::*;
271 match mode {
272 Bell => sout.extend(b"\\a"),
273 Backspace => sout.extend(b"\\b"),
274 Escape => sout.extend(b"\\e"),
275 FormFeed => sout.extend(b"\\f"),
276 NewLine => sout.extend(b"\\n"),
277 CarriageReturn => sout.extend(b"\\r"),
278 HorizontalTab => sout.extend(b"\\t"),
279 VerticalTab => sout.extend(b"\\v"),
280 Control(ch) => sout.extend(&u8_to_hex_escape(ch)),
281 Backslash => sout.extend(b"\\\\"),
282 SingleQuote => sout.extend(b"\\'"),
283 DoubleQuote => sout.extend(b"\""),
284 Delete => sout.extend(b"\\x7F"),
285 PrintableInert(ch) => sout.push(ch),
286 Printable(ch) => sout.push(ch),
287 Utf8(ch) => sout.extend(ch.encode_utf8(buf).as_bytes()),
288 }
289 }
290 sout.push(b'\'');
291 }
292}
293
294// ----------------------------------------------------------------------------
295
/// Render a single byte as a four-byte hexadecimal escape sequence.
///
/// The output is a backslash, a literal `x`, and exactly two upper-case hex
/// digits (high nibble first), e.g. `0x0A` becomes `\x0A`. This `\xHH` form
/// is understood by many shells.
#[inline]
fn u8_to_hex_escape(ch: u8) -> [u8; 4] {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    // Both indices are at most 15, so the table lookups cannot go out of
    // bounds.
    let high = usize::from(ch >> 4);
    let low = usize::from(ch & 0x0F);
    [b'\\', b'x', HEX_DIGITS[high], HEX_DIGITS[low]]
}
310
#[cfg(test)]
#[test]
fn test_u8_to_hex_escape() {
    // Exhaustively compare against the standard formatter for every possible
    // byte value.
    (u8::MIN..=u8::MAX).for_each(|byte| {
        let want = format!("\\x{byte:02X}");
        let got = u8_to_hex_escape(byte);
        let got = std::str::from_utf8(&got).unwrap();
        assert_eq!(got, want.as_str());
    });
}
320}