shell_quote/
bash.rs

1#![cfg(feature = "bash")]
2
3use crate::{Quotable, QuoteInto};
4
/// Quote byte strings for use with Bash, the GNU Bourne-Again Shell.
///
/// `Bash` is a unit type: it carries no state and only serves as the receiver
/// for the associated quoting functions such as [`Bash::quote_vec`] and
/// [`Bash::quote_into_vec`].
///
/// # Compatibility
///
/// Quoted/escaped strings produced by [`Bash`] work in both Bash and Z Shell.
///
/// # Output forms
///
/// Quoting produces one of three forms:
///
/// - `''` when the input is empty;
/// - the input unchanged when every character is inert (needs no quoting);
/// - otherwise an [ANSI C quoted string][ansi-c-quoting], `$'...'`.
///
/// Inside `$'...'` only the named escapes `\a`, `\b`, `\e`, `\f`, `\n`, `\r`,
/// `\t`, `\v`, `\\` and `\'`, plus two-digit `\xHH` hex escapes, are ever
/// emitted. The octal (`\nnn`) and control (`\cx`) forms are never produced.
///
/// # ⚠️ Warning
///
/// It is _possible_ to encode NUL in a Bash string, but Bash appears to then
/// truncate the rest of the string after that point **or** sometimes it filters
/// the NUL out. It's not yet clear to me when/why each behaviour is chosen.
///
/// If you're quoting UTF-8 content this may not be a problem since there is
/// only one code point – the null character itself – that will ever produce a
/// NUL byte. To avoid this problem entirely, consider using [Modified
/// UTF-8][modified-utf-8] so that the NUL byte can never appear in a valid byte
/// stream.
///
/// [modified-utf-8]: https://en.wikipedia.org/wiki/UTF-8#Modified_UTF-8
///
/// # Notes
///
/// From bash(1):
///
/// > Words of the form $'string' are treated specially. The word expands to
/// > string, with backslash-escaped characters replaced as specified by the
/// > ANSI C standard. Backslash escape sequences, if present, are decoded as
/// > follows:
/// >
/// > ```text
/// > \a     alert (bell)
/// > \b     backspace
/// > \e     an escape character
/// > \f     form feed
/// > \n     new line
/// > \r     carriage return
/// > \t     horizontal tab
/// > \v     vertical tab
/// > \\     backslash
/// > \'     single quote
/// > \nnn   the eight-bit character whose value is the
/// >        octal value nnn (one to three digits)
/// > \xHH   the eight-bit character whose value is the
/// >        hexadecimal value HH (one or two hex digits)
/// > \cx    a control-x character
/// > ```
///
/// Bash allows, in newer versions, for non-ASCII Unicode characters with
/// `\uHHHH` and `\UXXXXXXXX` syntax inside these [ANSI C quoted
/// strings][ansi-c-quoting], but we avoid this and work only with bytes. Part
/// of the problem is that it's not clear how Bash then works with these
/// strings. Does it encode these characters into bytes according to the user's
/// current locale? Are strings in Bash now natively Unicode?
///
/// For now it's up to the caller to figure out encoding. A significant use case
/// for this code is to quote filenames into scripts, and on *nix variants I
/// understand that filenames are essentially arrays of bytes, even if the OS
/// adds some normalisation and case-insensitivity on top.
///
/// [ansi-c-quoting]:
///     https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html
///
#[derive(Debug, Clone, Copy)]
pub struct Bash;
69
70// ----------------------------------------------------------------------------
71
72impl QuoteInto<Vec<u8>> for Bash {
73    fn quote_into<'q, S: Into<Quotable<'q>>>(s: S, out: &mut Vec<u8>) {
74        Self::quote_into_vec(s, out);
75    }
76}
77
78impl QuoteInto<String> for Bash {
79    fn quote_into<'q, S: Into<Quotable<'q>>>(s: S, out: &mut String) {
80        Self::quote_into_vec(s, unsafe { out.as_mut_vec() })
81    }
82}
83
84#[cfg(unix)]
85impl QuoteInto<std::ffi::OsString> for Bash {
86    fn quote_into<'q, S: Into<Quotable<'q>>>(s: S, out: &mut std::ffi::OsString) {
87        use std::os::unix::ffi::OsStringExt;
88        let s = Self::quote_vec(s);
89        let s = std::ffi::OsString::from_vec(s);
90        out.push(s);
91    }
92}
93
#[cfg(feature = "bstr")]
impl QuoteInto<bstr::BString> for Bash {
    /// Append the Bash-quoted form of `s` to the byte string `out`.
    ///
    /// The input is quoted into a fresh byte vector whose bytes are then
    /// appended to `out`.
    fn quote_into<'q, S: Into<Quotable<'q>>>(s: S, out: &mut bstr::BString) {
        out.extend(Self::quote_vec(s));
    }
}
101
102// ----------------------------------------------------------------------------
103
104impl Bash {
105    /// Quote a string of bytes into a new `Vec<u8>`.
106    ///
107    /// This will return one of the following:
108    /// - The string as-is, if no escaping is necessary.
109    /// - An [ANSI-C escaped string][ansi-c-quoting], like `$'foo\nbar'`.
110    ///
111    /// See [`quote_into_vec`][`Self::quote_into_vec`] for a variant that
112    /// extends an existing `Vec` instead of allocating a new one.
113    ///
114    /// # Examples
115    ///
116    /// ```
117    /// # use shell_quote::Bash;
118    /// assert_eq!(Bash::quote_vec("foobar"), b"foobar");
119    /// assert_eq!(Bash::quote_vec("foo bar"), b"$'foo bar'");
120    /// ```
121    ///
122    /// [ansi-c-quoting]:
123    ///     https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html
124    ///
125    pub fn quote_vec<'a, S: Into<Quotable<'a>>>(s: S) -> Vec<u8> {
126        // Here, previously, in the `Escape` cases, an optimisation
127        // precalculated the required capacity of the output `Vec` to avoid
128        // reallocations later on, but benchmarks showed that it was slower. It
129        // _may_ have lowered maximum RAM required, but that was not measured.
130        match s.into() {
131            Quotable::Bytes(bytes) => match bytes::escape_prepare(bytes) {
132                bytes::Prepared::Empty => vec![b'\'', b'\''],
133                bytes::Prepared::Inert => bytes.into(),
134                bytes::Prepared::Escape(esc) => {
135                    let mut sout = Vec::new();
136                    bytes::escape_chars(esc, &mut sout);
137                    sout
138                }
139            },
140            Quotable::Text(text) => match text::escape_prepare(text) {
141                text::Prepared::Empty => vec![b'\'', b'\''],
142                text::Prepared::Inert => text.into(),
143                text::Prepared::Escape(esc) => {
144                    let mut sout = Vec::new();
145                    text::escape_chars(esc, &mut sout);
146                    sout
147                }
148            },
149        }
150    }
151
152    /// Quote a string of bytes into an existing `Vec<u8>`.
153    ///
154    /// See [`quote_vec`][`Self::quote_vec`] for more details.
155    ///
156    /// # Examples
157    ///
158    /// ```
159    /// # use shell_quote::Bash;
160    /// let mut buf = Vec::with_capacity(128);
161    /// Bash::quote_into_vec("foobar", &mut buf);
162    /// buf.push(b' ');  // Add a space.
163    /// Bash::quote_into_vec("foo bar", &mut buf);
164    /// assert_eq!(buf, b"foobar $'foo bar'");
165    /// ```
166    ///
167    pub fn quote_into_vec<'a, S: Into<Quotable<'a>>>(s: S, sout: &mut Vec<u8>) {
168        // Here, previously, in the `Escape` cases, an optimisation
169        // precalculated the required capacity of the output `Vec` to avoid
170        // reallocations later on, but benchmarks showed that it was slower. It
171        // _may_ have lowered maximum RAM required, but that was not measured.
172        match s.into() {
173            Quotable::Bytes(bytes) => match bytes::escape_prepare(bytes) {
174                bytes::Prepared::Empty => sout.extend(b"''"),
175                bytes::Prepared::Inert => sout.extend(bytes),
176                bytes::Prepared::Escape(esc) => bytes::escape_chars(esc, sout),
177            },
178            Quotable::Text(text) => match text::escape_prepare(text) {
179                text::Prepared::Empty => sout.extend(b"''"),
180                text::Prepared::Inert => sout.extend(text.as_bytes()),
181                text::Prepared::Escape(esc) => text::escape_chars(esc, sout),
182            },
183        }
184    }
185}
186
187// ----------------------------------------------------------------------------
188
189mod bytes {
190    use super::u8_to_hex_escape;
191    use crate::ascii::Char;
192
193    pub enum Prepared {
194        Empty,
195        Inert,
196        Escape(Vec<Char>),
197    }
198
199    pub fn escape_prepare(sin: &[u8]) -> Prepared {
200        let esc: Vec<_> = sin.iter().map(Char::from).collect();
201        // An optimisation: if the string is not empty and contains only "safe"
202        // characters we can avoid further work.
203        if esc.is_empty() {
204            Prepared::Empty
205        } else if esc.iter().all(Char::is_inert) {
206            Prepared::Inert
207        } else {
208            Prepared::Escape(esc)
209        }
210    }
211
212    pub fn escape_chars(esc: Vec<Char>, sout: &mut Vec<u8>) {
213        // Push a Bash-style $'...' quoted string into `sout`.
214        sout.extend(b"$'");
215        for mode in esc {
216            use Char::*;
217            match mode {
218                Bell => sout.extend(b"\\a"),
219                Backspace => sout.extend(b"\\b"),
220                Escape => sout.extend(b"\\e"),
221                FormFeed => sout.extend(b"\\f"),
222                NewLine => sout.extend(b"\\n"),
223                CarriageReturn => sout.extend(b"\\r"),
224                HorizontalTab => sout.extend(b"\\t"),
225                VerticalTab => sout.extend(b"\\v"),
226                Control(ch) => sout.extend(&u8_to_hex_escape(ch)),
227                Backslash => sout.extend(b"\\\\"),
228                SingleQuote => sout.extend(b"\\'"),
229                DoubleQuote => sout.extend(b"\""),
230                Delete => sout.extend(b"\\x7F"),
231                PrintableInert(ch) => sout.push(ch),
232                Printable(ch) => sout.push(ch),
233                Extended(ch) => sout.extend(&u8_to_hex_escape(ch)),
234            }
235        }
236        sout.push(b'\'');
237    }
238}
239
240// ----------------------------------------------------------------------------
241
242mod text {
243    use super::u8_to_hex_escape;
244    use crate::utf8::Char;
245
246    pub enum Prepared {
247        Empty,
248        Inert,
249        Escape(Vec<Char>),
250    }
251
252    pub fn escape_prepare(sin: &str) -> Prepared {
253        let esc: Vec<_> = sin.chars().map(Char::from).collect();
254        // An optimisation: if the string is not empty and contains only "safe"
255        // characters we can avoid further work.
256        if esc.is_empty() {
257            Prepared::Empty
258        } else if esc.iter().all(Char::is_inert) {
259            Prepared::Inert
260        } else {
261            Prepared::Escape(esc)
262        }
263    }
264
265    pub fn escape_chars(esc: Vec<Char>, sout: &mut Vec<u8>) {
266        // Push a Bash-style $'...' quoted string into `sout`.
267        sout.extend(b"$'");
268        let buf = &mut [0u8; 4];
269        for mode in esc {
270            use Char::*;
271            match mode {
272                Bell => sout.extend(b"\\a"),
273                Backspace => sout.extend(b"\\b"),
274                Escape => sout.extend(b"\\e"),
275                FormFeed => sout.extend(b"\\f"),
276                NewLine => sout.extend(b"\\n"),
277                CarriageReturn => sout.extend(b"\\r"),
278                HorizontalTab => sout.extend(b"\\t"),
279                VerticalTab => sout.extend(b"\\v"),
280                Control(ch) => sout.extend(&u8_to_hex_escape(ch)),
281                Backslash => sout.extend(b"\\\\"),
282                SingleQuote => sout.extend(b"\\'"),
283                DoubleQuote => sout.extend(b"\""),
284                Delete => sout.extend(b"\\x7F"),
285                PrintableInert(ch) => sout.push(ch),
286                Printable(ch) => sout.push(ch),
287                Utf8(ch) => sout.extend(ch.encode_utf8(buf).as_bytes()),
288            }
289        }
290        sout.push(b'\'');
291    }
292}
293
294// ----------------------------------------------------------------------------
295
/// Render a byte as a 4-byte hex escape sequence.
///
/// The result is always `\xHH`: a backslash, a literal "x", then the high and
/// low nibbles of `ch` as upper-case hexadecimal digits. This format is
/// understood by many shells.
#[inline]
fn u8_to_hex_escape(ch: u8) -> [u8; 4] {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    // Both nibbles are in 0..=15, so the table lookups cannot go out of bounds.
    let high = HEX_DIGITS[usize::from(ch >> 4)];
    let low = HEX_DIGITS[usize::from(ch & 0x0F)];
    [b'\\', b'x', high, low]
}
310
/// Check every possible byte against the standard library's upper-case,
/// zero-padded hex formatting.
#[cfg(test)]
#[test]
fn test_u8_to_hex_escape() {
    for ch in u8::MIN..=u8::MAX {
        let escaped = u8_to_hex_escape(ch);
        // The escape is pure ASCII, so it must always be valid UTF-8.
        let observed = std::str::from_utf8(&escaped).unwrap();
        let expected = format!("\\x{ch:02X}");
        assert_eq!(observed, expected.as_str());
    }
}