shell_quote/
sh.rs

1#![cfg(feature = "sh")]
2
3use crate::{ascii::Char, Quotable, QuoteInto};
4
/// Quote byte strings for use with `/bin/sh`.
///
/// # ⚠️ Warning
///
/// There is no escape sequence for bytes between 0x80 and 0xFF – these must be
/// reproduced exactly in the quoted output – hence **it is not possible to
/// safely create or quote into an existing [`String`]** with [`Sh`] because
/// these bytes would be misinterpreted as a second or subsequent byte of a
/// [multi-byte UTF-8 code point representation][utf-8-encoding].
///
/// [utf-8-encoding]: https://en.wikipedia.org/wiki/UTF-8#Encoding
///
/// If you're not using bytes between 0x80 and 0xFF, a workaround is to instead
/// quote into a [`Vec<u8>`] and convert that into a string using
/// [`String::from_utf8`]. The key difference is that `from_utf8` returns a
/// [`Result`] which the caller must deal with.
///
/// # Compatibility
///
/// Quoted/escaped strings produced by [`Sh`] also work in Bash, Dash, and Z
/// Shell.
///
/// The quoted/escaped strings it produces are different to those coming from
/// [`Bash`][`crate::Bash`] or its alias [`Zsh`][`crate::Zsh`]. Those strings
/// won't work in a pure `/bin/sh` shell like Dash, but they are better for
/// humans to read, to copy and paste. For example, [`Sh`] does not (and cannot)
/// escape control characters, but characters like `BEL` and `TAB` (and others)
/// are represented by `\a` and `\t` respectively by [`Bash`][`crate::Bash`].
///
/// # Notes
///
/// I wasn't able to find any definitive statement of exactly how Bourne Shell
/// strings should be quoted, mainly because "Bourne Shell" or `/bin/sh` can
/// refer to many different pieces of software: Bash has a Bourne Shell mode,
/// `/bin/sh` on Ubuntu is actually Dash, and on macOS 12.3 (and later, and
/// possibly earlier) all bets are off:
///
/// > `sh` is a POSIX-compliant command interpreter (shell). It is implemented
/// > by re-execing as either `bash(1)`, `dash(1)`, or `zsh(1)` as determined by
/// > the symbolic link located at `/private/var/select/sh`. If
/// > `/private/var/select/sh` does not exist or does not point to a valid
/// > shell, `sh` will use one of the supported shells.
///
/// However, [dash](https://en.wikipedia.org/wiki/Almquist_shell#dash) appears
/// to be the de facto `/bin/sh` these days, having been formally adopted in
/// Ubuntu and Debian, and also available as `/bin/dash` on macOS.
///
/// From dash(1):
///
/// > ## Quoting
/// >
/// >   Quoting is used to remove the special meaning of certain characters or
/// >   words to the shell, such as operators, whitespace, or keywords.  There
/// >   are three types of quoting: matched single quotes, matched double
/// >   quotes, and backslash.
/// >
/// > ## Backslash
/// >
/// >   A backslash preserves the literal meaning of the following character,
/// >   with the exception of ⟨newline⟩.  A backslash preceding a ⟨newline⟩ is
/// >   treated as a line continuation.
/// >
/// > ## Single Quotes
/// >
/// >   Enclosing characters in single quotes preserves the literal meaning of
/// >   all the characters (except single quotes, making it impossible to put
/// >   single-quotes in a single-quoted string).
/// >
/// > ## Double Quotes
/// >
/// >   Enclosing characters within double quotes preserves the literal meaning
/// >   of all characters except dollarsign ($), backquote (`), and backslash
/// >   (\).  The backslash inside double quotes is historically weird, and
/// >   serves to quote only the following characters:
/// >
/// >   ```text
/// >   $ ` " \ <newline>.
/// >   ```
/// >
/// >   Otherwise it remains literal.
///
/// The code in this module operates byte by byte, making no special allowances
/// for multi-byte character sets. In other words, it's up to the caller to
/// figure out encoding for non-ASCII characters. A significant use case for
/// this code is to quote filenames into scripts, and on *nix variants I
/// understand that filenames are essentially arrays of bytes, even if the OS
/// adds some normalisation and case-insensitivity on top.
#[derive(Debug, Clone, Copy)]
pub struct Sh;
95
96impl QuoteInto<Vec<u8>> for Sh {
97    fn quote_into<'q, S: Into<Quotable<'q>>>(s: S, out: &mut Vec<u8>) {
98        Self::quote_into_vec(s, out);
99    }
100}
101
102#[cfg(unix)]
103impl QuoteInto<std::ffi::OsString> for Sh {
104    fn quote_into<'q, S: Into<Quotable<'q>>>(s: S, out: &mut std::ffi::OsString) {
105        use std::os::unix::ffi::OsStringExt;
106        let s = Self::quote_vec(s);
107        let s = std::ffi::OsString::from_vec(s);
108        out.push(s);
109    }
110}
111
#[cfg(feature = "bstr")]
impl QuoteInto<bstr::BString> for Sh {
    /// Quote `s` for `/bin/sh` and append the quoted bytes to `out`.
    ///
    /// `BString` dereferences mutably to its underlying `Vec<u8>`, so the
    /// quoted bytes are written straight into `out` instead of being built in
    /// a temporary `Vec` and then copied across.
    fn quote_into<'q, S: Into<Quotable<'q>>>(s: S, out: &mut bstr::BString) {
        // Deref coercion: `&mut BString` -> `&mut Vec<u8>`.
        let buf: &mut Vec<u8> = out;
        Self::quote_into_vec(s, buf);
    }
}
119
120impl Sh {
121    /// Quote a string of bytes into a new `Vec<u8>`.
122    ///
123    /// This will return one of the following:
124    /// - The string as-is, if no quoting is necessary.
125    /// - A string containing single-quoted sections, like `foo' bar'`.
126    ///
127    /// See [`quote_into_vec`][`Self::quote_into_vec`] for a variant that
128    /// extends an existing `Vec` instead of allocating a new one.
129    ///
130    /// # Examples
131    ///
132    /// ```
133    /// # use shell_quote::Sh;
134    /// assert_eq!(Sh::quote_vec("foobar"), b"foobar");
135    /// assert_eq!(Sh::quote_vec("foo bar"), b"foo' bar'");
136    /// ```
137    ///
138    pub fn quote_vec<'a, S: Into<Quotable<'a>>>(s: S) -> Vec<u8> {
139        let bytes = match s.into() {
140            Quotable::Bytes(bytes) => bytes,
141            Quotable::Text(s) => s.as_bytes(),
142        };
143        match escape_prepare(bytes) {
144            Prepared::Empty => vec![b'\'', b'\''],
145            Prepared::Inert => bytes.into(),
146            Prepared::Escape(esc) => {
147                // Here, previously, an optimisation precalculated the required
148                // capacity of the output `Vec` to avoid reallocations later on,
149                // but benchmarks showed that it was slower. It _may_ have
150                // lowered maximum RAM required, but that was not measured.
151                let mut sout = Vec::new();
152                escape_chars(esc, &mut sout);
153                sout
154            }
155        }
156    }
157
158    /// Quote a string of bytes into an existing `Vec<u8>`.
159    ///
160    /// See [`quote_vec`][`Self::quote_vec`] for more details.
161    ///
162    /// # Examples
163    ///
164    /// ```
165    /// # use shell_quote::Sh;
166    /// let mut buf = Vec::with_capacity(128);
167    /// Sh::quote_into_vec("foobar", &mut buf);
168    /// buf.push(b' ');  // Add a space.
169    /// Sh::quote_into_vec("foo bar", &mut buf);
170    /// assert_eq!(buf, b"foobar foo' bar'");
171    /// ```
172    ///
173    pub fn quote_into_vec<'a, S: Into<Quotable<'a>>>(s: S, sout: &mut Vec<u8>) {
174        let bytes = match s.into() {
175            Quotable::Bytes(bytes) => bytes,
176            Quotable::Text(s) => s.as_bytes(),
177        };
178        match escape_prepare(bytes) {
179            Prepared::Empty => sout.extend(b"''"),
180            Prepared::Inert => sout.extend(bytes),
181            Prepared::Escape(esc) => {
182                // Here, previously, an optimisation precalculated the required
183                // capacity of the output `Vec` to avoid reallocations later on,
184                // but benchmarks showed that it was slower. It _may_ have
185                // lowered maximum RAM required, but that was not measured.
186                escape_chars(esc, sout);
187            }
188        }
189    }
190}
191
192// ----------------------------------------------------------------------------
193
194enum Prepared {
195    Empty,
196    Inert,
197    Escape(Vec<Char>),
198}
199
200fn escape_prepare(sin: &[u8]) -> Prepared {
201    let esc: Vec<_> = sin.iter().map(Char::from).collect();
202    // An optimisation: if the string is not empty and contains only "safe"
203    // characters we can avoid further work.
204    if esc.is_empty() {
205        Prepared::Empty
206    } else if esc.iter().all(Char::is_inert) {
207        Prepared::Inert
208    } else {
209        Prepared::Escape(esc)
210    }
211}
212
213fn escape_chars(esc: Vec<Char>, sout: &mut Vec<u8>) {
214    let mut inside_quotes = false;
215    for mode in esc {
216        use Char::*;
217        match mode {
218            PrintableInert(ch) | Extended(ch) => sout.push(ch),
219            Control(ch) | Printable(ch) => {
220                if inside_quotes {
221                    sout.push(ch);
222                } else {
223                    sout.push(b'\'');
224                    inside_quotes = true;
225                    sout.push(ch);
226                }
227            }
228            SingleQuote => {
229                if inside_quotes {
230                    sout.extend(b"'\\'");
231                    inside_quotes = false;
232                } else {
233                    sout.extend(b"\\'");
234                }
235            }
236            ch => {
237                if inside_quotes {
238                    sout.push(ch.code());
239                } else {
240                    sout.push(b'\'');
241                    inside_quotes = true;
242                    sout.push(ch.code());
243                }
244            }
245        }
246    }
247    if inside_quotes {
248        sout.push(b'\'');
249    }
250}