1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
#![doc = include_str!("lib.md")]
//// -- start of boilerplate that's generally pasted into the top of new projects -- ////
#![cfg_attr(all(feature = "clippy-insane", debug_assertions), warn(
    //// Turn the "allow" lints listed by `rustc -W help` ["rustc 1.58.1 (db9d1b20b 2022-01-20)"]
    //// into warn lints:
    absolute_paths_not_starting_with_crate, box_pointers, deprecated_in_future,
    elided_lifetimes_in_paths, explicit_outlives_requirements, keyword_idents,
    macro_use_extern_crate, meta_variable_misuse, missing_abi, missing_copy_implementations,
    missing_debug_implementations, missing_docs, non_ascii_idents, noop_method_call,
    pointer_structural_match, rust_2021_incompatible_closure_captures,
    rust_2021_incompatible_or_patterns, rust_2021_prefixes_incompatible_syntax,
    rust_2021_prelude_collisions, single_use_lifetimes, trivial_casts, trivial_numeric_casts,
    unreachable_pub, unsafe_code, unsafe_op_in_unsafe_fn, unstable_features,
    unused_crate_dependencies, unused_extern_crates, unused_import_braces, unused_lifetimes,
    unused_qualifications, unused_results, variant_size_differences,
    //// Ditto for clippy lint categories (see https://github.com/rust-lang/rust-clippy):
    clippy::all, clippy::cargo, clippy::nursery, clippy::pedantic, clippy::restriction,
) , allow(
    //// turn off individual noisy/buggy lints enabled by broader categories above:
    clippy::blanket_clippy_restriction_lints,
    box_pointers,                          // obsolete
    clippy::default_numeric_fallback,
    clippy::implicit_return,               // not idiomatic Rust
    // clippy::integer_arithmetic,
    clippy::missing_const_for_fn,
    clippy::missing_docs_in_private_items, // hmm
    clippy::missing_inline_in_public_items,
    clippy::shadow_reuse,                  // annoying
    clippy::shadow_same,
    elided_lifetimes_in_paths,
    clippy::ref_patterns,
    clippy::uninlined_format_args,
))]
//// -- end of boilerplate that's generally pasted into the top of new projects -- ////

extern crate alloc;

use alloc::borrow::Cow::{self, Borrowed, Owned};
use core::{fmt::Write as _, str};
use std::ffi::{OsStr, OsString};
#[cfg(unix)] use std::os::unix::prelude::{OsStrExt, OsStringExt};

/// How strongly a single byte must be protected when it is written into a shell word.
///
/// The variants are declared in increasing order of strictness, and that order matters: a whole
/// string has to be quoted with the style of the highest category among its bytes, i.e.
/// unquoted < single-quoted < C-quoted.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
enum Category {
    /// The byte can be used as-is anywhere.
    Safe = 0,
    /// The byte can be used as-is, but must sit in at least a single-quoted string.
    Quoted = 1,
    /// The byte must be backslash-escaped, and the string it is in must be C-quoted.
    Escaped = 2,
    /// The byte must be octal-escaped, and the string it is in must be C-quoted.
    Octal = 3,
}

impl Category {
    /// Classifies one byte into the weakest category that still protects it.
    fn from(b: u8) -> Self {
        // Punctuation that is safe to use without any quoting or escaping.
        const SAFE_PUNCTUATION: &[u8] = b"+,-./:=@_";

        if b.is_ascii_alphanumeric() || SAFE_PUNCTUATION.contains(&b) {
            return Self::Safe;
        }

        // A single quote or backslash must be C-quoted and backslash-escaped. Technically a
        // backslash could get away with plain single-quoting, but then it must _not_ be
        // backslash-escaped; since a later byte may force C-quoting anyway, play it safe.
        if matches!(b, b'\'' | b'\\') {
            return Self::Escaped;
        }

        // What remains of printable ASCII (space through `~`) is fine once single-quoted;
        // control bytes, DEL and high-bit-set bytes need C-quoting with a `\ooo` escape.
        if matches!(b, b' ' ..= b'~') {
            Self::Quoted
        } else {
            Self::Octal
        }
    }
}

/// Shell-quotes the raw bytes `s`, using the weakest quoting style that is still safe.
///
/// The style is chosen from the highest `Category` of any byte in `s`:
///
/// * empty input becomes `''`, because an empty unquoted word would simply disappear from a
///   command line instead of being passed as an empty argument;
/// * input made only of `Safe` bytes is returned unchanged and borrowed;
/// * input whose highest category is `Quoted` is wrapped in single quotes;
/// * anything containing `Escaped` or `Octal` bytes is wrapped in `$'...'`, with single quotes
///   and backslashes backslash-escaped and `Octal` bytes written as three-digit `\ooo` escapes.
fn quotemeta_inner(s: &[u8]) -> Cow<'_, str> {
    // An empty word has no bytes to classify, yet it still needs explicit quotes to survive
    // being pasted into a shell command.
    if s.is_empty() {
        return Borrowed("''");
    }

    let category = s
        .iter()
        .copied()
        .map(Category::from)
        .max()
        .unwrap_or(Category::Safe);

    let (prefix, suffix) = match category {
        Category::Safe => {
            // The entire input is from the safe set so does not require escaping, so we can return
            // it. However, Rust doesn't know this so we need to do the UTF-8 check to avoid unsafe.
            if let Ok(s) = str::from_utf8(s) {
                return Borrowed(s);
            }
            // The above should never fail, but if it does, we fall back to an unquoted string.
            ("", "")
        }
        Category::Quoted => ("'", "'"),
        Category::Escaped | Category::Octal => ("$'", "'"),
    };

    // Room for the payload plus the delimiters: exact when nothing needs escaping, and a sensible
    // starting point (avoiding the first few regrowths) when escapes make the output longer.
    let mut result = String::with_capacity(s.len() + prefix.len() + suffix.len());
    result.push_str(prefix);
    for &byte in s {
        match Category::from(byte) {
            Category::Safe | Category::Quoted => {
                result.push(char::from(byte));
            }
            Category::Escaped => {
                result.push('\\');
                result.push(char::from(byte));
            }
            Category::Octal => {
                // Formatting into a `String` cannot fail, so the `fmt::Result` carries no
                // information worth propagating.
                let _ignored = write!(result, r"\{:03o}", byte);
            }
        }
    }
    result.push_str(suffix);
    Owned(result)
}

/// Shell-quotes an OS string, returning the quoted form as a string.
///
/// The argument is a reference to anything implementing `AsRef<OsStr>`, so `&str`/`&String`,
/// `&Path`/`&PathBuf`, `&OsStr`/`&OsString` and the like are all accepted. Because it is a
/// _reference_ to an `AsRef` (for tedious lifetime-related reasons), passing a plain `String`
/// etc by value doesn't work.
///
/// The exact output is an implementation detail which may change, but currently strings which
/// do not need to be escaped are returned as-is, those with high-bit-set octets are ANSI-C
/// quoted, and everything else is single-quoted.
///
/// ```
/// use quotemeta::quotemeta;
/// use std::path::Path;
///
/// // "Boring" Unix paths do not need to be quoted.
/// assert_eq!(quotemeta("/bin/cat"), "/bin/cat");
/// // Spaces etc are single-quoted.
/// assert_eq!(quotemeta("Hello, world"), "'Hello, world'");
/// // Unicode gets C-quoted.
/// assert_eq!(quotemeta("\u{1f980}"), r"$'\360\237\246\200'");
/// // It handles `Path`s
/// assert_eq!(quotemeta(Path::new("/etc/passwd")), "/etc/passwd");
/// ```
#[cfg(unix)]
pub fn quotemeta<P: AsRef<OsStr> + ?Sized>(path: &P) -> Cow<str> {
    // On Unix an `OsStr` is just bytes, which is what the quoting routine works on.
    quotemeta_inner(path.as_ref().as_bytes())
}

/// Strips one layer of `$'...'` or `'...'` quoting from `s` and decodes its backslash escapes.
///
/// The recognised escapes are `\'`, `\\` and three-digit octal `\ooo` whose first digit is
/// `0`-`3`. A backslash that starts anything else is copied through unchanged. If the stripped
/// input contains no backslash at all, it is returned borrowed without allocating.
fn unquotemeta_inner(s: &[u8]) -> Cow<[u8]> {
    // Peel off "$'...'" or "'...'" to get "..."; input that is not fully wrapped is used as-is.
    let unwrapped = s
        .strip_prefix(b"$'")
        .and_then(|inner| inner.strip_suffix(b"'"))
        .or_else(|| s.strip_prefix(b"'").and_then(|inner| inner.strip_suffix(b"'")))
        .unwrap_or(s);

    // Without a backslash there is nothing to decode.
    if !unwrapped.contains(&b'\\') {
        return Borrowed(unwrapped);
    }

    let mut decoded = Vec::with_capacity(unwrapped.len());
    let mut pending = unwrapped;

    while let Some((&head, tail)) = pending.split_first() {
        if head != b'\\' {
            // Copy the whole literal run up to the next backslash (or the end) in one go.
            let run = pending.iter().position(|&b| b == b'\\').unwrap_or(pending.len());
            let (literal, after) = pending.split_at(run);
            decoded.extend_from_slice(literal);
            pending = after;
            continue;
        }

        // `head` is a backslash; `tail` decides what it means.
        pending = match tail {
            // An escaped single quote or backslash loses its backslash.
            [escaped @ (b'\'' | b'\\'), after @ ..] => {
                decoded.push(*escaped);
                after
            }
            // Three octal digits collapse into the byte they encode; masking an ASCII digit
            // with 3 or 7 yields its numeric value.
            [hi @ b'0' ..= b'3', mid @ b'0' ..= b'7', lo @ b'0' ..= b'7', after @ ..] => {
                decoded.push(((*hi & 3) << 6) | ((*mid & 7) << 3) | (*lo & 7));
                after
            }
            // Anything else is not an escape we understand: keep the backslash and carry on
            // with whatever follows it.
            _ => {
                decoded.push(b'\\');
                tail
            }
        };
    }

    Owned(decoded)
}

/// Converts a `Cow` of `T` into a `Cow` of `U`, preserving whether it is borrowed or owned.
///
/// `map_borrow` is applied when `c` is borrowed and `map_owned` when it is owned; exactly one of
/// the two is called.
fn map_cow<T, U>(
    c: Cow<'_, T>,
    map_borrow: impl FnOnce(&T) -> &U,
    map_owned: impl FnOnce(T::Owned) -> U::Owned,
) -> Cow<'_, U>
where
    T: ToOwned + ?Sized,
    U: ToOwned + ?Sized,
{
    // Deal with the borrowed case up front so the owned value can be handled below.
    let owned = match c {
        Borrowed(shared) => return Borrowed(map_borrow(shared)),
        Owned(owned) => owned,
    };
    Owned(map_owned(owned))
}

/// Shell-unquotes a string, returning the result as an OS string.
///
/// The argument is a reference to anything implementing `AsRef<[u8]>`, so `&str`/`&String`,
/// `&[u8]`/`&Vec<u8>` and the like are all accepted. Because it is a _reference_ to an `AsRef`
/// (for tedious lifetime-related reasons), passing a plain `String` etc by value doesn't work.
///
/// This is the inverse of [`quotemeta`], and round-trips its output back into the original OS
/// string. It obviously does not handle arbitrary shell quoting and escaping syntax because it is
/// not a shell, but will otherwise return something reasonable without panicking.
///
/// ```
/// use quotemeta::unquotemeta;
/// use std::ffi::OsStr;
///
/// // Inverse of `quotemeta` examples:
/// assert_eq!(unquotemeta("/bin/cat"), OsStr::new("/bin/cat"));
/// assert_eq!(unquotemeta("'Hello, world'"), OsStr::new("Hello, world"));
/// assert_eq!(unquotemeta(r"$'\360\237\246\200'"), OsStr::new("\u{1f980}"));
/// // `unquotemeta` is not a shell:
/// assert_eq!(unquotemeta(r"~"), OsStr::new("~"));
/// ```
#[cfg(unix)]
pub fn unquotemeta<P: AsRef<[u8]> + ?Sized>(buf: &P) -> Cow<OsStr> {
    let decoded = unquotemeta_inner(buf.as_ref());
    // On Unix any byte string is a valid OS string, so rewrap the bytes without copying:
    // a borrowed slice stays borrowed and an owned buffer is moved.
    map_cow(decoded, OsStr::from_bytes, OsString::from_vec)
}