1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
#![doc = include_str!("lib.md")]
//// -- start of boilerplate that's generally pasted into the top of new projects -- ////
#![cfg_attr(all(feature = "clippy-insane", debug_assertions), warn(
//// Turn the "allow" lints listed by `rustc -W help` ["rustc 1.58.1 (db9d1b20b 2022-01-20)"]
//// into warn lints:
absolute_paths_not_starting_with_crate, box_pointers, deprecated_in_future,
elided_lifetimes_in_paths, explicit_outlives_requirements, keyword_idents,
macro_use_extern_crate, meta_variable_misuse, missing_abi, missing_copy_implementations,
missing_debug_implementations, missing_docs, non_ascii_idents, noop_method_call,
pointer_structural_match, rust_2021_incompatible_closure_captures,
rust_2021_incompatible_or_patterns, rust_2021_prefixes_incompatible_syntax,
rust_2021_prelude_collisions, single_use_lifetimes, trivial_casts, trivial_numeric_casts,
unreachable_pub, unsafe_code, unsafe_op_in_unsafe_fn, unstable_features,
unused_crate_dependencies, unused_extern_crates, unused_import_braces, unused_lifetimes,
unused_qualifications, unused_results, variant_size_differences,
//// Ditto for clippy lint categories (see https://github.com/rust-lang/rust-clippy):
clippy::all, clippy::cargo, clippy::nursery, clippy::pedantic, clippy::restriction,
) , allow(
//// turn off individual noisy/buggy lints enabled by broader categories above:
clippy::blanket_clippy_restriction_lints,
box_pointers, // obsolete
clippy::default_numeric_fallback,
clippy::implicit_return, // not idiomatic Rust
// clippy::integer_arithmetic,
clippy::missing_const_for_fn,
clippy::missing_docs_in_private_items, // hmm
clippy::missing_inline_in_public_items,
clippy::shadow_reuse, // annoying
clippy::shadow_same,
elided_lifetimes_in_paths,
clippy::ref_patterns,
clippy::uninlined_format_args,
))]
//// -- end of boilerplate that's generally pasted into the top of new projects -- ////
extern crate alloc;
use alloc::borrow::Cow::{self, Borrowed, Owned};
use core::{fmt::Write as _, str};
use std::ffi::{OsStr, OsString};
#[cfg(unix)] use std::os::unix::prelude::{OsStrExt, OsStringExt};
/// How much shell quoting a single byte demands.
///
/// The variant order matters: the derived `Ord` ranks the variants from least to most demanding,
/// and a string has to be quoted in the style of the highest category among its bytes
/// (unquoted < single-quoted < C-quoted).
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
enum Category {
    /// Usable as-is anywhere, even in an unquoted word.
    Safe = 0,
    /// Usable as-is, provided the string is at least single-quoted.
    Quoted = 1,
    /// Needs a backslash escape, and therefore a C-quoted string.
    Escaped = 2,
    /// Needs an octal `\ooo` escape, and therefore a C-quoted string.
    Octal = 3,
}

impl Category {
    /// Classifies one byte by the weakest quoting style under which it can appear.
    fn from(b: u8) -> Self {
        // Punctuation which, like ASCII letters and digits, never needs quoting or escaping.
        const SAFE_PUNCTUATION: &[u8] = b"+,-./:=@_";

        if b.is_ascii_alphanumeric() || SAFE_PUNCTUATION.contains(&b) {
            Self::Safe
        } else if !b.is_ascii() || b.is_ascii_control() {
            // Control bytes (0..=31 and 127) and high-bit-set bytes (128..=255) can only be
            // written as \ooo escapes inside a C-quoted string.
            Self::Octal
        } else if matches!(b, b'\'' | b'\\') {
            // Strictly speaking a backslash would survive plain single-quoting, but then it must
            // _not_ be backslash-escaped. A later byte may still force C-quoting, so both the
            // single quote and the backslash are always treated as needing an escape.
            Self::Escaped
        } else {
            // Every remaining printable ASCII byte is fine once it is inside quotes.
            Self::Quoted
        }
    }
}
/// Shell-quotes the raw bytes `s`, using the weakest quoting style that suits every byte.
///
/// * Empty input becomes `''`: an empty unquoted word would be dropped by the shell instead of
///   being passed on as an empty argument.
/// * Input made up only of `Category::Safe` bytes is returned borrowed and unchanged.
/// * Input whose most demanding byte is `Category::Quoted` is wrapped in single quotes.
/// * Anything else is ANSI-C quoted (`$'...'`): single quotes and backslashes are
///   backslash-escaped, and control / high-bit-set bytes become three-digit octal escapes.
fn quotemeta_inner(s: &[u8]) -> Cow<str> {
    // Nothing to classify, but the result must still be a visible (quoted) word.
    if s.is_empty() {
        return Borrowed("''");
    }
    // The quoting style is dictated by the most demanding byte in the input.
    let category = s
        .iter()
        .copied()
        .map(Category::from)
        .max()
        .unwrap_or(Category::Safe);
    let (prefix, suffix) = match category {
        Category::Safe => {
            // The entire input is from the safe (pure ASCII) set, so it can be returned as-is.
            // Rust doesn't know that, so the UTF-8 check is what lets us avoid `unsafe`.
            if let Ok(s) = str::from_utf8(s) {
                return Borrowed(s);
            }
            // The above should never fail, but if it does, fall back to an unquoted string.
            ("", "")
        }
        Category::Quoted => ("'", "'"),
        Category::Escaped | Category::Octal => ("$'", "'"),
    };
    // Reserve room for the input plus the quote delimiters. Escapes can still force a regrowth,
    // but plain single-quoted output now fits exactly.
    let mut result = String::with_capacity(prefix.len() + s.len() + suffix.len());
    result.push_str(prefix);
    for &byte in s {
        match Category::from(byte) {
            Category::Safe | Category::Quoted => result.push(char::from(byte)),
            Category::Escaped => {
                result.push('\\');
                result.push(char::from(byte));
            }
            Category::Octal => {
                // Writing into a `String` cannot fail, so the `fmt::Result` is discarded.
                let _ignored = write!(result, r"\{:03o}", byte);
            }
        }
    }
    result.push_str(suffix);
    Owned(result)
}
/// Returns a shell-quoted rendering of the given OS string.
///
/// The argument is a reference to anything implementing `AsRef<OsStr>`, so `&str`/`&String`,
/// `&Path`/`&PathBuf`, `&OsStr`/`&OsString`, and so on are all accepted. It has to be a
/// _reference_ (for tedious lifetime-related reasons), which means that passing a plain `String`
/// etc by value doesn't work.
///
/// The exact output is an implementation detail which may change. At present, strings which do
/// not need to be escaped are returned as-is; strings containing control characters,
/// high-bit-set octets, single quotes or backslashes are ANSI-C quoted (`$'...'`); and everything
/// else is single-quoted.
///
/// ```
/// use quotemeta::quotemeta;
/// use std::path::Path;
///
/// // "Boring" Unix paths do not need to be quoted.
/// assert_eq!(quotemeta("/bin/cat"), "/bin/cat");
/// // Spaces etc are single-quoted.
/// assert_eq!(quotemeta("Hello, world"), "'Hello, world'");
/// // A single quote forces C-quoting and is backslash-escaped.
/// assert_eq!(quotemeta("it's"), r"$'it\'s'");
/// // Unicode gets C-quoted.
/// assert_eq!(quotemeta("\u{1f980}"), r"$'\360\237\246\200'");
/// // It handles `Path`s
/// assert_eq!(quotemeta(Path::new("/etc/passwd")), "/etc/passwd");
/// ```
#[cfg(unix)]
pub fn quotemeta<P: AsRef<OsStr> + ?Sized>(path: &P) -> Cow<str> {
    quotemeta_inner(path.as_ref().as_bytes())
}
/// Undoes shell quoting on raw bytes: strips one layer of surrounding quotes, then decodes
/// backslash escapes.
///
/// A leading `$'` or `'` is only removed together with a matching trailing `'`; otherwise the
/// input is taken whole. In what remains, `\'` and `\\` decode to the escaped byte, `\ooo` (three
/// octal digits, the first no greater than `3`) decodes to the byte with that value, and a
/// backslash which starts neither form is kept literally. Input without any backslash is returned
/// borrowed; otherwise a freshly decoded buffer is returned.
fn unquotemeta_inner(s: &[u8]) -> Cow<[u8]> {
    // Strip "$'...'" or "'...'" to produce "...", trying the C-quoted form first.
    let body = if let Some(inner) = s.strip_prefix(b"$'").and_then(|t| t.strip_suffix(b"'")) {
        inner
    } else if let Some(inner) = s.strip_prefix(b"'").and_then(|t| t.strip_suffix(b"'")) {
        inner
    } else {
        s
    };

    // Without a backslash there is nothing to decode, so the stripped slice can be lent out.
    if !body.contains(&b'\\') {
        return Borrowed(body);
    }

    let mut decoded = Vec::with_capacity(body.len());
    let mut remaining = body;
    while let Some((&lead, after_lead)) = remaining.split_first() {
        if lead != b'\\' {
            // Copy the literal run up to (but excluding) the next backslash in one go.
            let run = remaining
                .iter()
                .position(|&b| b == b'\\')
                .unwrap_or(remaining.len());
            let (literal, tail) = remaining.split_at(run);
            decoded.extend_from_slice(literal);
            remaining = tail;
            continue;
        }

        // `lead` is a backslash: work out what it introduces.
        let (byte, tail) = match *after_lead {
            // An escaped backslash or single quote loses its backslash.
            [escaped @ (b'\'' | b'\\'), ref tail @ ..] => (escaped, tail),
            // A three-digit octal escape becomes the byte it denotes. The first digit is capped
            // at 3 so that the value always fits in a `u8`.
            [hi @ b'0'..=b'3', mid @ b'0'..=b'7', lo @ b'0'..=b'7', ref tail @ ..] => (
                ((hi - b'0') << 6) | ((mid - b'0') << 3) | (lo - b'0'),
                tail,
            ),
            // Anything else is not an escape we understand: keep the backslash itself and carry
            // on with whatever follows it.
            _ => (b'\\', after_lead),
        };
        decoded.push(byte);
        remaining = tail;
    }
    Owned(decoded)
}
/// Converts a `Cow` of one type into a `Cow` of another, keeping it borrowed or owned as it was.
///
/// `map_borrow` converts the borrowed form and `map_owned` converts the owned form; only the
/// converter matching the variant of `c` is called.
fn map_cow<'a, T, U>(
    c: Cow<'a, T>,
    map_borrow: impl FnOnce(&T) -> &U,
    map_owned: impl FnOnce(T::Owned) -> U::Owned,
) -> Cow<'a, U>
where
    T: ToOwned + ?Sized,
    U: ToOwned + ?Sized,
{
    match c {
        Cow::Borrowed(reference) => Cow::Borrowed(map_borrow(reference)),
        Cow::Owned(value) => Cow::Owned(map_owned(value)),
    }
}
/// Decodes a shell-quoted string back into an OS string.
///
/// The argument is a reference to anything implementing `AsRef<[u8]>`, so `&str`/`&String`,
/// `&[u8]`/`&Vec<u8>`, and so on are all accepted. It has to be a _reference_ (for tedious
/// lifetime-related reasons), which means that passing a plain `String` etc by value doesn't
/// work.
///
/// This is the inverse of [`quotemeta`], and round-trips its output back into the original OS
/// string. It is not a shell, so it does not handle arbitrary shell quoting and escaping syntax,
/// but it will otherwise return something reasonable without panicking.
///
/// ```
/// use quotemeta::unquotemeta;
/// use std::ffi::OsStr;
///
/// // Inverse of `quotemeta` examples:
/// assert_eq!(unquotemeta("/bin/cat"), OsStr::new("/bin/cat"));
/// assert_eq!(unquotemeta("'Hello, world'"), OsStr::new("Hello, world"));
/// assert_eq!(unquotemeta(r"$'\360\237\246\200'"), OsStr::new("\u{1f980}"));
/// // `unquotemeta` is not a shell:
/// assert_eq!(unquotemeta(r"~"), OsStr::new("~"));
/// ```
#[cfg(unix)]
pub fn unquotemeta<P: AsRef<[u8]> + ?Sized>(buf: &P) -> Cow<OsStr> {
    // Decode at the byte level first, then reinterpret the bytes as an OS string without
    // changing whether they are borrowed or owned.
    let decoded = unquotemeta_inner(buf.as_ref());
    map_cow(decoded, OsStr::from_bytes, OsString::from_vec)
}