extern crate alloc;
use alloc::vec::Vec;
use alloc::borrow::Cow;
#[cfg(test)]
use alloc::vec;
#[cfg(test)]
use alloc::borrow::ToOwned;
#[cfg(all(doc, not(doctest)))]
use crate::{self as shlex, quoting_warning};
use super::QuoteError;
/// An iterator that splits a byte string into words using shell-style quoting rules:
/// whitespace separates words, `'…'` and `"…"` group bytes, `\` escapes the next byte, and
/// a `#` at the start of a word begins a comment that runs to the end of the line.
pub struct Shlex<'a> {
    in_iter: core::slice::Iter<'a, u8>,
    /// Starts at 1 and is incremented every time a newline byte is consumed from the input.
    pub line_no: usize,
    /// Set to `true` when a word could not be completed because the input ended inside a
    /// quoted section or directly after a backslash.
    pub had_error: bool,
}

impl<'a> Shlex<'a> {
    /// Creates a lexer over `in_bytes`, positioned at the first byte.
    pub fn new(in_bytes: &'a [u8]) -> Self {
        Shlex {
            in_iter: in_bytes.iter(),
            line_no: 1,
            had_error: false,
        }
    }

    /// Parses one word whose first byte `ch` has already been consumed.
    ///
    /// Returns `None` (and sets `had_error`) if the input ends inside a quoted section or
    /// right after a backslash.
    fn parse_word(&mut self, mut ch: u8) -> Option<Vec<u8>> {
        let mut word = Vec::new();
        loop {
            match ch {
                b'"' => {
                    if self.parse_double(&mut word).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\'' => {
                    if self.parse_single(&mut word).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\\' => match self.next_char() {
                    // Backslash-newline is a line continuation and contributes nothing.
                    Some(b'\n') => {}
                    Some(escaped) => word.push(escaped),
                    None => {
                        self.had_error = true;
                        return None;
                    }
                },
                // Unquoted whitespace terminates the word.
                b' ' | b'\t' | b'\n' => break,
                other => word.push(other),
            }
            match self.next_char() {
                Some(next) => ch = next,
                None => break,
            }
        }
        Some(word)
    }

    /// Consumes the remainder of a double-quoted section (the opening `"` is already consumed)
    /// and appends its contents to `result`.
    ///
    /// Returns `Err(())` if the input ends before the closing quote.
    fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        while let Some(byte) = self.next_char() {
            match byte {
                b'"' => return Ok(()),
                b'\\' => {
                    let escaped = self.next_char().ok_or(())?;
                    match escaped {
                        // Backslash-newline is dropped entirely.
                        b'\n' => {}
                        // These bytes lose their backslash.
                        b'$' | b'`' | b'"' | b'\\' => result.push(escaped),
                        // Any other byte keeps the backslash in front of it.
                        _ => result.extend_from_slice(&[b'\\', escaped]),
                    }
                }
                _ => result.push(byte),
            }
        }
        Err(())
    }

    /// Consumes the remainder of a single-quoted section (the opening `'` is already consumed)
    /// and appends its contents verbatim to `result`.
    ///
    /// Returns `Err(())` if the input ends before the closing quote.
    fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        while let Some(byte) = self.next_char() {
            if byte == b'\'' {
                return Ok(());
            }
            result.push(byte);
        }
        Err(())
    }

    /// Returns the next input byte, bumping `line_no` whenever that byte is a newline.
    fn next_char(&mut self) -> Option<u8> {
        let byte = *self.in_iter.next()?;
        if byte == b'\n' {
            self.line_no += 1;
        }
        Some(byte)
    }
}

impl<'a> Iterator for Shlex<'a> {
    type Item = Vec<u8>;

    /// Skips leading whitespace and `#` comments, then parses and returns the next word.
    ///
    /// Returns `None` at end of input or when the word is malformed; in the latter case
    /// `had_error` is set as well.
    fn next(&mut self) -> Option<Self::Item> {
        let mut ch = self.next_char()?;
        loop {
            match ch {
                b' ' | b'\t' | b'\n' => {}
                b'#' => {
                    // Discard everything up to and including the end of the line.
                    while let Some(skipped) = self.next_char() {
                        if skipped == b'\n' {
                            break;
                        }
                    }
                }
                _ => break,
            }
            ch = self.next_char()?;
        }
        self.parse_word(ch)
    }
}
/// Splits `in_bytes` into words with [`Shlex`].
///
/// Returns `None` if the lexer reported an error (its `had_error` flag is set after the
/// input has been consumed), otherwise `Some` with every word in order.
pub fn split(in_bytes: &[u8]) -> Option<Vec<Vec<u8>>> {
    let mut lexer = Shlex::new(in_bytes);
    // Collect through a mutable borrow so `had_error` can still be inspected afterwards.
    let words: Vec<Vec<u8>> = (&mut lexer).collect();
    if lexer.had_error {
        return None;
    }
    Some(words)
}
/// A configurable quoter that turns byte strings into shell words.
#[derive(Default, Debug, Clone)]
pub struct Quoter {
    /// When `false` (the default), `quote` rejects input containing a nul byte.
    allow_nul: bool,
}

impl Quoter {
    /// Creates a `Quoter` with default settings (nul bytes are rejected).
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets whether input containing nul bytes is accepted, returning the updated `Quoter`.
    #[inline]
    pub fn allow_nul(mut self, allow: bool) -> Self {
        self.allow_nul = allow;
        self
    }

    /// Quotes every word with [`Quoter::quote`] and joins the results with single spaces.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by `quote`; later words are not processed.
    pub fn join<'a, I: IntoIterator<Item = &'a [u8]>>(&self, words: I) -> Result<Vec<u8>, QuoteError> {
        let mut quoted: Vec<Cow<'a, [u8]>> = Vec::new();
        for word in words {
            quoted.push(self.quote(word)?);
        }
        Ok(quoted.join(&b' '))
    }

    /// Quotes `in_bytes` so that it forms a single word.
    ///
    /// Empty input becomes `''`.  Input that needs no quoting at all is returned borrowed;
    /// otherwise the input is cut into chunks, each rendered with the strategy chosen by
    /// `quoting_strategy`, and the concatenation is returned as an owned buffer.
    ///
    /// # Errors
    ///
    /// Returns `QuoteError::Nul` if the input contains a nul byte and nul bytes have not
    /// been allowed via [`Quoter::allow_nul`].
    pub fn quote<'a>(&self, mut in_bytes: &'a [u8]) -> Result<Cow<'a, [u8]>, QuoteError> {
        if in_bytes.is_empty() {
            return Ok(Cow::Borrowed(&b"''"[..]));
        }
        if !self.allow_nul && in_bytes.contains(&b'\0') {
            return Err(QuoteError::Nul);
        }

        let mut out: Vec<u8> = Vec::new();
        while !in_bytes.is_empty() {
            let (cur_len, strategy) = quoting_strategy(in_bytes);
            let covers_everything = cur_len == in_bytes.len();
            if covers_everything && out.is_empty() && strategy == QuotingStrategy::Unquoted {
                // Nothing emitted yet and nothing needs quoting: hand the input back as-is.
                return Ok(Cow::Borrowed(in_bytes));
            }
            let (cur_chunk, rest) = in_bytes.split_at(cur_len);
            // Every iteration must consume at least one byte, otherwise this loop never ends.
            assert!(rest.len() < in_bytes.len());
            in_bytes = rest;
            append_quoted_chunk(&mut out, cur_chunk, strategy);
        }
        Ok(Cow::Owned(out))
    }
}
/// How a chunk of bytes is rendered when it is appended to the quoted output.
///
/// `Debug`, `Clone`, `Copy` and `Eq` are derived in addition to `PartialEq` so values can be
/// used in `assert_eq!`/`debug_assert!` messages and passed around without moves.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum QuotingStrategy {
    /// The chunk is emitted verbatim.
    Unquoted,
    /// The chunk is wrapped in single quotes.
    SingleQuoted,
    /// The chunk is wrapped in double quotes, with backslash escapes where needed.
    DoubleQuoted,
}
/// Returns `true` if byte `c` may appear in a word without any quoting.
///
/// Only a conservative allow-list of ASCII letters, digits and a few punctuation characters
/// is accepted.  Every other ASCII byte is listed explicitly so the `match` stays exhaustive
/// without a catch-all arm.  Bytes `>= 0x80` always return `false`.
const fn unquoted_ok(c: u8) -> bool {
    match c as char {
        // Allowed characters.
        '+' | '-' | '.' | '/' | ':' | '@' | ']' | '_' |
        '0'..='9' | 'A'..='Z' | 'a'..='z'
        => true,

        // Shell operators, quoting characters and whitespace.
        '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | '\'' | ' ' | '\t' | '\n' |
        // Characters that are special only in some positions (globs, comments, `~`, ...).
        '*' | '?' | '[' | '#' | '~' | '=' | '%' |
        // Braces.
        '{' | '}' |
        // Comma.
        ',' |
        // Carriage return.
        '\r' |
        // `!` and `^`.
        '!' | '^' |
        // Nul and the remaining control characters.
        '\x00' ..= '\x1f' | '\x7f'
        => false,

        // Non-ASCII bytes are never safe to leave unquoted.  This arm used to call
        // `unquoted_ok(c)` again with the same argument, which recursed without end for
        // any byte >= 0x80; returning `false` is the conservative answer.
        '\u{80}' ..= '\u{10ffff}' => false,
    }
}
/// Table-driven equivalent of `unquoted_ok` for use in hot loops.
///
/// The answers for all bytes below `0x80` are precomputed at compile time into a 128-bit
/// mask; bit `c` is set when `unquoted_ok(c)` is `true`.  Bytes `>= 0x80` return `false`.
fn unquoted_ok_fast(c: u8) -> bool {
    const UNQUOTED_OK_MASK: u128 = {
        // Build the mask of all bytes in 0..0x80 that pass `unquoted_ok`.
        let mut c = 0u8;
        let mut mask = 0u128;
        while c < 0x80 {
            if unquoted_ok(c) {
                mask |= 1u128 << c;
            }
            c += 1;
        }
        mask
    };
    // Shifting a `u128` by 128 or more is an overflow: it panics in debug builds and wraps
    // to the wrong bit in release builds.  Reject non-ASCII bytes before shifting.
    c < 0x80 && ((UNQUOTED_OK_MASK >> c) & 1) != 0
}
/// Returns `true` if byte `c` may appear inside a single-quoted chunk.
///
/// Three bytes are rejected: `'` (it would end the quoted section), `^` (only accepted as
/// the first byte of a chunk, which `quoting_strategy` handles separately) and `\`.
fn single_quoted_ok(c: u8) -> bool {
    !matches!(c, b'\'' | b'^' | b'\\')
}
/// Returns `true` if byte `c` may appear inside a double-quoted chunk.
///
/// The bytes `` ` ``, `$`, `!` and `^` are rejected; everything else is accepted (bytes that
/// need a backslash inside double quotes are escaped by `append_quoted_chunk`).
fn double_quoted_ok(c: u8) -> bool {
    !matches!(c, b'`' | b'$' | b'!' | b'^')
}
/// Picks a quoting strategy for a prefix of `in_bytes`.
///
/// Scans forward for as long as at least one strategy can represent every byte seen so far,
/// and returns the length of that prefix together with the preferred strategy that survived
/// (unquoted first, then single-quoted, then double-quoted).
///
/// # Panics
///
/// Panics if `in_bytes` is empty, because the first byte is inspected unconditionally.
#[cfg_attr(manual_codegen_check, inline(never))]
fn quoting_strategy(in_bytes: &[u8]) -> (usize, QuotingStrategy) {
    const UNQUOTED_OK: u8 = 1;
    const SINGLE_QUOTED_OK: u8 = 2;
    const DOUBLE_QUOTED_OK: u8 = 4;

    // A leading `^` is only representable as the first byte of a single-quoted chunk, so it
    // is consumed up front and pins the strategy.
    let (mut allowed, mut len) = if in_bytes[0] == b'^' {
        (SINGLE_QUOTED_OK, 1)
    } else {
        (UNQUOTED_OK | SINGLE_QUOTED_OK | DOUBLE_QUOTED_OK, 0)
    };

    for &byte in &in_bytes[len..] {
        // Collect the strategies this byte rules out.
        let mut forbidden = 0u8;
        if byte >= 0x80 {
            forbidden |= UNQUOTED_OK;
        } else {
            if !unquoted_ok_fast(byte) {
                forbidden |= UNQUOTED_OK;
            }
            if !single_quoted_ok(byte) {
                forbidden |= SINGLE_QUOTED_OK;
            }
            if !double_quoted_ok(byte) {
                forbidden |= DOUBLE_QUOTED_OK;
            }
        }

        let remaining = allowed & !forbidden;
        if remaining == 0 {
            // No single strategy covers both the prefix and this byte; end the chunk here.
            break;
        }
        allowed = remaining;
        len += 1;
    }

    let strategy = if allowed & UNQUOTED_OK != 0 {
        QuotingStrategy::Unquoted
    } else if allowed & SINGLE_QUOTED_OK != 0 {
        QuotingStrategy::SingleQuoted
    } else if allowed & DOUBLE_QUOTED_OK != 0 {
        QuotingStrategy::DoubleQuoted
    } else {
        unreachable!()
    };
    debug_assert!(len > 0);
    (len, strategy)
}
/// Appends `cur_chunk` to `out`, rendered according to `strategy`.
///
/// * `Unquoted`: the bytes are copied verbatim.
/// * `SingleQuoted`: the bytes are copied verbatim between a pair of `'`.
/// * `DoubleQuoted`: the bytes are copied between a pair of `"`, with a backslash inserted
///   before each `$`, `` ` ``, `"` and `\`.
fn append_quoted_chunk(out: &mut Vec<u8>, cur_chunk: &[u8], strategy: QuotingStrategy) {
    match strategy {
        QuotingStrategy::Unquoted => out.extend_from_slice(cur_chunk),
        QuotingStrategy::SingleQuoted => {
            // Room for the chunk plus the two quote bytes.
            out.reserve(cur_chunk.len() + 2);
            out.push(b'\'');
            out.extend_from_slice(cur_chunk);
            out.push(b'\'');
        }
        QuotingStrategy::DoubleQuoted => {
            // Lower bound only: escaped bytes need one extra byte each.
            out.reserve(cur_chunk.len() + 2);
            out.push(b'"');
            for &byte in cur_chunk {
                match byte {
                    b'$' | b'`' | b'"' | b'\\' => out.extend_from_slice(&[b'\\', byte]),
                    _ => out.push(byte),
                }
            }
            out.push(b'"');
        }
    }
}
/// Quotes each word and joins the results with single spaces, allowing nul bytes.
///
/// Convenience wrapper around `Quoter::new().allow_nul(true).join(words)`.
#[deprecated(since = "1.3.0", note = "replace with `try_join(words)?` to avoid nul byte danger")]
pub fn join<'a, I: IntoIterator<Item = &'a [u8]>>(words: I) -> Vec<u8> {
// Nul bytes are allowed here and `Quoter::quote` only returns an error for a disallowed
// nul byte, so this `unwrap` is not expected to panic.
Quoter::new().allow_nul(true).join(words).unwrap()
}
/// Quotes each word and joins the results with single spaces, using a default `Quoter`.
///
/// # Errors
///
/// Returns whatever error `Quoter::join` reports; with the default settings that is
/// `QuoteError::Nul` when a word contains a nul byte.
pub fn try_join<'a, I: IntoIterator<Item = &'a [u8]>>(words: I) -> Result<Vec<u8>, QuoteError> {
Quoter::new().join(words)
}
/// Quotes `in_bytes` as a single word, allowing nul bytes.
///
/// Convenience wrapper around `Quoter::new().allow_nul(true).quote(in_bytes)`.
#[deprecated(since = "1.3.0", note = "replace with `try_quote(str)?` to avoid nul byte danger")]
pub fn quote(in_bytes: &[u8]) -> Cow<[u8]> {
// Nul bytes are allowed here and `Quoter::quote` only returns an error for a disallowed
// nul byte, so this `unwrap` is not expected to panic.
Quoter::new().allow_nul(true).quote(in_bytes).unwrap()
}
/// Quotes `in_bytes` as a single word, using a default `Quoter`.
///
/// # Errors
///
/// Returns whatever error `Quoter::quote` reports; with the default settings that is
/// `QuoteError::Nul` when the input contains a nul byte.
pub fn try_quote(in_bytes: &[u8]) -> Result<Cow<[u8]>, QuoteError> {
Quoter::new().quote(in_bytes)
}
/// A single byte that is not valid UTF-8 (checked by `test_invalid_utf8`), used to exercise
/// the byte-oriented API with non-text input.
#[cfg(test)]
const INVALID_UTF8: &[u8] = b"\xa1";
/// `INVALID_UTF8` wrapped in single quotes: the expected output of quoting it.
#[cfg(test)]
const INVALID_UTF8_SINGLEQUOTED: &[u8] = b"'\xa1'";
/// Sanity check that the `INVALID_UTF8` fixture really is rejected by the UTF-8 decoder.
#[test]
#[allow(invalid_from_utf8)]
fn test_invalid_utf8() {
assert!(core::str::from_utf8(INVALID_UTF8).is_err());
}
/// Table of `(input, expected)` pairs for `test_split`.
///
/// `Some(words)` is the expected word list; `None` means `split` must report an error
/// (unterminated quote or trailing backslash).
#[cfg(test)]
static SPLIT_TEST_ITEMS: &'static [(&'static [u8], Option<&'static [&'static [u8]]>)] = &[
// Plain words, quoting and whitespace handling.
(b"foo$baz", Some(&[b"foo$baz"])),
(b"foo baz", Some(&[b"foo", b"baz"])),
(b"foo\"bar\"baz", Some(&[b"foobarbaz"])),
(b"foo \"bar\"baz", Some(&[b"foo", b"barbaz"])),
(b" foo \nbar", Some(&[b"foo", b"bar"])),
// Backslash-newline is removed both outside and inside double quotes.
(b"foo\\\nbar", Some(&[b"foobar"])),
(b"\"foo\\\nbar\"", Some(&[b"foobar"])),
// Backslashes are literal inside single quotes.
(b"'baz\\$b'", Some(&[b"baz\\$b"])),
// Inputs that end inside a quote or right after a backslash are errors.
(b"'baz\\\''", None),
(b"\\", None),
(b"\"\\", None),
(b"'\\", None),
(b"\"", None),
(b"'", None),
// `#` starts a comment only at the beginning of a word.
(b"foo #bar\nbaz", Some(&[b"foo", b"baz"])),
(b"foo #bar", Some(&[b"foo"])),
(b"foo#bar", Some(&[b"foo#bar"])),
(b"foo\"#bar", None),
(b"'\\n'", Some(&[b"\\n"])),
(b"'\\\\n'", Some(&[b"\\\\n"])),
// Bytes that are not valid UTF-8 pass through unchanged.
(INVALID_UTF8, Some(&[INVALID_UTF8])),
];
/// Runs `split` over every entry of `SPLIT_TEST_ITEMS` and compares against the expectation.
#[test]
fn test_split() {
    for &(input, expected) in SPLIT_TEST_ITEMS {
        // Convert the borrowed expectation into the owned shape that `split` returns.
        let expected_words: Option<Vec<Vec<u8>>> = match expected {
            Some(words) => Some(words.iter().map(|&word| word.to_owned()).collect()),
            None => None,
        };
        assert_eq!(split(input), expected_words);
    }
}
/// Checks that `line_no` has advanced to 3 by the time the word on the third line is returned.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new(b"\nfoo\nbar");
    let mut saw_bar = false;
    // `while let` rather than `for`: `sh.line_no` must stay readable inside the loop body,
    // which a `for` loop's borrow of the iterator would prevent.
    while let Some(word) = sh.next() {
        if word == b"bar" {
            assert_eq!(sh.line_no, 3);
            saw_bar = true;
        }
    }
    // Without this the test would pass vacuously if the lexer never produced `bar`.
    assert!(saw_bar, "lexer never produced the word `bar`");
}
/// Exercises the deprecated free function `quote` on a handful of representative inputs.
#[test]
#[allow(deprecated)]
fn test_quote() {
// Input that is not valid UTF-8 is single-quoted.
assert_eq!(quote(INVALID_UTF8), INVALID_UTF8_SINGLEQUOTED);
// Empty input becomes an empty pair of single quotes.
assert_eq!(quote(b""), &b"''"[..]);
// Plain alphanumerics need no quoting.
assert_eq!(quote(b"foobar"), &b"foobar"[..]);
// A space forces quoting.
assert_eq!(quote(b"foo bar"), &b"'foo bar'"[..]);
// A single quote cannot be single-quoted, so double quotes with an escaped `"` are used.
assert_eq!(quote(b"'\""), &b"\"'\\\"\""[..]);
assert_eq!(quote(b""), &b"''"[..]);
}
/// Exercises the deprecated free function `join`.
#[test]
#[allow(deprecated)]
fn test_join() {
// Input that is not valid UTF-8 is single-quoted.
assert_eq!(join(vec![INVALID_UTF8]), INVALID_UTF8_SINGLEQUOTED);
// No words at all yields empty output.
assert_eq!(join(vec![]), &b""[..]);
// A single empty word is rendered as `''`.
assert_eq!(join(vec![&b""[..]]), b"''");
}