use crate::shlex::FieldType::ArgDoubleDash;
use lazy_static::lazy_static;
use regex::Regex;
use std::borrow::Cow;
use std::borrow::Cow::Borrowed;
use std::error::Error;
use std::ffi::OsStr;
use std::fmt;
use std::fmt::{Display, Formatter};
use std::ops::Range;
// Regular expressions used by `split`; `lazy_static!` builds each one on first use.
lazy_static! {
// Matches one piece of a field, skipping any leading whitespace. Capture groups:
//   1: a run of characters that are not whitespace, a backslash, or a quote
//   2: the contents of a single-quoted string (quotes excluded)
//   3: the contents of a double-quoted string (quotes excluded); a backslash
//      followed by a non-newline character is accepted inside it
//   4: a backslash, optionally followed by one non-newline character
//   5: any other single non-whitespace character; `split` reports it as a
//      mismatched quote
//   6: one trailing whitespace character or the end of input; `split` treats
//      its presence as the end of the current field
static ref MAIN_PATTERN: Regex =
Regex::new(r#"(?m:\s*(?:([^\s\\'"]+)|'([^']*)'|"((?:[^"\\]|\\.)*)"|(\\.?)|(\S))(\s|\z)?)"#)
.unwrap();
// A backslash followed by any single non-newline character; group 1 is that character.
static ref ESCAPE_PATTERN: Regex = Regex::new(r#"\\(.)"#).unwrap();
// A backslash followed by `$`, a backtick, `"`, `\` or a newline; group 1 is that character.
static ref METACHAR_PATTERN: Regex = Regex::new(r#"\\([$`"\\\n])"#).unwrap();
}
/// One field (shell-style word) produced by [`split`].
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Field<'a> {
/// Text of the field after `split` has removed quotes and resolved backslash
/// escapes. Borrowed from the input when no rewriting was needed.
pub parsed: Cow<'a, str>,
/// The slice of the input this field was read from, quotes and escapes included.
pub original: &'a str,
/// Byte range of `original` within the input string.
pub position: Range<usize>,
/// Classification of the field, derived from `parsed`.
pub kind: FieldType,
}
impl<'a> Field<'a> {
#[inline]
fn new(original: &'a str, offset: usize, parsed: Cow<'a, str>) -> Self {
let kind = FieldType::from(&parsed);
Field {
parsed,
original,
position: offset..offset + original.len(),
kind,
}
}
fn push_str(&mut self, parsed_append: &str, new_original: &'a str) {
self.parsed.to_mut().push_str(&parsed_append);
self.kind = FieldType::from(&self.parsed);
self.original = new_original;
self.position.end = self.position.start + new_original.len();
}
#[allow(clippy::must_use_candidate)]
pub fn is_argument(&self) -> bool {
use FieldType::{ArgLong, ArgShort};
self.kind == ArgShort || self.kind == ArgLong || self.kind == ArgDoubleDash
}
#[allow(clippy::must_use_candidate)]
pub fn argument_name_and_value(&self) -> (Option<&str>, Option<&str>) {
use FieldType::{ArgLong, ArgShort};
if self.is_argument() {
let name_offset = match self.kind {
ArgShort => 1,
ArgLong | ArgDoubleDash => 2,
_ => unreachable!(),
};
if let Some(value_pos) = self.parsed.find('=') {
let name = Some(&self.parsed[name_offset..value_pos]);
let value = Some(&self.parsed[value_pos + 1..]);
return (name, value);
}
return (Some(&self.parsed[name_offset..]), None);
}
(None, None)
}
#[must_use]
pub fn is_quoted(&self) -> bool {
self.original.starts_with('\'') || self.original.starts_with('"')
}
}
impl AsRef<OsStr> for Field<'_> {
    /// Views the parsed (unquoted) text of the field as an `OsStr`.
    fn as_ref(&self) -> &OsStr {
        OsStr::new(&*self.parsed)
    }
}
/// Classification of a field, based on the dashes at the start of its text.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum FieldType {
/// Text that does not start with `-`, or that starts with three or more dashes.
Value,
/// Text that starts with exactly one `-`.
ArgShort,
/// Text that starts with exactly two dashes followed by at least one more character.
ArgLong,
/// Text that is exactly `--`.
ArgDoubleDash,
}
impl<T: AsRef<str>> From<T> for FieldType {
    /// Classifies a piece of text by the number of dashes it starts with:
    /// one dash is a short argument, two dashes a long argument (or the
    /// double-dash separator when nothing follows), anything else a value.
    #[inline]
    fn from(s: T) -> Self {
        let text = s.as_ref();
        // Counting stops at three: three or more dashes are all treated alike.
        let leading_dashes = text.bytes().take_while(|&b| b == b'-').take(3).count();
        match leading_dashes {
            1 => FieldType::ArgShort,
            2 if text.len() == 2 => FieldType::ArgDoubleDash,
            2 => FieldType::ArgLong,
            _ => FieldType::Value,
        }
    }
}
/// Queries over a sequence of [`Field`]s.
pub trait FieldMatcher {
/// Splits the fields around the byte offset `pos`, returning the fields
/// before it, the field at it (if any), and the fields after it.
fn split_at_pos(&self, pos: usize) -> (&[Field<'_>], Option<&Field>, &[Field<'_>]);
/// Reports whether any field is the `--` separator.
fn has_double_dash(&self) -> bool;
/// Returns the first field if its parsed text equals `s`.
fn match_first(&self, s: &str) -> Option<&Field>;
}
impl FieldMatcher for [Field<'_>] {
    /// Splits the fields around the byte offset `pos`.
    ///
    /// A field is considered to be at `pos` when `start < pos <= end`, so an
    /// offset right after the last byte of a field still belongs to it, while
    /// an offset exactly on the first byte of a field does not.
    ///
    /// Returns `(before, at, after)`:
    /// * when a field is at `pos`, `at` is that field and the other two
    ///   slices hold the fields on either side of it;
    /// * otherwise `at` is `None` and the slice is cut in front of the first
    ///   field that starts at or after `pos` (`after` is empty when there is
    ///   no such field).
    fn split_at_pos(&self, pos: usize) -> (&[Field<'_>], Option<&Field<'_>>, &[Field<'_>]) {
        let under_cursor = self
            .iter()
            .position(|f| f.position.start < pos && pos <= f.position.end);
        if let Some(at_cursor) = under_cursor {
            return (
                &self[..at_cursor],
                self.get(at_cursor),
                &self[at_cursor + 1..],
            );
        }
        // Only scan for the cut point when no field is under the cursor.
        let split_pos = self
            .iter()
            .position(|f| f.position.start >= pos)
            .unwrap_or(self.len());
        let (before, after) = self.split_at(split_pos);
        (before, None, after)
    }

    /// Reports whether any field has kind [`FieldType::ArgDoubleDash`].
    fn has_double_dash(&self) -> bool {
        self.iter()
            .any(|f| matches!(f.kind, FieldType::ArgDoubleDash))
    }

    /// Returns the first field when its parsed text equals `s`; later fields
    /// are never considered.
    fn match_first(&self, s: &str) -> Option<&Field> {
        self.first().filter(|f| f.parsed == s)
    }
}
/// Error returned by `split` when it meets a character it cannot place in any
/// field (an unmatched quote). The wrapped value is the byte offset of that
/// character in the input.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct MismatchedQuotes(pub usize);

impl Display for MismatchedQuotes {
    /// Writes only the byte offset, as a decimal number.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("{}", self.0))
    }
}

impl Error for MismatchedQuotes {}

impl From<usize> for MismatchedQuotes {
    /// Wraps a byte offset into the error type.
    fn from(pos: usize) -> Self {
        Self(pos)
    }
}
pub fn split(input: &str) -> Result<Vec<Field>, MismatchedQuotes> {
let mut fields = Vec::with_capacity(input.split_whitespace().count());
if fields.capacity() == 0 {
return Ok(fields);
}
let mut new_field: Option<Field> = None;
let mut past_double_dash = false;
for capture in MAIN_PATTERN.captures_iter(input) {
if let Some(word) = capture.get(1) {
if let Some(field) = &mut new_field {
field.push_str(&word.as_str(), &input[field.position.start..word.end()]);
} else {
new_field = Some(Field::new(
word.as_str(),
word.start(),
Borrowed(word.as_str()),
));
}
} else if let Some(single_quoted_word) = capture.get(2) {
if let Some(field) = &mut new_field {
field.push_str(
&single_quoted_word.as_str(),
&input[field.position.start..=single_quoted_word.end()],
);
} else {
let range = single_quoted_word.start() - 1..=single_quoted_word.end();
let start = *range.start();
new_field = Some(Field::new(
&input[range],
start,
Borrowed(single_quoted_word.as_str()),
));
}
} else if let Some(double_quoted_word) = capture.get(3) {
let escaped = METACHAR_PATTERN.replace_all(double_quoted_word.as_str(), "$1");
if let Some(field) = &mut new_field {
field.push_str(
&escaped,
&input[field.position.start..=double_quoted_word.end()],
);
} else {
let range = double_quoted_word.start() - 1..=double_quoted_word.end();
let start = *range.start();
new_field = Some(Field::new(&input[range], start, escaped));
}
} else if let Some(to_escape) = capture.get(4) {
let escaped = ESCAPE_PATTERN.replace_all(to_escape.as_str(), "$1");
if let Some(field) = &mut new_field {
field.push_str(&escaped, &input[field.position.start..to_escape.end()]);
} else {
new_field = Some(Field::new(to_escape.as_str(), to_escape.start(), escaped));
}
} else if let Some(mismatched_quote) = capture.get(5) {
return Err(MismatchedQuotes::from(mismatched_quote.start()));
}
if capture.get(6).is_some() {
if let Some(mut field) = new_field {
if past_double_dash {
field.kind = FieldType::Value;
} else if field.kind == FieldType::ArgDoubleDash {
past_double_dash = true;
}
fields.push(field);
new_field = None;
}
}
}
Ok(fields)
}
// Unit tests for `split`, `Field` and `MismatchedQuotes`.
// NOTE(review): several assertions below expect more than one byte between
// consecutive fields (`position.end + 2`, `+ 3`) or quote offsets such as 7 and
// 10. Those values depend on the exact whitespace inside the input literals;
// confirm the literals' spacing is intact before trusting a failure here.
#[cfg(test)]
mod tests {
use crate::shlex::FieldType::{ArgDoubleDash, ArgLong, ArgShort, Value};
use crate::shlex::{split, MismatchedQuotes};
use std::borrow::Cow::{Borrowed, Owned};
// Two bare words become two `Value` fields whose parsed text stays borrowed.
#[test]
fn splits_simple() {
let fields = split("foo bar").unwrap();
assert_eq!(fields.len(), 2);
assert_eq!(fields[0].original, r#"foo"#);
assert_eq!(fields[0].position.start, 0);
assert_eq!(fields[0].position.len(), fields[0].original.len());
assert_eq!(fields[0].kind, Value);
assert_eq!(fields[0].parsed, Borrowed(r#"foo"#));
assert!(matches!(fields[0].parsed, Borrowed(_)));
assert_eq!(fields[1].original, r#"bar"#);
assert_eq!(fields[1].position.start, fields[0].position.end + 1);
assert_eq!(fields[1].position.len(), fields[1].original.len());
assert_eq!(fields[1].kind, Value);
assert_eq!(fields[1].parsed, Borrowed(r#"bar"#));
assert!(matches!(fields[1].parsed, Borrowed(_)));
}
// Backslash escapes outside quotes are dropped from `parsed` (which becomes
// owned) while `original` keeps them.
#[test]
fn splits_removes_escape() {
let fields = split(r#"\ foo b\"ar a\\rb"#).unwrap();
assert_eq!(fields.len(), 3);
assert_eq!(fields[0].original, r#"\ foo"#);
assert_eq!(fields[0].position.start, 0);
assert_eq!(fields[0].position.len(), fields[0].original.len());
assert_eq!(fields[0].kind, Value);
assert_eq!(fields[0].parsed, Borrowed(r#" foo"#));
assert!(matches!(fields[0].parsed, Owned(_)));
assert_eq!(fields[1].original, r#"b\"ar"#);
assert_eq!(fields[1].position.start, fields[0].position.end + 1);
assert_eq!(fields[1].position.len(), fields[1].original.len());
assert_eq!(fields[1].kind, Value);
assert_eq!(fields[1].parsed, Borrowed(r#"b"ar"#));
assert!(matches!(fields[1].parsed, Owned(_)));
assert_eq!(fields[2].original, r#"a\\rb"#);
assert_eq!(fields[2].position.start, fields[1].position.end + 1);
assert_eq!(fields[2].position.len(), fields[2].original.len());
assert_eq!(fields[2].kind, Value);
assert_eq!(fields[2].parsed, Borrowed(r#"a\rb"#));
assert!(matches!(fields[2].parsed, Owned(_)));
}
// Single-quoted text, alone or glued to bare text, forms one field; a field
// assembled from several pieces has owned parsed text.
#[test]
fn splits_single_quoted() {
let fields = split("'f oo' b'a r' 'a r'b").unwrap();
assert_eq!(fields.len(), 3);
assert_eq!(fields[0].original, r#"'f oo'"#);
assert_eq!(fields[0].position.start, 0);
assert_eq!(fields[0].position.len(), fields[0].original.len());
assert_eq!(fields[0].kind, Value);
assert_eq!(fields[0].parsed, Borrowed(r#"f oo"#));
assert!(matches!(fields[0].parsed, Borrowed(_)));
assert_eq!(fields[1].original, r#"b'a r'"#);
assert_eq!(fields[1].position.start, fields[0].position.end + 1);
assert_eq!(fields[1].position.len(), fields[1].original.len());
assert_eq!(fields[1].kind, Value);
assert_eq!(fields[1].parsed, Borrowed(r#"ba r"#));
assert!(matches!(fields[1].parsed, Owned(_)));
assert_eq!(fields[2].original, r#"'a r'b"#);
assert_eq!(fields[2].position.start, fields[1].position.end + 1);
assert_eq!(fields[2].position.len(), fields[2].original.len());
assert_eq!(fields[2].kind, Value);
assert_eq!(fields[2].parsed, Borrowed(r#"a rb"#));
assert!(matches!(fields[2].parsed, Owned(_)));
}
// Same expectations as `splits_single_quoted`, using double quotes.
#[test]
fn splits_double_quoted() {
let fields = split(r#""f oo" b"a r" "a r"b"#).unwrap();
assert_eq!(fields.len(), 3);
assert_eq!(fields[0].original, r#""f oo""#);
assert_eq!(fields[0].position.start, 0);
assert_eq!(fields[0].position.len(), fields[0].original.len());
assert_eq!(fields[0].kind, Value);
assert_eq!(fields[0].parsed, Borrowed(r#"f oo"#));
assert!(matches!(fields[0].parsed, Borrowed(_)));
assert_eq!(fields[1].original, r#"b"a r""#);
assert_eq!(fields[1].position.start, fields[0].position.end + 1);
assert_eq!(fields[1].position.len(), fields[1].original.len());
assert_eq!(fields[1].kind, Value);
assert_eq!(fields[1].parsed, Borrowed(r#"ba r"#));
assert!(matches!(fields[1].parsed, Owned(_)));
assert_eq!(fields[2].original, r#""a r"b"#);
assert_eq!(fields[2].position.start, fields[1].position.end + 1);
assert_eq!(fields[2].position.len(), fields[2].original.len());
assert_eq!(fields[2].kind, Value);
assert_eq!(fields[2].parsed, Borrowed(r#"a rb"#));
assert!(matches!(fields[2].parsed, Owned(_)));
}
// An escaped double quote inside a double-quoted string ends up as a plain
// quote in `parsed`.
#[test]
fn splits_double_quoted_removes_escape() {
let fields = split(r#""f oo" b"a\" r" "a r"b"#).unwrap();
assert_eq!(fields.len(), 3);
assert_eq!(fields[0].original, r#""f oo""#);
assert_eq!(fields[0].position.start, 0);
assert_eq!(fields[0].position.len(), fields[0].original.len());
assert_eq!(fields[0].kind, Value);
assert_eq!(fields[0].parsed, Borrowed(r#"f oo"#));
assert!(matches!(fields[0].parsed, Borrowed(_)));
assert_eq!(fields[1].original, r#"b"a\" r""#);
assert_eq!(fields[1].position.start, fields[0].position.end + 1);
assert_eq!(fields[1].position.len(), fields[1].original.len());
assert_eq!(fields[1].kind, Value);
assert_eq!(fields[1].parsed, Borrowed(r#"ba" r"#));
assert!(matches!(fields[1].parsed, Owned(_)));
assert_eq!(fields[2].original, r#""a r"b"#);
assert_eq!(fields[2].position.start, fields[1].position.end + 1);
assert_eq!(fields[2].position.len(), fields[2].original.len());
assert_eq!(fields[2].kind, Value);
assert_eq!(fields[2].parsed, Borrowed(r#"a rb"#));
assert!(matches!(fields[2].parsed, Owned(_)));
}
// The kind is derived from the parsed text: quoted options still count as
// arguments, while a leading space or three dashes makes the field a value.
#[test]
fn splits_args() {
let fields = split(r#"'--foo=abc' "-s" " --long" ---invalid"#).unwrap();
assert_eq!(fields.len(), 4);
assert_eq!(fields[0].original, r#"'--foo=abc'"#);
assert_eq!(fields[0].position.start, 0);
assert_eq!(fields[0].position.len(), fields[0].original.len());
assert_eq!(fields[0].kind, ArgLong);
assert_eq!(fields[0].parsed, Borrowed(r#"--foo=abc"#));
assert_eq!(fields[0].argument_name_and_value().0, Some("foo"));
assert_eq!(fields[0].argument_name_and_value().1, Some("abc"));
assert!(matches!(fields[0].parsed, Borrowed(_)));
assert_eq!(fields[1].original, r#""-s""#);
assert_eq!(fields[1].position.start, fields[0].position.end + 3);
assert_eq!(fields[1].position.len(), fields[1].original.len());
assert_eq!(fields[1].kind, ArgShort);
assert_eq!(fields[1].parsed, Borrowed(r#"-s"#));
assert!(matches!(fields[1].parsed, Borrowed(_)));
assert_eq!(fields[2].original, r#"" --long""#);
assert_eq!(fields[2].position.start, fields[1].position.end + 1);
assert_eq!(fields[2].position.len(), fields[2].original.len());
assert_eq!(fields[2].kind, Value);
assert_eq!(fields[2].parsed, Borrowed(r#" --long"#));
assert!(matches!(fields[2].parsed, Borrowed(_)));
assert_eq!(fields[3].original, r#"---invalid"#);
assert_eq!(fields[3].position.start, fields[2].position.end + 1);
assert_eq!(fields[3].position.len(), fields[3].original.len());
assert_eq!(fields[3].kind, Value);
assert_eq!(fields[3].parsed, Borrowed(r#"---invalid"#));
assert!(matches!(fields[3].parsed, Borrowed(_)));
}
// An unmatched quote makes `split` fail with the quote's byte offset; the
// rest checks `Display`, equality and ordering of `MismatchedQuotes`.
#[test]
fn splits_mismatched_quotes() {
assert_eq!(split(r#"foo ' aaa"#).unwrap_err().0, 7);
assert_eq!(split(r#"foo " aaa"#).unwrap_err().0, 7);
assert_eq!(split(r#"foo' ' " aaaa"#).unwrap_err().0, 10);
assert_eq!(split(r#"foo" " ' "#).unwrap_err().0, 10);
let mismatched = MismatchedQuotes(10);
assert_eq!(format!("{}", mismatched), format!("{}", 10 as usize));
assert_eq!(MismatchedQuotes(20), MismatchedQuotes(20));
assert_ne!(MismatchedQuotes(20), MismatchedQuotes(10));
assert!(MismatchedQuotes(20) > MismatchedQuotes(10));
assert!(MismatchedQuotes(10) < MismatchedQuotes(20));
}
// Every field after a `--` field is classified as `Value`, even when it
// starts with dashes.
#[test]
fn splits_double_dash() {
let fields = split(r#"--foo '--' -z --aaa"#).unwrap();
assert_eq!(fields.len(), 4);
assert_eq!(fields[0].original, r#"--foo"#);
assert_eq!(fields[0].position.start, 0);
assert_eq!(fields[0].position.len(), fields[0].original.len());
assert_eq!(fields[0].kind, ArgLong);
assert_eq!(fields[0].parsed, Borrowed(r#"--foo"#));
assert!(matches!(fields[0].parsed, Borrowed(_)));
assert_eq!(fields[1].original, r#"'--'"#);
assert_eq!(fields[1].position.start, fields[0].position.end + 1);
assert_eq!(fields[1].position.len(), fields[1].original.len());
assert_eq!(fields[1].kind, ArgDoubleDash);
assert_eq!(fields[1].parsed, Borrowed(r#"--"#));
assert!(matches!(fields[1].parsed, Borrowed(_)));
assert_eq!(fields[2].original, r#"-z"#);
assert_eq!(fields[2].position.start, fields[1].position.end + 2);
assert_eq!(fields[2].position.len(), fields[2].original.len());
assert_eq!(fields[2].kind, Value);
assert_eq!(fields[2].parsed, Borrowed(r#"-z"#));
assert!(matches!(fields[2].parsed, Borrowed(_)));
assert_eq!(fields[3].original, r#"--aaa"#);
assert_eq!(fields[3].position.start, fields[2].position.end + 3);
assert_eq!(fields[3].position.len(), fields[3].original.len());
assert_eq!(fields[3].kind, Value);
assert_eq!(fields[3].parsed, Borrowed(r#"--aaa"#));
assert!(matches!(fields[3].parsed, Borrowed(_)));
}
// Empty and whitespace-only input produce no fields.
#[test]
fn test_splits_empty() {
let fields = split(r#""#).unwrap();
assert_eq!(fields.len(), 0);
let fields = split(r#" "#).unwrap();
assert_eq!(fields.len(), 0);
}
// Fields compare equal only to themselves here: two `--foo` fields at
// different positions are not equal.
#[test]
fn test_field_equals() {
let fields = split(r#"--foo --foo --bar "#).unwrap();
assert_eq!(fields[0], fields[0]);
assert_ne!(fields[0], fields[1]);
assert_ne!(fields[0], fields[2]);
assert_eq!(fields[1], fields[1]);
assert_ne!(fields[1], fields[0]);
assert_ne!(fields[1], fields[2]);
assert_eq!(fields[2], fields[2]);
assert_ne!(fields[2], fields[0]);
assert_ne!(fields[2], fields[1]);
}
}