use std::error::Error;
use std::ffi::{OsStr, OsString};
use std::fmt::{Debug, Display, Formatter};
use std::mem;
use itertools::Itertools;
use serde::de::Visitor;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use stfu8::DecodeError;
/// A single command-line argument, stored as an `OsString`.
///
/// The wrapper exists so that the argument can be given its own conversions,
/// quoting and (de)serialization in the `impl` blocks of this file.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Arg(OsString);
impl Arg {
pub fn from_escaped_string(s: &str) -> Result<Self, DecodeError> {
Ok(Arg(from_stfu8(s)?))
}
pub fn to_escaped_string(&self) -> String {
to_stfu8(self.0.clone())
}
pub fn quote(&self) -> String {
quote(self.0.to_os_string())
}
pub fn as_os_str(&self) -> &OsStr {
self.0.as_ref()
}
}
/// Lets an `Arg` be passed wherever an `AsRef<OsStr>` is accepted.
impl AsRef<OsStr> for Arg {
    fn as_ref(&self) -> &OsStr {
        // `&OsString` deref-coerces to `&OsStr`.
        &self.0
    }
}
impl From<OsString> for Arg {
fn from(s: OsString) -> Self {
Arg(s)
}
}
impl From<&OsStr> for Arg {
fn from(s: &OsStr) -> Self {
Arg(OsString::from(s))
}
}
impl From<&str> for Arg {
fn from(s: &str) -> Self {
Arg(OsString::from(s))
}
}
/// Serde visitor that turns an escaped string into an [`Arg`].
struct ArgVisitor;

impl Visitor<'_> for ArgVisitor {
    type Value = Arg;

    /// Describes the expected input for serde's error messages.
    fn expecting(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
        formatter.write_str("an STFU encoded string")
    }

    /// Decodes `v` with `Arg::from_escaped_string`; a decoding failure is
    /// reported as a custom serde error carrying the decoder's message.
    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        match Arg::from_escaped_string(v) {
            Ok(decoded) => Ok(decoded),
            Err(cause) => Err(E::custom(cause.to_string())),
        }
    }
}
impl Serialize for Arg {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(self.to_escaped_string().as_str())
}
}
impl<'de> Deserialize<'de> for Arg {
fn deserialize<D>(deserializer: D) -> Result<Arg, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_str(ArgVisitor)
}
}
/// Encodes the bytes of `s` as an STFU-8 string (Unix variant).
#[cfg(unix)]
pub fn to_stfu8(s: OsString) -> String {
    use std::os::unix::ffi::OsStringExt;

    stfu8::encode_u8(&s.into_vec())
}
/// Encodes the 16-bit units of `s` as an STFU-8 string (Windows variant).
#[cfg(windows)]
pub fn to_stfu8(s: OsString) -> String {
    use std::os::windows::ffi::OsStrExt;

    let wide = s.encode_wide().collect::<Vec<u16>>();
    stfu8::encode_u16(&wide)
}
/// Decodes an STFU-8 string into an `OsString` (Unix variant).
///
/// # Errors
/// Propagates the error returned by `stfu8::decode_u8`.
#[cfg(unix)]
pub fn from_stfu8(encoded: &str) -> Result<OsString, DecodeError> {
    use std::os::unix::ffi::OsStringExt;

    stfu8::decode_u8(encoded).map(OsString::from_vec)
}
/// Decodes an STFU-8 string into an `OsString` (Windows variant).
///
/// # Errors
/// Propagates the error returned by `stfu8::decode_u16`.
#[cfg(windows)]
pub fn from_stfu8(encoded: &str) -> Result<OsString, DecodeError> {
    use std::os::windows::ffi::OsStringExt;

    stfu8::decode_u16(encoded).map(|wide| OsString::from_wide(&wide))
}
/// Characters that make `quote` wrap an argument in single quotes.
///
/// The ASCII tilde `'~'` is listed because shells perform tilde expansion on
/// unquoted words. The look-alike `'˜'` (U+02DC SMALL TILDE) was the only
/// tilde in the earlier table; it is kept so that strings which used to be
/// quoted still are.
const SPECIAL_CHARS: [char; 26] = [
    '|', '&', ';', '<', '>', '(', ')', '{', '}', '$', '`', '\\', '\'', '"', ' ', '\t', '*', '?',
    '+', '[', ']', '#', '~', '˜', '=', '%',
];
/// Quotes `s` so that it can be embedded in a shell-like command line and
/// read back by `split`.
///
/// The result takes one of four forms:
/// * `''` for an empty argument, so that it does not vanish from the command
///   line;
/// * `$'...'` with the STFU-8 encoding of `s` (and `'` escaped as `\'`) when
///   the lossy text form contains a control character, DEL, a single quote,
///   or U+FFFD (which is also what `to_string_lossy` emits for undecodable
///   data);
/// * `'...'` when the text contains any character from `SPECIAL_CHARS`;
/// * the text itself otherwise.
pub fn quote(s: OsString) -> String {
    // Without explicit quotes an empty argument would be rendered as nothing
    // and silently dropped when the command line is split again.
    if s.is_empty() {
        return "''".to_string();
    }

    let lossy = s.to_string_lossy();
    let needs_escaping =
        |c: char| c < '\u{20}' || c == '\u{7f}' || c == '\u{fffd}' || c == '\'';

    if lossy.chars().any(needs_escaping) {
        format!("$'{}'", to_stfu8(s).replace('\'', "\\'"))
    } else if lossy.chars().any(|c| SPECIAL_CHARS.contains(&c)) {
        // No single quote can be present here (handled above), so plain
        // single quoting is sufficient.
        format!("'{lossy}'")
    } else {
        lossy.to_string()
    }
}
/// Error returned by `split` when the input cannot be parsed.
#[derive(Debug)]
pub struct ParseError {
    /// Human-readable description of the problem.
    pub msg: String,
}

impl ParseError {
    /// Creates an error carrying a copy of `msg`.
    pub fn new(msg: &str) -> ParseError {
        let msg = String::from(msg);
        ParseError { msg }
    }
}

/// Displays the message verbatim.
impl Display for ParseError {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.msg)
    }
}

impl Error for ParseError {}
/// States of the parser implemented by `split`.
enum State {
/// Between words; whitespace is skipped here.
Delimiter,
/// A backslash was read while in the `Delimiter` state.
Backslash,
/// Inside the unquoted part of a word (also entered after a closing quote).
Unquoted,
/// A backslash was read while in the `Unquoted` state.
UnquotedBackslash,
/// Inside `'...'`.
SingleQuoted,
/// Inside `"..."`.
DoubleQuoted,
/// A backslash was read while in the `DoubleQuoted` state.
DoubleQuotedBackslash,
/// A `$` was read; a single quote must follow.
Dollar,
/// Inside `$'...'`.
DollarQuoted,
/// A backslash was read while in the `DollarQuoted` state.
DollarQuotedBackslash,
/// After a `#` that started a comment; lasts until a newline or end of input.
Comment,
}
/// Appends the UTF-8 encoding of `c` to `s`.
fn append(s: &mut OsString, c: char) {
    // Four bytes is the longest UTF-8 encoding of a single `char`.
    let mut utf8 = [0u8; 4];
    let encoded: &str = c.encode_utf8(&mut utf8);
    s.push(encoded);
}
/// Splits a shell-like command line into individual arguments.
///
/// Recognized syntax:
/// * words separated by spaces, tabs or newlines;
/// * `'...'` — everything up to the closing quote is taken literally;
/// * `"..."` — a backslash escapes `$`, `` ` ``, `"` and `\`; a
///   backslash-newline pair is removed; any other backslash is kept;
/// * `\x` outside quotes yields `x`; a backslash-newline pair is removed;
/// * `$'...'` — the content (with `\'` turned into `'`) is decoded with
///   `from_stfu8`;
/// * `#` at the start of a word begins a comment that runs to the end of the
///   line.
///
/// # Errors
/// Returns a [`ParseError`] for an unclosed quote, for input that ends right
/// after `$` or after a backslash inside `"..."` / `$'...'`, for a `$` that
/// is not followed by a single quote, and for a `$'...'` section whose
/// content fails to decode.
pub fn split(s: &str) -> Result<Vec<Arg>, ParseError> {
    use State::*;

    let mut words = Vec::new();
    let mut word = OsString::new();
    // Byte offset in `s` of the first character after the opening `$'`.
    let mut dollar_quote_start = 0;
    let mut chars = s.char_indices();
    let mut state = Delimiter;

    loop {
        // `pos` is a BYTE offset: it is used below to slice `s`, and slicing by
        // a character count would be wrong (or panic) for multi-byte input.
        let (pos, c) = match chars.next() {
            Some((offset, ch)) => (offset, Some(ch)),
            None => (s.len(), None),
        };

        state = match state {
            Delimiter => match c {
                None => break,
                Some('\'') => SingleQuoted,
                Some('"') => DoubleQuoted,
                Some('\\') => Backslash,
                Some('\t' | ' ' | '\n') => Delimiter,
                Some('$') => Dollar,
                Some('#') => Comment,
                Some(c) => {
                    append(&mut word, c);
                    Unquoted
                }
            },
            Backslash => match c {
                None => {
                    // A trailing backslash is kept literally.
                    append(&mut word, '\\');
                    words.push(Arg(mem::take(&mut word)));
                    break;
                }
                // Backslash-newline is a line continuation.
                Some('\n') => Delimiter,
                Some(c) => {
                    append(&mut word, c);
                    Unquoted
                }
            },
            Unquoted => match c {
                None => {
                    words.push(Arg(mem::take(&mut word)));
                    break;
                }
                Some('\'') => SingleQuoted,
                Some('"') => DoubleQuoted,
                Some('\\') => UnquotedBackslash,
                Some('$') => Dollar,
                Some('\t' | ' ' | '\n') => {
                    words.push(Arg(mem::take(&mut word)));
                    Delimiter
                }
                Some(c) => {
                    append(&mut word, c);
                    Unquoted
                }
            },
            UnquotedBackslash => match c {
                None => {
                    // A trailing backslash is kept literally.
                    append(&mut word, '\\');
                    words.push(Arg(mem::take(&mut word)));
                    break;
                }
                // Backslash-newline is a line continuation inside a word.
                Some('\n') => Unquoted,
                Some(c) => {
                    append(&mut word, c);
                    Unquoted
                }
            },
            SingleQuoted => match c {
                None => return Err(ParseError::new("Unclosed single quote")),
                Some('\'') => Unquoted,
                Some(c) => {
                    append(&mut word, c);
                    SingleQuoted
                }
            },
            DoubleQuoted => match c {
                None => return Err(ParseError::new("Unclosed double quote")),
                Some('"') => Unquoted,
                Some('\\') => DoubleQuotedBackslash,
                Some(c) => {
                    append(&mut word, c);
                    DoubleQuoted
                }
            },
            DoubleQuotedBackslash => match c {
                None => return Err(ParseError::new("Unexpected end of input")),
                Some('\n') => DoubleQuoted,
                Some(c @ ('$' | '`' | '"' | '\\')) => {
                    append(&mut word, c);
                    DoubleQuoted
                }
                Some(c) => {
                    // Not an escapable character: the backslash stays.
                    append(&mut word, '\\');
                    append(&mut word, c);
                    DoubleQuoted
                }
            },
            Dollar => match c {
                None => return Err(ParseError::new("Unexpected end of input")),
                Some('\'') => {
                    // The opening quote is one byte long.
                    dollar_quote_start = pos + 1;
                    DollarQuoted
                }
                Some(_) => return Err(ParseError::new("Expected single quote")),
            },
            DollarQuoted => match c {
                None => return Err(ParseError::new("Unclosed single quote")),
                Some('\\') => DollarQuotedBackslash,
                Some('\'') => {
                    // Both bounds are byte offsets that sit on ASCII quote
                    // characters, so the slice is always on char boundaries.
                    let unescaped = s[dollar_quote_start..pos].replace("\\'", "'");
                    let decoded = from_stfu8(&unescaped).map_err(|e| {
                        ParseError::new(&format!("Failed to decode STFU-8 chunk: {e}"))
                    })?;
                    word.push(&decoded);
                    Unquoted
                }
                // Content is collected by slicing `s` once the quote closes.
                Some(_) => DollarQuoted,
            },
            DollarQuotedBackslash => match c {
                None => return Err(ParseError::new("Unexpected end of input")),
                Some(_) => DollarQuoted,
            },
            Comment => match c {
                None => break,
                Some('\n') => Delimiter,
                Some(_) => Comment,
            },
        };
    }

    Ok(words)
}
pub fn join(args: &[Arg]) -> String {
args.iter().map(|arg| arg.quote()).join(" ")
}
// Unit tests for `quote` and `split`.
#[cfg(test)]
mod test {
use std::ffi::OsString;
use crate::arg::{quote, split, Arg};
// Plain text is returned unchanged.
#[test]
fn quote_no_special_chars() {
assert_eq!(quote(OsString::from("abc/def_123.txt")), "abc/def_123.txt");
}
// Control characters select the `$'...'` form with escape sequences.
#[test]
fn quote_path_with_control_chars() {
assert_eq!(quote(OsString::from("a\nb")), "$'a\\nb'");
assert_eq!(quote(OsString::from("a\tb")), "$'a\\tb'");
}
// Shell metacharacters select plain single quoting.
#[test]
fn quote_path_with_special_chars() {
assert_eq!(quote(OsString::from("a b")), "'a b'");
assert_eq!(quote(OsString::from("a*b")), "'a*b'");
assert_eq!(quote(OsString::from("a?b")), "'a?b'");
assert_eq!(quote(OsString::from("$ab")), "'$ab'");
assert_eq!(quote(OsString::from("a(b)")), "'a(b)'");
assert_eq!(quote(OsString::from("a\\b")), "'a\\b'");
}
// A single quote selects the `$'...'` form and is escaped as `\'`.
#[test]
fn quote_path_with_single_quotes() {
assert_eq!(quote(OsString::from("a'b")), "$'a\\'b'");
assert_eq!(quote(OsString::from("a'b'")), "$'a\\'b\\''");
}
// Whitespace separates unquoted words.
#[test]
fn split_unquoted_args() {
assert_eq!(
split("arg1 arg2").unwrap(),
vec![Arg::from("arg1"), Arg::from("arg2")]
)
}
// Spaces inside single quotes do not split the word.
#[test]
fn split_single_quoted_args() {
assert_eq!(
split("'arg1 with spaces' arg2").unwrap(),
vec![Arg::from("arg1 with spaces"), Arg::from("arg2")]
)
}
// Spaces inside double quotes do not split the word.
#[test]
fn split_doubly_quoted_args() {
assert_eq!(
split("\"arg1 with spaces\" arg2").unwrap(),
vec![Arg::from("arg1 with spaces"), Arg::from("arg2")]
)
}
// `\"` inside double quotes yields a literal double quote.
#[test]
fn split_quotes_escaping() {
assert_eq!(
split("\"escaped \\\" quotes\"").unwrap(),
vec![Arg::from("escaped \" quotes")]
)
}
// `\'` inside `$'...'` yields a literal single quote.
#[test]
fn split_escaped_single_quote() {
assert_eq!(
split("$'single\\'quote'").unwrap(),
vec![Arg::from("single'quote")]
);
}
// A backslash-escaped space does not split the word.
#[test]
fn split_spaces_escaping() {
assert_eq!(
split("escaped\\ space").unwrap(),
vec![Arg::from("escaped space")]
)
}
// `$'...'` sections are decoded and may be glued to unquoted text.
#[test]
fn dollar_quoting() {
assert_eq!(
split("arg1 $'arg2-\\n\\t\\\\' arg3-$'\\x7f'").unwrap(),
vec![
Arg::from("arg1"),
Arg::from("arg2-\n\t\\"),
Arg::from("arg3-\x7f")
]
)
}
}