#[cfg(test)]
extern crate quickcheck;
#[cfg(test)]
#[macro_use(quickcheck)]
extern crate quickcheck_macros;
extern crate unicode_categories;
use std::borrow::Cow;
use std::num::ParseIntError;
use std::{char, str};
use thiserror::Error;
use unicode_categories::UnicodeCategories;
/// Escape `s` with shell-like quoting so that [`unescape`] can recover it.
///
/// Three outcomes are possible:
///
/// * Nothing in `s` needs quoting: the input is returned borrowed and unchanged
///   (this includes the empty string).
/// * Quoting is needed and `s` contains no `'`, no `\` and no whitespace, separator or
///   "other"-category character apart from a plain space: `s` is wrapped verbatim in single
///   quotes, e.g. `spaces only` becomes `'spaces only'`.
/// * Otherwise `s` is wrapped in double quotes; `"`, `\`, `$` and the backtick are
///   backslash-escaped, and separator/"other" characters (except a plain space) are written
///   as escape sequences, e.g. a tab becomes `\t`.
pub fn escape(s: &str) -> Cow<'_, str> {
    let mut needs_quoting = false;
    let mut single_quotable = true;

    for c in s.chars() {
        match c {
            // These can only be represented inside double quotes.
            '\'' | '\\' => {
                single_quotable = false;
                needs_quoting = true;
            }
            // A plain space is listed before the whitespace guard below because it is the one
            // whitespace character that may stay as-is inside single quotes.
            '"' | ' ' => needs_quoting = true,
            // Characters with a special meaning to shells; single quotes neutralise them.
            '(' | ')' | '&' | '~' | '$' | '#' | '`' | ';' => needs_quoting = true,
            '*' | '?' | '!' | '[' => needs_quoting = true,
            '>' | '<' | '|' => needs_quoting = true,
            // Anything that has to be spelled as an escape sequence forces double quotes.
            c if c.is_whitespace() || c.is_separator() || c.is_other() => {
                single_quotable = false;
                needs_quoting = true;
            }
            _ => {}
        }
        // Double quoting is already unavoidable; the rest of the scan cannot change that.
        if needs_quoting && !single_quotable {
            break;
        }
    }

    if !needs_quoting {
        return Cow::Borrowed(s);
    }
    if single_quotable {
        return Cow::Owned(format!("'{}'", s));
    }

    // The result is never shorter than the input plus the two surrounding quotes.
    let mut output = String::with_capacity(s.len() + 2);
    output.push('"');
    for c in s.chars() {
        match c {
            '"' => output.push_str("\\\""),
            '\\' => output.push_str("\\\\"),
            '$' => output.push_str("\\$"),
            '`' => output.push_str("\\`"),
            // Must precede the guard below: a space is a separator but is kept literal.
            ' ' => output.push(' '),
            c if c.is_other() || c.is_separator() => output.push_str(&escape_character(c)),
            c => output.push(c),
        }
    }
    output.push('"');
    Cow::Owned(output)
}
/// Spell a single character as an escape sequence.
///
/// Bell, backspace, vertical tab, form feed and escape get the short forms `\a`, `\b`, `\v`,
/// `\f` and `\e`. Everything else goes through [`char::escape_default`], which yields `\t`,
/// `\r`, `\n`, a backslash-escaped quote/backslash, the character itself for printable ASCII,
/// or a `\u{...}` sequence.
fn escape_character(c: char) -> String {
    let short_form = match c {
        '\u{07}' => Some("\\a"),
        '\u{08}' => Some("\\b"),
        '\u{0b}' => Some("\\v"),
        '\u{0c}' => Some("\\f"),
        '\u{1b}' => Some("\\e"),
        _ => None,
    };
    short_form.map_or_else(|| c.escape_default().collect(), String::from)
}
/// Errors that [`unescape`] can return.
///
/// Every `index` field is a position counted in `char`s from the start of the input
/// (it comes from `chars().enumerate()`), not a byte offset.
#[derive(Debug, Error, PartialEq)]
pub enum UnescapeError {
/// A backslash inside double quotes was followed by a character that is not a supported
/// escape, or was the very last character of the input.
#[error("invalid escape {escape} at {index} in {string}")]
InvalidEscape {
/// The offending escape text: the backslash plus the character after it, or the backslash
/// alone when the input ended right after it.
escape: String,
/// Position of the character following the backslash, or of the backslash itself when the
/// input ended right after it.
index: usize,
/// The complete input that was being unescaped.
string: String,
},
/// A `\u` escape inside double quotes could not be turned into a character.
#[error("\\u could not be parsed at {index} in {string}: {source}")]
InvalidUnicode {
/// Why parsing the unicode escape failed.
#[source]
source: ParseUnicodeError,
/// Position of the `u` that follows the backslash.
index: usize,
/// The complete input that was being unescaped.
string: String,
},
}
/// Reasons why the `{...}` part of a `\u{...}` escape could not be parsed.
#[derive(Debug, Error, PartialEq)]
pub enum ParseUnicodeError {
/// The character right after `\u` was not `{` (or the input ended there).
#[error("expected '{{' character in unicode escape")]
BraceNotFound,
/// The text collected after the opening brace is not a valid base-16 `u32`.
#[error("could not parse {string} as u32 hex: {source}")]
ParseHexFailed {
/// The integer parse error reported by the standard library.
#[source]
source: ParseIntError,
/// The text that failed to parse (without the surrounding braces).
string: String,
},
/// The number parsed fine but `char::from_u32` rejected it as a character.
#[error("could not parse {value} as a unicode char")]
ParseUnicodeFailed { value: u32 },
}
/// Undo shell-like quoting and return the plain string.
///
/// The input is scanned one `char` at a time:
///
/// * Outside quotes every character is copied through, except `'` and `"`, which open a
///   quoted section and are dropped. A backslash outside quotes is an ordinary character.
/// * Inside single quotes everything up to the closing `'` is copied verbatim.
/// * Inside double quotes a backslash starts an escape. Supported escapes are `\a`, `\b`,
///   `\v`, `\f`, `\n`, `\r`, `\t`, `\e`, `\E`, `\\`, `\'`, `\"`, `\$`, backslash-backtick,
///   backslash-space and `\u{HEX}`.
///
/// A quoted section that is still open when the input ends is accepted as-is.
///
/// # Errors
///
/// * [`UnescapeError::InvalidEscape`] when a backslash inside double quotes is followed by an
///   unsupported character or is the last character of the input.
/// * [`UnescapeError::InvalidUnicode`] when a `\u` escape cannot be parsed.
///
/// The `index` carried by both errors is a `char` position, not a byte offset.
pub fn unescape(s: &str) -> Result<String, UnescapeError> {
    /// Which kind of quoted section the scanner is currently inside.
    #[derive(Clone, Copy)]
    enum Quote {
        Unquoted,
        Single,
        Double,
    }

    let mut quote = Quote::Unquoted;
    let mut chars = s.chars().enumerate();
    let mut out = String::with_capacity(s.len());

    // `while let` rather than `for`: escapes pull extra characters from the same iterator.
    while let Some((pos, ch)) = chars.next() {
        match (quote, ch) {
            // Quote characters that open or close a section are consumed, not copied.
            (Quote::Unquoted, '\'') => quote = Quote::Single,
            (Quote::Unquoted, '"') => quote = Quote::Double,
            (Quote::Single, '\'') | (Quote::Double, '"') => quote = Quote::Unquoted,
            // Escapes are only recognised inside double quotes.
            (Quote::Double, '\\') => {
                let (esc_pos, esc) = match chars.next() {
                    Some(next) => next,
                    None => {
                        return Err(UnescapeError::InvalidEscape {
                            escape: ch.to_string(),
                            index: pos,
                            string: s.to_owned(),
                        });
                    }
                };
                let decoded = match esc {
                    'a' => '\u{07}',
                    'b' => '\u{08}',
                    'v' => '\u{0B}',
                    'f' => '\u{0C}',
                    'n' => '\n',
                    'r' => '\r',
                    't' => '\t',
                    'e' | 'E' => '\u{1B}',
                    '\\' => '\\',
                    '\'' => '\'',
                    '"' => '"',
                    '$' => '$',
                    '`' => '`',
                    ' ' => ' ',
                    'u' => match parse_unicode(&mut chars) {
                        Ok(parsed) => parsed,
                        Err(source) => {
                            return Err(UnescapeError::InvalidUnicode {
                                source,
                                index: esc_pos,
                                string: s.to_owned(),
                            });
                        }
                    },
                    unknown => {
                        return Err(UnescapeError::InvalidEscape {
                            escape: format!("{}{}", ch, unknown),
                            index: esc_pos,
                            string: s.to_owned(),
                        });
                    }
                };
                out.push(decoded);
            }
            // Everything else is literal text in whatever state we are in.
            (_, literal) => out.push(literal),
        }
    }

    Ok(out)
}
/// Parse the `{HEX}` part of a `\u{HEX}` escape.
///
/// `chars` must be positioned just after the `u`. On return the iterator has been advanced
/// past the closing `}` (or to the end of the input if there was none).
///
/// # Errors
///
/// * [`ParseUnicodeError::BraceNotFound`] if the next character is not `{`.
/// * [`ParseUnicodeError::ParseHexFailed`] if the collected text is not a base-16 `u32`.
/// * [`ParseUnicodeError::ParseUnicodeFailed`] if the number is not a valid `char`.
///
/// NOTE(review): when the input ends before a `}` is seen, whatever was collected is parsed
/// anyway, so an unterminated escape such as `\u{41` is not reported as an error.
/// NOTE(review): `u32::from_str_radix` is documented to accept a leading `+`, so `\u{+41}`
/// presumably parses as well; confirm whether that leniency is wanted.
fn parse_unicode<I>(chars: &mut I) -> Result<char, ParseUnicodeError>
where
    I: Iterator<Item = (usize, char)>,
{
    if chars.next().map(|(_, c)| c) != Some('{') {
        return Err(ParseUnicodeError::BraceNotFound);
    }

    // Gather everything up to the closing brace; the brace itself is consumed and dropped.
    let mut hex_digits = String::new();
    for (_, c) in chars.by_ref() {
        if c == '}' {
            break;
        }
        hex_digits.push(c);
    }

    let code_point = match u32::from_str_radix(&hex_digits, 16) {
        Ok(value) => value,
        Err(source) => {
            return Err(ParseUnicodeError::ParseHexFailed {
                source,
                string: hex_digits,
            });
        }
    };

    char::from_u32(code_point).ok_or(ParseUnicodeError::ParseUnicodeFailed { value: code_point })
}
// Unit, property-based and fixture-based tests for `escape` and `unescape`.
#[cfg(test)]
mod test {
use super::*;
use std::io::Read;
// `Command` is only needed by the opt-in test that shells out to `sh`.
#[cfg(feature = "unsafe_tests")]
use std::process::Command;
// Table-driven check of `escape`: each pair is (input, expected escaped form).
#[test]
fn test_escape() {
let test_cases = vec![
// Nothing to quote: returned unchanged.
("東方", "東方"),
// A single quote in the input forces double quoting.
("\"'", r#""\"'""#),
("\\", "\"\\\\\""),
// A plain space only needs single quotes.
("spaces only", "'spaces only'"),
// Other whitespace has to be written as an escape, so double quotes are used.
("some\ttabs", "\"some\\ttabs\""),
("💩", "💩"),
// U+202E and U+202B are expected to come out as \u{...} escapes.
("\u{202e}RTL", "\"\\u{202e}RTL\""),
("no\u{202b}space", "\"no\\u{202b}space\""),
// `$` and backtick are backslash-escaped once double quotes are in play.
("cash $ money $$ \t", "\"cash \\$ money \\$\\$ \\t\""),
("back ` tick `` \t", "\"back \\` tick \\`\\` \\t\""),
// Every control character with a short escape form, plus backslash and both quotes.
(
"\u{07}\u{08}\u{0b}\u{0c}\u{0a}\u{0d}\u{09}\u{1b}\u{1b}\u{5c}\u{27}\u{22}",
"\"\\a\\b\\v\\f\\n\\r\\t\\e\\e\\\\'\\\"\"",
),
("semi;colon", "'semi;colon'"),
];
for (s, expected) in test_cases {
assert_eq!(escape(s), expected);
}
}
// Spot checks of `unescape` on quoted and unquoted input.
#[test]
fn test_unescape() {
assert_eq!(unescape("\"\\u{6771}\\u{65b9}\""), Ok("東方".to_string()));
assert_eq!(unescape("東方"), Ok("東方".to_string()));
assert_eq!(unescape("\"\\\\\"'\"\"'"), Ok("\\\"\"".to_string()));
// NOTE(review): the next two assertions are identical; one of them was possibly meant to
// cover a different input.
assert_eq!(unescape("'\"'"), Ok("\"".to_string()));
assert_eq!(unescape("'\"'"), Ok("\"".to_string()));
// One string exercising the supported backslash escapes inside double quotes.
assert_eq!(
unescape("\"\\a\\b\\v\\f\\n\\r\\t\\e\\E\\\\\\'\\\"\\u{09}\\$\\`\""),
Ok(
"\u{07}\u{08}\u{0b}\u{0c}\u{0a}\u{0d}\u{09}\u{1b}\u{1b}\u{5c}\u{27}\u{22}\u{09}$`"
.to_string()
)
);
}
// Each error variant, including the rendered message of a nested error.
#[test]
fn test_unescape_error() {
// Unsupported escape character.
assert_eq!(
unescape("\"\\x\""),
Err(UnescapeError::InvalidEscape {
escape: "\\x".to_string(),
index: 2,
string: "\"\\x\"".to_string()
})
);
// `\u` without an opening brace.
assert_eq!(
unescape("\"\\u6771}\""),
Err(UnescapeError::InvalidUnicode {
source: ParseUnicodeError::BraceNotFound,
index: 2,
string: "\"\\u6771}\"".to_string()
})
);
// Non-hex digits between the braces; compared via the Display output.
assert_eq!(
format!("{}", unescape("\"\\u{qqqq}\"").err().unwrap()),
"\\u could not be parsed at 2 in \"\\u{qqqq}\": could not parse qqqq as u32 hex: invalid digit found in string"
);
// Valid hex that is not a valid character.
assert_eq!(
unescape("\"\\u{ffffffff}\""),
Err(UnescapeError::InvalidUnicode {
source: ParseUnicodeError::ParseUnicodeFailed { value: 0xffffffff },
index: 2,
string: "\"\\u{ffffffff}\"".to_string()
})
);
}
// `unescape(escape(x))` must give back `x` for a handful of hand-picked inputs.
#[test]
fn test_round_trip() {
let test_cases = vec![
"東方",
"foo bar baz",
"\\",
"\0",
"\"'",
"\"'''''\"()())}{{}{}{{{!////",
"foo;bar",
];
for case in test_cases {
assert_eq!(unescape(&escape(case)), Ok(case.to_owned()));
}
}
// Property test: the same round trip for arbitrary generated strings.
#[quickcheck]
fn round_trips(s: String) -> bool {
s == unescape(&escape(&s)).unwrap()
}
// Opt-in property test that runs `sh` to check a shell reads the escaped form back as the
// original string.
#[cfg(feature = "unsafe_tests")]
#[quickcheck]
fn sh_quoting_round_trips(s: String) -> bool {
// Only printable ASCII is sent to the shell: control and non-ASCII characters are removed.
let s = s.replace(|c: char| c.is_ascii_control() || !c.is_ascii(), "");
let escaped = escape(&s);
println!("escaped '{}' as '{}'", s, escaped);
let output = Command::new("sh").args(vec!["-c", &format!("printf '%s' {}", escaped)]).output().unwrap();
if !output.status.success() {
panic!("printf %s {} did not exit with success", escaped);
}
let echo_output = String::from_utf8(output.stdout).unwrap();
println!("printf gave it back as '{}'", echo_output);
echo_output == s
}
// Parses os-release style fixture files and checks the unescaped PRETTY_NAME of each.
#[test]
fn test_os_release_parsing() {
// (fixture file name, expected PRETTY_NAME value)
let tests = vec![
("fedora-19", "Fedora 19 (Schrödinger’s Cat)"),
("fedora-29", "Fedora 29 (Twenty Nine)"),
("gentoo", "Gentoo/Linux"),
("fictional", "Fictional $ OS: ` edition"),
];
for (file, pretty_name) in tests {
let mut data = String::new();
// The path is relative, so the test relies on the working directory it is run from.
std::fs::File::open(format!("./src/testdata/os-releases/{}", file))
.unwrap()
.read_to_string(&mut data)
.unwrap();
let mut found_prettyname = false;
for line in data.lines() {
// Skip comment lines.
if line.trim().starts_with("#") {
continue;
}
// Every remaining line must be KEY=VALUE; a line without '=' makes the unwrap below panic.
let mut iter = line.splitn(2, "=");
let key = iter.next().unwrap();
let value = iter.next().unwrap();
// Every value must unescape cleanly, not just PRETTY_NAME.
let unescaped = unescape(value).unwrap();
if key == "PRETTY_NAME" {
assert_eq!(unescaped, pretty_name);
found_prettyname = true;
}
}
assert!(
found_prettyname,
"expected os-release to have 'PRETTY_NAME' key"
);
}
}
}