use std::borrow::Cow;
use std::mem;
use base64::engine::GeneralPurposeConfig;
use base64::engine::general_purpose::GeneralPurpose;
use base64::{Engine, alphabet};
use crate::date::Date;
use crate::depth::MAX_PARSE_DEPTH;
use crate::error::{Error, Result};
use crate::format::Format;
use crate::scalar;
use crate::text::tables::{BASE64_VALID, GS_QUOTABLE, NEWLINE, WHITESPACE};
use crate::value::{Dictionary, Value, maybe_uid};
/// Standard-alphabet base64 engine used to decode GNUstep `<[...]>` data values.
///
/// Decoding is configured to accept non-zero trailing bits in the final symbol.
const STD_BASE64: GeneralPurpose = GeneralPurpose::new(
&alphabet::STANDARD,
GeneralPurposeConfig::new().with_decode_allow_trailing_bits(true),
);
/// Parses a text property list from raw bytes.
///
/// The bytes are first converted to a string by `guess_encoding_and_convert`
/// and then read by a `Parser`. On success the root value is returned together
/// with the format the parser recorded while reading it.
///
/// # Errors
///
/// Propagates any error from the encoding conversion or from parsing.
pub(crate) fn parse(data: &[u8]) -> Result<(Value, Format)> {
    let text = guess_encoding_and_convert(data)?;
    let mut state = Parser::new(&text);
    let root = state.parse_document()?;
    let format = state.format;
    Ok((root, format))
}
/// Decodes `buffer` as a sequence of UTF-16 code units into an owned string.
///
/// `big_endian` selects the byte order of each 16-bit unit. Code units that do
/// not form a valid scalar value (unpaired surrogates) become U+FFFD.
///
/// # Errors
///
/// Returns a text parse error when `buffer` holds an odd number of bytes.
fn convert_u16(buffer: &[u8], big_endian: bool) -> Result<Cow<'_, str>> {
    if !buffer.len().is_multiple_of(2) {
        return Err(Error::parse("text", "truncated utf16"));
    }
    let to_unit: fn([u8; 2]) -> u16 = if big_endian {
        u16::from_be_bytes
    } else {
        u16::from_le_bytes
    };
    // `chunks_exact(2)` only yields two-byte slices, so the conversion never drops a pair.
    let units = buffer
        .chunks_exact(2)
        .filter_map(|pair| <[u8; 2]>::try_from(pair).ok())
        .map(to_unit);
    let text: String = char::decode_utf16(units)
        .map(|decoded| decoded.unwrap_or('\u{FFFD}'))
        .collect();
    Ok(Cow::Owned(text))
}
/// Decodes 8-bit input, borrowing it when it is already valid UTF-8.
///
/// Otherwise every valid UTF-8 run is copied through unchanged and each byte
/// that is not part of a valid sequence is mapped to the code point with the
/// same numeric value (U+0000..=U+00FF).
fn decode_8bit(buffer: &[u8]) -> Cow<'_, str> {
    if let Ok(text) = str::from_utf8(buffer) {
        return Cow::Borrowed(text);
    }
    let mut decoded = String::with_capacity(buffer.len());
    for chunk in buffer.utf8_chunks() {
        decoded.push_str(chunk.valid());
        // Bytes of a broken sequence are a lead byte plus continuation bytes;
        // none of them can start a valid sequence, so each maps on its own.
        decoded.extend(chunk.invalid().iter().copied().map(char::from));
    }
    Cow::Owned(decoded)
}
/// Sniffs the encoding of `buffer` from its leading bytes and converts it to text.
///
/// Checked in order:
/// 1. UTF-8 BOM (`EF BB BF`): the remainder is decoded as 8-bit text.
/// 2. `FE FF`: big-endian UTF-16 after the BOM.
/// 3. `00 xx` (`xx` non-zero): big-endian UTF-16, no BOM.
/// 4. `FF FE`: little-endian UTF-16 after the BOM.
/// 5. `xx 00` (`xx` non-zero): little-endian UTF-16, no BOM.
/// 6. Anything else: 8-bit text.
///
/// # Errors
///
/// Propagates the error from `convert_u16` for UTF-16 input.
fn guess_encoding_and_convert(buffer: &[u8]) -> Result<Cow<'_, str>> {
    match buffer {
        [0xEF, 0xBB, 0xBF, body @ ..] => Ok(decode_8bit(body)),
        [0xFE, 0xFF, body @ ..] => convert_u16(body, true),
        [0x00, second, ..] if *second != 0 => convert_u16(buffer, true),
        [0xFF, 0xFE, body @ ..] => convert_u16(body, false),
        [first, 0x00, ..] if *first != 0 => convert_u16(buffer, false),
        _ => Ok(decode_8bit(buffer)),
    }
}
/// A container that is still being filled in while its children are parsed.
enum Pending {
/// A dictionary under construction.
Dictionary {
/// Key/value pairs collected so far, in input order.
entries: Vec<(String, Value)>,
/// Key read before `=` whose value has not been delivered yet.
pending_key: Option<String>,
/// When true, end of input closes the dictionary instead of being an error.
ignore_eof: bool,
},
/// An array under construction, holding the elements collected so far.
Array(Vec<Value>),
}
/// One entry of the explicit container stack driven by `Parser::run`.
struct Frame {
/// The partially built container.
pending: Pending,
/// Whether opening this container incremented `Parser::depth`; if so, closing it decrements it.
counted: bool,
}
/// Next action of the parsing loop in `Parser::run`.
enum Step {
/// Read a new value at the cursor.
Open,
/// Continue reading the container on top of the stack.
Advance,
/// Hand a finished value to the container on top of the stack, or return it when the stack is empty.
Deliver(Value),
}
/// Cursor-based parser over the decoded text of a property list.
struct Parser<'a> {
/// The entire input text.
input: &'a str,
/// Byte offset where the current token begins.
start: usize,
/// Byte offset of the cursor.
pos: usize,
/// Byte width of the character most recently read by `next` (0 at end of input).
width: usize,
/// Number of values currently open; checked against `MAX_PARSE_DEPTH`.
depth: usize,
/// Detected dialect: starts as OpenStep and is switched to GNUstep when a `<*` or `<[` value is read.
format: Format,
}
impl<'a> Parser<'a> {
/// Creates a parser positioned at the start of `input`, with depth 0 and the
/// format initialised to `Format::OpenStep`.
const fn new(input: &'a str) -> Self {
Self {
input,
start: 0,
pos: 0,
width: 0,
depth: 0,
format: Format::OpenStep,
}
}
/// Returns the unread input from the cursor onward, or an empty string when
/// the cursor is not a valid slice start.
fn rest(&self) -> &'a str {
self.input.get(self.pos..).unwrap_or_default()
}
/// Consumes and returns the character at the cursor.
///
/// Records the character's UTF-8 width in `width` so `backup` can undo the
/// read. At end of input the cursor stays put, `width` becomes 0 and `None`
/// is returned.
fn next(&mut self) -> Option<char> {
    let Some(ch) = self.rest().chars().next() else {
        self.width = 0;
        return None;
    };
    self.width = ch.len_utf8();
    self.pos += self.width;
    Some(ch)
}
/// Moves the cursor back by the width recorded by the most recent `next` call.
///
/// After `next` returned `None` the recorded width is 0, so this is a no-op.
const fn backup(&mut self) {
self.pos -= self.width;
}
/// Returns the character at the cursor without consuming it.
///
/// Implemented as `next` followed by `backup`, so it also updates `width`.
fn peek(&mut self) -> Option<char> {
let c = self.next();
self.backup();
c
}
fn emit(&mut self) -> &'a str {
let s = self.input.get(self.start..self.pos).unwrap_or_default();
self.start = self.pos;
s
}
/// Discards the pending token by moving the token start up to the cursor.
const fn ignore(&mut self) {
self.start = self.pos;
}
/// Returns true when no characters lie between the token start and the cursor.
const fn empty(&self) -> bool {
self.start == self.pos
}
/// Advances the cursor to the next occurrence of `ch`, leaving it on that
/// character, or to the end of input when `ch` does not occur.
fn scan_until(&mut self, ch: char) {
    self.pos = self
        .rest()
        .find(ch)
        .map_or(self.input.len(), |offset| self.pos + offset);
}
/// Advances the cursor to the next occurrence of any character in `chars`,
/// leaving it on that character, or to the end of input when none occurs.
fn scan_until_any(&mut self, chars: &[char]) {
    self.pos = self
        .rest()
        .find(chars)
        .map_or(self.input.len(), |offset| self.pos + offset);
}
/// Advances the cursor past every leading character that belongs to `set`.
///
/// Stops on the first character outside the set, or at end of input.
fn scan_chars_in_set(&mut self, set: super::tables::CharSet) {
    loop {
        match self.next() {
            Some(c) if set.contains_char(c) => {}
            _ => break,
        }
    }
    // Un-read the character that ended the run (no-op at end of input).
    self.backup();
}
/// Advances the cursor up to the first character that belongs to `set`.
///
/// Stops on that character, or at end of input when none is found.
fn scan_chars_not_in_set(&mut self, set: super::tables::CharSet) {
    while self.next().is_some_and(|c| !set.contains_char(c)) {}
    // Un-read the set member that ended the run (no-op at end of input).
    self.backup();
}
fn error_at(&self, message: &str) -> Error {
let prefix = self.input.get(..self.pos).unwrap_or_default();
let line = prefix.matches('\n').count();
let character = self.pos - prefix.rfind('\n').map_or(0, |index| index + 1);
Error::parse(
"text",
format!("{message} at line {line} character {character}"),
)
}
/// Parses a whole document and returns its root value.
///
/// If input remains after the root value and the root is a string, the input
/// is re-read from the beginning as a brace-less dictionary that ends at end
/// of input (the `.strings` layout).
///
/// # Errors
///
/// Fails with "garbage after end of document" when input remains after a
/// non-string root, and propagates any other parse error.
fn parse_document(&mut self) -> Result<Value> {
    let root = self.parse_value()?;
    self.skip_whitespace_and_comments()?;
    if self.peek().is_none() {
        return Ok(root);
    }
    if !matches!(root, Value::String(_)) {
        return Err(self.error_at("garbage after end of document"));
    }
    // Rewind and re-read everything as an implicit dictionary.
    self.start = 0;
    self.pos = 0;
    let strings_file = Frame {
        pending: Pending::Dictionary {
            entries: Vec::new(),
            pending_key: None,
            ignore_eof: true,
        },
        counted: false,
    };
    self.run(vec![strings_file])
}
/// Skips any run of whitespace, `//` line comments and `/* */` block comments,
/// then resets the token start to the cursor.
///
/// # Errors
///
/// Fails when a block comment is not terminated before end of input.
fn skip_whitespace_and_comments(&mut self) -> Result<()> {
    loop {
        self.scan_chars_in_set(WHITESPACE);
        let rest = self.rest();
        if rest.starts_with("//") {
            self.scan_chars_not_in_set(NEWLINE);
            continue;
        }
        if !rest.starts_with("/*") {
            break;
        }
        match rest.find("*/") {
            // Step over the comment body and the closing `*/`.
            Some(end) => self.pos += end + 2,
            None => return Err(self.error_at("unexpected eof in block comment")),
        }
    }
    self.ignore();
    Ok(())
}
/// Parses a single value at the cursor by running the step loop with an empty container stack.
fn parse_value(&mut self) -> Result<Value> {
self.run(Vec::new())
}
/// Drives the step loop until a value is delivered with no frame left on `stack`.
///
/// With an empty `stack` the loop begins by opening a new value; otherwise it
/// continues the container already on top of the stack.
///
/// # Errors
///
/// Propagates the first error returned by any step.
fn run(&mut self, mut stack: Vec<Frame>) -> Result<Value> {
    let mut step = if stack.is_empty() {
        Step::Open
    } else {
        Step::Advance
    };
    loop {
        step = match step {
            Step::Open => self.open_step(&mut stack)?,
            Step::Advance => self.advance_step(&mut stack)?,
            Step::Deliver(value) => {
                // No enclosing container: the delivered value is the result.
                let Some(frame) = stack.last_mut() else {
                    return Ok(value);
                };
                self.deliver_step(frame, value)?
            }
        };
    }
}
/// Reads the next value at the cursor.
///
/// Scalars (strings, data, GNUstep extended values) are parsed immediately and
/// returned as `Step::Deliver`. An opening `{` or `(` pushes a new frame onto
/// `stack` and returns `Step::Advance` so the container's contents are read
/// next. End of input yields an empty dictionary.
///
/// # Errors
///
/// Returns `Error::MaxDepthExceeded` when the nesting depth exceeds
/// `MAX_PARSE_DEPTH`, and propagates any error raised while parsing a scalar.
fn open_step(&mut self, stack: &mut Vec<Frame>) -> Result<Step> {
// Every opened value counts towards the depth limit, scalars included.
self.depth += 1;
if self.depth > MAX_PARSE_DEPTH {
return Err(Error::MaxDepthExceeded);
}
self.skip_whitespace_and_comments()?;
let value = match self.next() {
None => Value::Dictionary(Dictionary::new()),
Some('<') => match self.next() {
// `<*` and `<[` are GNUstep-only constructs, so the format is switched here.
Some('*') => {
self.format = Format::GnuStep;
self.parse_gnustep_value()?
}
Some('[') => {
self.format = Format::GnuStep;
self.parse_gnustep_base64()?
}
_ => {
// Plain `<...>` hex data: un-read the character after `<`.
self.backup();
self.parse_hex_data()?
}
},
Some('"') => Value::String(self.parse_quoted_string()?),
Some('{') => {
// Containers keep their depth slot until they are closed in `advance_step`.
stack.push(Frame {
pending: Pending::Dictionary {
entries: Vec::new(),
pending_key: None,
ignore_eof: false,
},
counted: true,
});
return Ok(Step::Advance);
}
Some('(') => {
stack.push(Frame {
pending: Pending::Array(Vec::new()),
counted: true,
});
return Ok(Step::Advance);
}
Some(_) => {
self.backup();
Value::String(self.parse_unquoted_string()?)
}
};
// A scalar is complete: release the depth slot taken above.
self.depth -= 1;
Ok(Step::Deliver(value))
}
/// Continues reading the container on top of `stack`.
///
/// For a dictionary: reads the next key, then either `;` (shorthand entry
/// whose value is a copy of the key) or `=` (a value follows, so `Step::Open`
/// is returned). A `}` — or end of input when `ignore_eof` is set — closes the
/// dictionary, pops its frame and delivers the result of `maybe_uid`.
///
/// For an array: `)` closes and delivers it, `,` is skipped, and anything else
/// starts a new element via `Step::Open`.
///
/// # Errors
///
/// Fails on an empty stack, on end of input inside a container, when a key is
/// followed by neither `;` nor `=`, and propagates key-parsing errors.
fn advance_step(&mut self, stack: &mut Vec<Frame>) -> Result<Step> {
let Some(frame) = stack.last_mut() else {
return Err(self.error_at("unexpected eof"));
};
match &mut frame.pending {
Pending::Dictionary {
entries,
pending_key,
ignore_eof,
} => {
self.skip_whitespace_and_comments()?;
// `None` here means "the dictionary ends", not "no key was parsed".
let key = match self.next() {
None if *ignore_eof => None,
None => return Err(self.error_at("unexpected eof in dictionary")),
Some('}') => None,
Some('"') => Some(self.parse_quoted_string()?),
Some(_) => {
self.backup();
Some(self.parse_unquoted_string()?)
}
};
let Some(key) = key else {
let value = mem::take(entries);
// Release the depth slot only if opening this frame took one.
if frame.counted {
self.depth -= 1;
}
drop(stack.pop());
// The flag passed to `maybe_uid` is true only while the format is still OpenStep.
return Ok(Step::Deliver(maybe_uid(
value,
self.format == Format::OpenStep,
)));
};
self.skip_whitespace_and_comments()?;
match self.next() {
Some(';') => {
// Shorthand `key;`: the key doubles as the value.
entries.push((key.clone(), Value::String(key)));
Ok(Step::Advance)
}
Some('=') => {
// Remember the key until `deliver_step` receives its value.
*pending_key = Some(key);
Ok(Step::Open)
}
_ => Err(self.error_at("missing '=' in dictionary")),
}
}
Pending::Array(values) => {
self.skip_whitespace_and_comments()?;
match self.next() {
None => Err(self.error_at("unexpected eof in array")),
Some(')') => {
let value = Value::Array(mem::take(values));
if frame.counted {
self.depth -= 1;
}
drop(stack.pop());
Ok(Step::Deliver(value))
}
// Commas are skipped wherever they appear, so repeated and trailing commas are accepted.
Some(',') => Ok(Step::Advance),
Some(_) => {
self.backup();
Ok(Step::Open)
}
}
}
}
}
/// Stores a finished `value` in the container described by `frame`.
///
/// A dictionary requires a terminating `;` after the value and files it under
/// the pending key. An array appends the value unless it is an empty string,
/// which is dropped. Either way the container continues with `Step::Advance`.
///
/// # Errors
///
/// Fails when the `;` after a dictionary value is missing, or when the
/// dictionary has no pending key.
fn deliver_step(&mut self, frame: &mut Frame, value: Value) -> Result<Step> {
    match &mut frame.pending {
        Pending::Array(values) => {
            let is_empty_string = matches!(&value, Value::String(text) if text.is_empty());
            if !is_empty_string {
                values.push(value);
            }
        }
        Pending::Dictionary {
            entries,
            pending_key,
            ..
        } => {
            self.skip_whitespace_and_comments()?;
            if self.next() != Some(';') {
                return Err(self.error_at("missing ';' in dictionary"));
            }
            let key = pending_key
                .take()
                .ok_or_else(|| self.error_at("missing '=' in dictionary"))?;
            entries.push((key, value));
        }
    }
    Ok(Step::Advance)
}
/// Parses the body of a quoted string; the opening `"` has already been consumed.
///
/// Text without escapes is returned as a single copied slice. Once a backslash
/// is seen, the pieces between escapes and the decoded escapes are accumulated
/// in an owned buffer instead.
///
/// # Errors
///
/// Fails when the input ends before the closing `"`.
fn parse_quoted_string(&mut self) -> Result<String> {
self.ignore();
// Stays `None` until the first backslash forces piecewise assembly.
let mut slow_path: Option<String> = None;
loop {
self.scan_until_any(&['"', '\\']);
match self.peek() {
None => return Err(self.error_at("unexpected eof in quoted string")),
Some('"') => {
let section = self.emit();
// Step over the closing quote.
self.pos += 1;
return Ok(slow_path.map_or_else(
|| section.to_owned(),
|mut built| {
built.push_str(section);
built
},
));
}
// `scan_until_any` stops only at `"` or `\`, so this arm is the backslash.
Some(_) => {
let section = self.emit();
let built = slow_path.get_or_insert_with(String::new);
built.push_str(section);
// Consume the backslash, then decode what follows it.
let _ = self.next();
let escape = self.parse_escape();
built.push_str(&escape);
}
}
}
}
/// Decodes the escape sequence that follows a backslash.
///
/// Recognises the single-letter escapes `a b v f t r n`, `\\`, `\"`, `\x` with
/// up to two hex digits, `\u`/`\U` with up to four hex digits, and up to three
/// octal digits. For any other character an empty string is returned and the
/// character is left in the input, so it is read as ordinary text. The token
/// start is moved past the escape in every case.
fn parse_escape(&mut self) -> String {
    let decoded = match self.next() {
        Some('a') => Some('\u{07}'),
        Some('b') => Some('\u{08}'),
        Some('v') => Some('\u{0B}'),
        Some('f') => Some('\u{0C}'),
        Some('t') => Some('\t'),
        Some('r') => Some('\r'),
        Some('n') => Some('\n'),
        Some('\\') => Some('\\'),
        Some('"') => Some('"'),
        Some('x') => {
            let code = self.parse_hex_digits(2);
            Some(escape_char(code))
        }
        Some('u' | 'U') => {
            let code = self.parse_hex_digits(4);
            Some(escape_char(code))
        }
        Some('0'..='7') => {
            // The first octal digit was already consumed; re-read it with the rest.
            self.backup();
            let code = self.parse_octal_digits(3);
            Some(escape_char(code))
        }
        Some(_) | None => {
            self.backup();
            None
        }
    };
    self.ignore();
    decoded.map_or_else(String::new, String::from)
}
/// Reads up to `max` hexadecimal digits at the cursor and returns their value (0 if there are none).
fn parse_hex_digits(&mut self, max: usize) -> u32 {
self.parse_digits(max, 16, 4)
}
/// Reads up to `max` octal digits at the cursor and returns their value (0 if there are none).
fn parse_octal_digits(&mut self, max: usize) -> u32 {
self.parse_digits(max, 8, 3)
}
/// Reads up to `max` digits in `radix`, shifting the accumulator left by
/// `shift` bits per digit.
///
/// Stops early at the first character that is not a digit in `radix` (or at
/// end of input) and leaves that character unread.
fn parse_digits(&mut self, max: usize, radix: u32, shift: u32) -> u32 {
    let mut value = 0_u32;
    for _ in 0..max {
        match self.next().and_then(|c| c.to_digit(radix)) {
            Some(digit) => value = (value << shift) | digit,
            None => {
                self.backup();
                break;
            }
        }
    }
    value
}
/// Parses an unquoted string: the run of characters up to the first member of
/// `GS_QUOTABLE`.
///
/// # Errors
///
/// Fails when that run is empty.
fn parse_unquoted_string(&mut self) -> Result<String> {
    self.scan_chars_not_in_set(GS_QUOTABLE);
    match self.emit() {
        "" => Err(self.error_at("invalid unquoted string")),
        token => Ok(token.to_owned()),
    }
}
/// Parses a GNUstep extended value; the leading `<*` has already been consumed.
///
/// The type character must be `I` (integer), `R` (real), `B` (boolean) or
/// `D` (date). The payload runs up to the closing `>`; one optional `"` right
/// after the type character and one optional trailing `"` are stripped.
///
/// # Errors
///
/// Fails on a missing or unknown type character, a missing `>`, an empty
/// payload, or a payload that does not parse as the requested type.
fn parse_gnustep_value(&mut self) -> Result<Value> {
let typ = match self.next() {
None | Some('>') => return Err(self.error_at("invalid GNUStep extended value")),
Some(c) => c,
};
// Reject unknown types before collecting the payload.
if !matches!(typ, 'I' | 'R' | 'B' | 'D') {
return Err(self.error_at(&format!("unknown GNUStep extended value type '{typ}'")));
}
// Skip an opening quote, as in `<*I"5">`.
if self.peek() == Some('"') {
let _ = self.next();
}
self.ignore();
self.scan_until('>');
if self.peek().is_none() {
return Err(self.error_at("unterminated GNUStep extended value"));
}
if self.empty() {
return Err(self.error_at("empty GNUStep extended value"));
}
let mut payload = self.emit();
// Consume the closing `>`.
let _ = self.next();
// Strip a trailing quote whether or not an opening quote was present.
if payload.as_bytes().last() == Some(&b'"') {
payload = payload.get(..payload.len() - 1).unwrap_or_default();
}
match typ {
'I' => self.parse_gnustep_integer(payload),
'R' => scalar::parse_f64(payload)
.map(Value::from)
.map_err(|cause| Error::parse("text", format!("invalid GNUStep real: {cause}"))),
'B' => self.parse_gnustep_boolean(payload),
// Only `D` remains after the type check above.
_ => self.parse_gnustep_date(payload),
}
}
fn parse_gnustep_integer(&self, payload: &str) -> Result<Value> {
if payload.is_empty() {
return Err(self.error_at("truncated GNUStep extended value"));
}
let parsed = if payload.starts_with('-') {
scalar::parse_i64(payload, 10).map(Value::from)
} else {
scalar::parse_u64(payload, 10).map(Value::from)
};
parsed.map_err(|cause| Error::parse("text", format!("invalid GNUStep integer: {cause}")))
}
/// Converts the payload of a GNUstep `B` value into a boolean.
///
/// The result is true exactly when the first byte of the payload is `Y`.
///
/// # Errors
///
/// Fails when the payload is empty.
fn parse_gnustep_boolean(&self, payload: &str) -> Result<Value> {
    match payload.as_bytes().first() {
        None => Err(self.error_at("truncated GNUStep extended value")),
        Some(&first) => Ok(Value::Boolean(first == b'Y')),
    }
}
/// Converts the payload of a GNUstep `D` value into a date via
/// `Date::parse_text_layout`.
///
/// # Errors
///
/// Fails when the payload is not accepted by `Date::parse_text_layout`.
fn parse_gnustep_date(&self, payload: &str) -> Result<Value> {
    let Some(date) = Date::parse_text_layout(payload) else {
        return Err(self.error_at(&format!("invalid GNUStep date: {payload}")));
    };
    Ok(Value::Date(date))
}
/// Parses GNUstep base64 data; the leading `<[` has already been consumed.
///
/// The payload runs up to `]` and must be followed by `>`. Characters outside
/// `BASE64_VALID` are dropped before decoding.
///
/// # Errors
///
/// Fails when `]` or `>` is missing, or when the filtered payload is not valid
/// base64.
fn parse_gnustep_base64(&mut self) -> Result<Value> {
    self.ignore();
    self.scan_until(']');
    let payload = self.emit();
    let closers = [
        (']', "invalid GNUStep base64 data: expected ']'"),
        ('>', "invalid GNUStep base64 data: expected '>'"),
    ];
    for (expected, complaint) in closers {
        if self.next() != Some(expected) {
            return Err(self.error_at(complaint));
        }
    }
    let filtered = payload
        .chars()
        .filter(|&c| BASE64_VALID.contains_char(c))
        .collect::<String>();
    match STD_BASE64.decode(filtered) {
        Ok(bytes) => Ok(Value::Data(bytes)),
        Err(cause) => Err(self.error_at(&format!("invalid GNUStep base64 data: {cause}"))),
    }
}
/// Parses hexadecimal data up to the closing `>`; the opening `<` has already been consumed.
///
/// Spaces, tabs, CR, LF, U+2028 and U+2029 between digits are skipped. Digits
/// are combined two per byte, high nibble first.
///
/// # Errors
///
/// Fails on end of input before `>`, on a character that is not a hex digit,
/// or when the number of digits is odd.
fn parse_hex_data(&mut self) -> Result<Value> {
let mut bytes = Vec::new();
// Total number of hex digits seen; its parity selects high or low nibble.
let mut digits: usize = 0;
loop {
let Some(c) = self.next() else {
return Err(self.error_at("unexpected eof in data"));
};
match c {
'>' => {
if !digits.is_multiple_of(2) {
return Err(self.error_at("uneven number of hex digits in data"));
}
self.ignore();
return Ok(Value::Data(bytes));
}
' ' | '\t' | '\n' | '\r' | '\u{2028}' | '\u{2029}' => continue,
_ => {}
}
let Some(nibble) = c.to_digit(16).and_then(|d| u8::try_from(d).ok()) else {
return Err(self.error_at(&format!("unexpected hex digit '{c}'")));
};
if digits.is_multiple_of(2) {
// High nibble: start a new byte.
bytes.push(nibble);
} else if let Some(last) = bytes.last_mut() {
// Low nibble: complete the byte started by the previous digit.
*last = (*last << 4) | nibble;
}
digits += 1;
}
}
}
/// Converts an escape's numeric value to a character, substituting U+FFFD for
/// values that are not Unicode scalar values (surrogates or out of range).
fn escape_char(value: u32) -> char {
    char::from_u32(value).unwrap_or(char::REPLACEMENT_CHARACTER)
}
#[cfg(test)]
mod tests {
#![expect(clippy::unwrap_used, reason = "test code: unwrap is the assertion")]
use crate::error::Error;
use crate::format::Format;
use crate::text::parse;
use crate::uid::Uid;
use crate::value::Value;
/// Named inputs that must each be rejected with a text parse error.
///
/// Each entry is `(description, raw document bytes)`.
const INVALID_TEXT: &[(&str, &[u8])] = &[
("Truncated array", b"("),
("Truncated dictionary", b"{a=b;"),
("Truncated dictionary 2", b"{"),
("Unclosed nested array", b"{0=(/"),
("Unclosed dictionary", b"{0=/"),
(
"Broken GNUStep data",
b"(<*I5>,<*I5>,<*I5>,<*I5>,*I16777215>,<*I268435455>,<*I4294967295>,<*I18446744073709551615>,)",
),
("Truncated nested array", b"{0=(((/"),
("Truncated dictionary with comment-like", b"{/"),
("Truncated array with comment-like", b"(/"),
("Truncated array with empty data", b"(<>"),
("Bad Extended Character", "{¬=A;}".as_bytes()),
("Missing Equals in Dictionary", b"{\"A\"A;}"),
("Missing Semicolon in Dictionary", b"{\"A\"=A}"),
("Invalid GNUStep type", b"<*F33>"),
("Invalid GNUStep int", b"(<*I>"),
("Invalid GNUStep date", b"<*D5>"),
("Truncated GNUStep value", b"<*I3"),
("Invalid data", b"<EQ>"),
("Truncated unicode escape", b"\"\\u231"),
("Truncated hex escape", b"\"\\x2"),
("Truncated octal escape", b"\"\\02"),
("Truncated data", b"<33"),
("Uneven data", b"<3>"),
("Truncated block comment", b"/* hello"),
("Truncated quoted string", b"\"hi"),
("Garbage after end of non-string", b"<ab> cde"),
("Broken UTF-16", b"\xFE\xFF\x01"),
("Truncated GNUStep data", b"<"),
("Truncated GNUStep base64 data (missing ])", b"<[33=="),
("Truncated GNUStep base64 data (missing >)", b"<[33==]"),
("Invalid GNUStep base64 data", b"<[3]>"),
("GNUStep extended value with EOF before type", b"<*"),
("GNUStep extended value terminated before type", b"<*>"),
("Empty GNUStep extended value", b"<*I>"),
("Unterminated GNUStep quoted value", b"<*D\"5>"),
("Unterminated GNUStep quoted value (EOF)", b"<*D\""),
("Poorly-terminated GNUStep quoted value", b"<*D\">"),
("Empty GNUStep quoted extended value", b"<*D\"\">"),
];
/// Parses `data` and returns the value and format, panicking if parsing fails.
fn ok(data: &[u8]) -> (Value, Format) {
parse(data).unwrap()
}
/// Parses `data` and returns only the value, panicking if parsing fails.
fn value_of(data: &[u8]) -> Value {
ok(data).0
}
/// Shorthand for building a `Value` from a string slice.
fn s(v: &str) -> Value {
Value::from(v)
}
/// Builds a `Value` by collecting `(key, value)` pairs, converting each key to an owned string.
fn dict<const N: usize>(entries: [(&str, Value); N]) -> Value {
entries
.into_iter()
.map(|(key, value)| (key.to_owned(), value))
.collect()
}
/// Builds a document of `n` nested single-key dictionaries wrapped around
/// `innermost`, e.g. `{a={a=x;};}` for `n == 2` and `innermost == "x"`.
fn nested_dicts(n: usize, innermost: &str) -> Vec<u8> {
    let opening = "{a=".repeat(n);
    let closing = ";}".repeat(n);
    format!("{opening}{innermost}{closing}").into_bytes()
}
/// Every `INVALID_TEXT` entry must fail with `Error::Parse` tagged `"text"`;
/// the table size is pinned at 38.
#[test]
fn all_38_invalid_text_inputs_error() {
assert_eq!(INVALID_TEXT.len(), 38);
for &(name, data) in INVALID_TEXT {
let result = parse(data);
assert!(
matches!(result, Err(Error::Parse { format: "text", .. })),
"{name}: expected the hard text parse error"
);
}
}
/// 127 nested dictionaries around a scalar or an empty dictionary parse;
/// 128 or more fail with `Error::MaxDepthExceeded`.
#[test]
fn depth_overflow_uses_the_dedicated_variant_at_the_boundary() {
assert!(parse(&nested_dicts(127, "x")).is_ok());
assert!(matches!(
parse(&nested_dicts(128, "x")),
Err(Error::MaxDepthExceeded)
));
assert!(parse(&nested_dicts(127, "{}")).is_ok());
assert!(matches!(
parse(&nested_dicts(128, "{}")),
Err(Error::MaxDepthExceeded)
));
assert!(matches!(
parse(&nested_dicts(178, "x")),
Err(Error::MaxDepthExceeded)
));
}
/// Documents report OpenStep unless a GNUstep extended value appears; a lone NUL byte is rejected.
#[test]
fn format_detection_matches_the_pins() {
assert_eq!(ok(b"(1,2,3,4,5)").1, Format::OpenStep);
assert_eq!(ok(b"<abab>").1, Format::OpenStep);
assert_eq!(ok(b"(1,2,<*I3>)").1, Format::GnuStep);
assert!(parse(&[0x00]).is_err());
}
/// Empty, whitespace-only and comment-only documents parse as an empty OpenStep dictionary.
#[test]
fn empty_whitespace_and_comment_only_documents_are_empty_dictionaries() {
for doc in [
&b""[..],
b" \n\t",
b"// just a comment",
b"/* block */",
b" /* a */ // b",
] {
let (value, format) = ok(doc);
assert_eq!(value, dict([]));
assert_eq!(format, Format::OpenStep);
}
}
/// Strings and data are accepted as the root value; trailing input after a
/// data, dictionary or array root is an error.
#[test]
fn root_may_be_any_value_and_trailing_garbage_depends_on_root_type() {
assert_eq!(value_of(b"Hello"), s("Hello"));
assert_eq!(value_of(b"\"hi there\""), s("hi there"));
assert_eq!(value_of(b"<ab>"), Value::Data(vec![0xAB]));
assert!(matches!(
parse(b"<ab> cde"),
Err(Error::Parse { format: "text", .. })
));
assert!(parse(b"{a=b;} junk").is_err());
assert!(parse(b"(a) junk").is_err());
}
/// A string root followed by more input is re-read as a brace-less dictionary,
/// including the `key;` shorthand and the `CF$UID` collapse.
#[test]
fn strings_file_recovery_reparses_the_full_input() {
let legacy = b"\"Key\" = \"Value\";\n\t\t\t\"Key2\" = \"Value2\";";
assert_eq!(
value_of(legacy),
dict([("Key", s("Value")), ("Key2", s("Value2"))])
);
let shortcut = b"\"Key\";\n\t\t\t\"Key2\";";
assert_eq!(
value_of(shortcut),
dict([("Key", s("Key")), ("Key2", s("Key2"))])
);
assert_eq!(value_of(b"a=b;"), dict([("a", s("b"))]));
assert!(parse(b"Hello world").is_err());
assert_eq!(
value_of(b"\"CF$UID\" = \"1024\";"),
Value::Uid(Uid::from(1024))
);
}
/// Comments are skipped between tokens, but `/` and `*` inside unquoted
/// strings are kept, and comments are not recognised inside hex data.
#[test]
fn comments_fixture_decodes_and_comments_hide_only_at_skip_sites() {
let doc: &[u8] = b"{\n\t\t\t\t\tA=1 /* A is 1 because it is the first letter */;\n\t\t\t\t\tB=2; // B is 2 because comment-to-end-of-line.\n\t\t\t\t\tC=3;\n\t\t\t\t\tS = /not/a/comment/;\n\t\t\t\t\tS2 = /not*a/*comm*en/t;\n\t\t\t\t}";
assert_eq!(
value_of(doc),
dict([
("A", s("1")),
("B", s("2")),
("C", s("3")),
("S", s("/not/a/comment/")),
("S2", s("/not*a/*comm*en/t")),
])
);
assert!(parse(b"<6/68>").is_err());
assert_eq!(value_of(b"/* /* */ x"), s("x"));
}
/// Each supported escape decodes to its expected character; the unknown escape `\w` yields `w`.
#[test]
fn escapes_fixture_decodes_exactly() {
let doc: &[u8] = b"{\n\t\t\t\tW=\"\\w\";\n\t\t\t\tA=\"\\a\";\n\t\t\t\tB=\"\\b\";\n\t\t\t\tV=\"\\v\";\n\t\t\t\tF=\"\\f\";\n\t\t\t\tT=\"\\t\";\n\t\t\t\tR=\"\\r\";\n\t\t\t\tN=\"\\n\";\n\t\t\t\tHex1=\"\\xAB\";\n\t\t\t\tUnicode1=\"\\u00AC\";\n\t\t\t\tUnicode2=\"\\U00AD\";\n\t\t\t\tOctal1=\"\\033\";\n\t\t\t}";
assert_eq!(
value_of(doc),
dict([
("W", s("w")),
("A", s("\u{7}")),
("B", s("\u{8}")),
("V", s("\u{B}")),
("F", s("\u{C}")),
("T", s("\t")),
("R", s("\r")),
("N", s("\n")),
("Hex1", s("\u{AB}")),
("Unicode1", s("\u{AC}")),
("Unicode2", s("\u{AD}")),
("Octal1", s("\u{1B}")),
])
);
}
/// Numeric escapes take as many digits as allowed, stop at the first
/// non-digit, and decode to U+0000 when no digit follows.
#[test]
fn escape_digit_scanners_are_greedy_with_early_stop() {
assert_eq!(
value_of(b"\"\\x1\\u02\\U003\\4\\0057\""),
s("\u{1}\u{2}\u{3}\u{4}\u{5}7")
);
assert_eq!(value_of(b"\"\\xaB\\uCdEf\""), s("\u{AB}\u{CDEF}"));
assert_eq!(value_of(b"\"\\xg\""), s("\u{0}g"));
assert_eq!(value_of(b"\"\\ug\""), s("\u{0}g"));
}
/// Escaped surrogate code units each become U+FFFD, even when they form a pair.
#[test]
fn surrogate_escapes_never_recombine() {
assert_eq!(value_of(b"\"\\Ud83d\\Ude00\""), s("\u{FFFD}\u{FFFD}"));
assert_eq!(value_of(b"\"\\Ud800\""), s("\u{FFFD}"));
}
/// Quoted strings may be empty and may contain raw control characters;
/// unknown escapes drop the backslash, and a trailing backslash at end of input is an error.
#[test]
fn quoted_string_content_rules() {
assert_eq!(value_of(b"\"\""), s(""));
assert_eq!(value_of(b"\"a\nb\tc\0d\""), s("a\nb\tc\0d"));
assert_eq!(value_of(b"\"\\w\\q\""), s("wq"));
assert!(parse(b"\"abc\\").is_err());
}
/// Unquoted strings accept the listed punctuation and non-ASCII letters, end
/// at whitespace, and reject `¬` as a key.
#[test]
fn unquoted_strings_stop_exactly_at_gs_quotable_members() {
assert_eq!(value_of("世界".as_bytes()), s("世界"));
assert_eq!(value_of(b"a-zA-Z0-9$:./_"), s("a-zA-Z0-9$:./_"));
let (value, _) = ok(b"(1 2)");
assert_eq!(value, Value::Array(vec![s("1"), s("2")]));
assert!(parse("{¬=A;}".as_bytes()).is_err());
}
/// Covers the `key;` shorthand, empty keys, duplicate keys (the later value
/// is the one compared), and missing `;` or `=`.
#[test]
fn dictionary_shorthand_duplicates_and_errors() {
assert_eq!(value_of(b"{foo;}"), dict([("foo", s("foo"))]));
assert_eq!(value_of(b"{Name=Dustin;}"), dict([("Name", s("Dustin"))]));
assert_eq!(value_of(b"{\"\"=Hello;}"), dict([("", s("Hello"))]));
assert_eq!(
value_of(b"{\"key\" = \"value\"; \"key\" = \"second value\";}"),
dict([("key", s("second value"))])
);
assert!(parse(b"{a=b}").is_err());
assert!(parse(b"{a b;}").is_err());
}
/// Arrays tolerate leading, repeated and trailing commas and drop empty
/// strings, but keep empty data, dictionaries and arrays.
#[test]
fn array_comma_tolerance_and_empty_string_dropping() {
assert_eq!(value_of(b"()"), Value::Array(vec![]));
assert_eq!(value_of(b"(,a,,b,,)"), Value::Array(vec![s("a"), s("b")]));
assert_eq!(value_of(b"(a,b,)"), Value::Array(vec![s("a"), s("b")]));
assert_eq!(value_of(b"(A,,,\"\",)"), Value::Array(vec![s("A")]));
assert_eq!(
value_of(b"(<>,{},())"),
Value::Array(vec![Value::Data(vec![]), dict([]), Value::Array(vec![])])
);
}
/// Hex data ignores the accepted whitespace characters between digits,
/// rejects a vertical tab, and handles payloads longer than 256 bytes.
#[test]
fn hex_data_grouping_whitespace_and_errors() {
assert_eq!(value_of(b"<>"), Value::Data(vec![]));
assert_eq!(value_of(b"<68656c6c 6f>"), Value::Data(b"hello".to_vec()));
assert_eq!(
value_of("<6\t8 65\r6c\n6c 6\u{2028}f\u{2029}>".as_bytes()),
Value::Data(b"hello".to_vec())
);
assert!(parse(b"<3\x0B3>").is_err());
let mut blob = String::from("<");
blob.push_str(&"00".repeat(256));
blob.push_str("01>");
let mut expected = vec![0u8; 256];
expected.push(1);
assert_eq!(value_of(blob.as_bytes()), Value::Data(expected));
}
/// GNUstep integers: signed and unsigned values, optional quotes, leading
/// zeros and 64-bit limits parse; malformed or out-of-range payloads fail.
#[test]
fn gnustep_integer_matrix() {
assert_eq!(value_of(b"<*I5>"), Value::from(5u64));
assert_eq!(value_of(b"<*I-5>"), Value::from(-5i64));
assert_eq!(value_of(b"<*I\"5>"), Value::from(5u64));
assert_eq!(value_of(b"<*I5\">"), Value::from(5u64));
assert_eq!(value_of(b"<*I\"1048576\">"), Value::from(1_048_576u64));
assert_eq!(value_of(b"<*I007>"), Value::from(7u64));
assert_eq!(value_of(b"<*I18446744073709551615>"), Value::from(u64::MAX));
assert_eq!(value_of(b"<*I-9223372036854775808>"), Value::from(i64::MIN));
for bad in [
&b"<*I+5>"[..],
b"<*I\"\">",
b"<*I1\"2>",
b"<*I18446744073709551616>",
b"<*I-9223372036854775809>",
b"<*I5x>",
] {
assert!(parse(bad).is_err(), "{}", String::from_utf8_lossy(bad));
}
}
/// GNUstep reals: pins the accepted spellings (NaN, infinities, underscores,
/// hex floats), overflow as an error, underflow to zero, and rejected payloads.
#[test]
fn gnustep_real_follows_c_style_parse() {
assert_eq!(value_of(b"<*R1.5>"), Value::from(1.5));
assert_eq!(value_of(b"<*R-0.5>"), Value::from(-0.5));
let nan = value_of(b"<*RNaN>");
assert!(nan.as_real().unwrap().is_nan());
assert_eq!(value_of(b"<*R+Inf>"), Value::from(f64::INFINITY));
assert_eq!(value_of(b"<*R-Inf>"), Value::from(f64::NEG_INFINITY));
assert_eq!(value_of(b"<*Rinfinity>"), Value::from(f64::INFINITY));
assert!(parse(b"<*R1e999>").is_err());
assert_eq!(value_of(b"<*R1e-999>"), Value::from(0.0));
assert_eq!(value_of(b"<*R1_000.5>"), Value::from(1000.5));
assert_eq!(value_of(b"<*R0x1p-2>"), Value::from(0.25));
for bad in [&b"<*R+NaN>"[..], b"<*R-nan>", b"<*R>", b"<*Rx>"] {
assert!(parse(bad).is_err(), "{}", String::from_utf8_lossy(bad));
}
}
/// GNUstep booleans are true exactly when the payload starts with `Y`; empty payloads fail.
#[test]
fn gnustep_boolean_is_first_byte_y() {
assert_eq!(value_of(b"<*BY>"), Value::from(true));
assert_eq!(value_of(b"<*BYES>"), Value::from(true));
assert_eq!(value_of(b"<*BN>"), Value::from(false));
assert_eq!(value_of(b"<*BZ>"), Value::from(false));
assert_eq!(value_of(b"<*Bnope>"), Value::from(false));
assert_eq!(value_of(b"<*B\"Y>"), Value::from(true));
assert!(parse(b"<*B\">").is_err());
assert!(parse(b"<*B\"\">").is_err());
}
/// GNUstep dates agree with `Date::parse_text_layout`; pins which variants
/// of the layout are accepted and which are rejected.
#[test]
fn gnustep_date_follows_the_text_layout() {
let expected = crate::date::Date::parse_text_layout("2013-11-27 00:34:00 +0000").unwrap();
assert_eq!(
value_of(b"<*D2013-11-27 00:34:00 +0000>"),
Value::Date(expected)
);
assert!(parse(b"<*D2013-11-27 0:34:00 +0000>").is_ok());
assert!(parse(b"<*D2013-11-27 00:34:00.25 +0000>").is_ok());
for bad in [
&b"<*D2013-11-27T00:34:00 +0000>"[..],
b"<*D2013-11-27 00:34:00 Z>",
b"<*D2013-11-27 00:34:00 +00:00>",
b"<*D2013-02-30 00:34:00 +0000>",
] {
assert!(parse(bad).is_err(), "{}", String::from_utf8_lossy(bad));
}
}
/// Fully and partially quoted GNUstep values inside an array decode, and the format is GNUstep.
#[test]
fn quoted_gnustep_values_fixture_decodes() {
let (value, format) = ok(b"(<*I\"1048576\">, <*I\"1234>, <*B\"Y>)");
assert_eq!(format, Format::GnuStep);
assert_eq!(
value,
Value::Array(vec![
Value::from(1_048_576u64),
Value::from(1234u64),
Value::from(true),
])
);
}
/// Base64 data ignores characters outside the alphabet, accepts non-zero
/// trailing bits, and rejects missing padding or a lone symbol.
#[test]
fn gnustep_base64_filters_then_decodes() {
let (value, format) = ok(b"(<[aGVs^^bG8=]>,<[ a G V s b G 8 = ]>)");
assert_eq!(format, Format::GnuStep);
assert_eq!(
value,
Value::Array(vec![
Value::Data(b"hello".to_vec()),
Value::Data(b"hello".to_vec()),
])
);
assert_eq!(value_of(b"<[aGVsbG8=]>"), Value::Data(b"hello".to_vec()));
assert_eq!(value_of(b"<[]>"), Value::Data(vec![]));
assert_eq!(value_of(b"<[33==]>"), Value::Data(vec![0xDF]));
assert!(parse(b"<[3]>").is_err());
assert!(parse(b"<[aGVsbG8]>").is_err());
}
/// A `{CF$UID=n;}` dictionary with a string value collapses to a UID only
/// while no GNUstep value has been seen earlier in the document; an integer
/// value collapses either way.
#[test]
fn uid_collapse_is_lax_only_while_the_document_is_pure_openstep() {
assert_eq!(value_of(b"{CF$UID=1024;}"), Value::Uid(Uid::from(1024)));
assert_eq!(value_of(b"{CF$UID=<*I1024>;}"), Value::Uid(Uid::from(1024)));
assert_eq!(
value_of(b"({CF$UID=255;},<*I1>)"),
Value::Array(vec![Value::Uid(Uid::from(255)), Value::from(1u64)])
);
assert_eq!(
value_of(b"(<*I1>,{CF$UID=255;})"),
Value::Array(vec![Value::from(1u64), dict([("CF$UID", s("255"))])])
);
assert_eq!(
value_of(b"(<*I1>,{CF$UID=<*I255>;})"),
Value::Array(vec![Value::from(1u64), Value::Uid(Uid::from(255))])
);
assert_eq!(
value_of(b"{CF$UID=1;CF$UID=2;}"),
dict([("CF$UID", s("2"))])
);
assert_eq!(value_of(b"{CF$UID=x;}"), dict([("CF$UID", s("x"))]));
}
/// The same five UIDs decode from both the OpenStep and the GNUstep spelling, with matching formats.
#[test]
fn uid_fixtures_decode_in_both_dialects() {
let expected = Value::Array(
[255u64, 65535, 16_777_215, 4_294_967_295, 1_099_511_627_775]
.into_iter()
.map(|n| Value::Uid(Uid::from(n)))
.collect(),
);
let os = b"({CF$UID=255;},{CF$UID=65535;},{CF$UID=16777215;},{CF$UID=4294967295;},{CF$UID=1099511627775;},)";
let gs = b"({CF$UID=<*I255>;},{CF$UID=<*I65535>;},{CF$UID=<*I16777215>;},{CF$UID=<*I4294967295>;},{CF$UID=<*I1099511627775>;},)";
assert_eq!(ok(os), (expected.clone(), Format::OpenStep));
assert_eq!(ok(gs), (expected, Format::GnuStep));
}
/// UTF-8 with BOM and UTF-16 in both byte orders, with and without BOM,
/// decode to the same text, including non-ASCII UTF-16 content.
#[test]
fn encoding_sniff_handles_all_six_pinned_fixtures() {
let hello = s("Hello");
assert_eq!(value_of(b"\xEF\xBB\xBFHello"), hello);
assert_eq!(value_of(b"\xFF\xFEH\x00e\x00l\x00l\x00o\x00"), hello);
assert_eq!(value_of(b"\xFE\xFF\x00H\x00e\x00l\x00l\x00o"), hello);
assert_eq!(value_of(b"H\x00e\x00l\x00l\x00o\x00"), hello);
assert_eq!(value_of(b"\x00H\x00e\x00l\x00l\x00o"), hello);
let high: &[u8] = &[
0, b'"', 0, b'H', 0, b'e', 0, b'l', 0, b'l', 0, b'o', 0, b',', 0, b' ', 0x4E, 0x16,
0x75, 0x4C, 0, b'"',
];
assert_eq!(value_of(high), s("Hello, 世界"));
}
/// In UTF-16 input a surrogate pair decodes to one character; an unpaired surrogate becomes U+FFFD.
#[test]
fn utf16_surrogates_recombine_but_unpaired_become_replacement() {
let paired: &[u8] = &[0xFE, 0xFF, 0x00, b'"', 0xD8, 0x3D, 0xDE, 0x00, 0x00, b'"'];
assert_eq!(value_of(paired), s("\u{1F600}"));
let unpaired: &[u8] = &[0xFE, 0xFF, 0x00, b'"', 0xD8, 0x3D, 0x00, b'"'];
assert_eq!(value_of(unpaired), s("\u{FFFD}"));
}
/// Two NUL bytes are rejected, a bare UTF-16 BOM is an empty document, and
/// an odd-length UTF-16 body is a text parse error.
#[test]
fn encoding_sniff_edge_cases() {
assert!(parse(&[0x00, 0x00]).is_err());
assert_eq!(value_of(&[0xFE, 0xFF]), dict([]));
assert_eq!(value_of(&[0xFF, 0xFE]), dict([]));
assert!(matches!(
parse(&[0xFF, 0xFE, 0x68]),
Err(Error::Parse { format: "text", .. })
));
}
/// Bytes that are not valid UTF-8 decode to the code point of the same value,
/// while valid sequences next to them are kept; 0xAC as a key is rejected.
#[test]
fn invalid_utf8_decodes_as_latin1_per_ruling_r18() {
assert_eq!(value_of(b"\"\xC8\""), s("\u{C8}"));
assert_eq!(value_of(b"\"\xC3\x88\xC8\""), s("\u{C8}\u{C8}"));
assert!(parse(b"{\xAC=A;}").is_err());
}
}