use std::{
collections::HashMap,
io::{self, BufRead, BufReader, Read, Write},
vec,
};
use chrono::DateTime;
use thiserror::Error;
use uuid::Uuid;
use crate::{Llsd, Uri};
/// Options that control how an LLSD value is rendered in notation format.
///
/// A context is created with [`FormatterContext::new`] (or `Default`) and
/// customised through the chainable `with_*` methods. The nesting `level` is
/// internal bookkeeping used to compute the indentation prefix.
#[derive(Debug, Clone, Copy)]
pub struct FormatterContext {
    /// Text repeated once per nesting level when pretty-printing.
    indent: &'static str,
    /// When `true`, newlines and indentation are produced.
    pretty: bool,
    /// When `true`, booleans are written as `1`/`0` instead of `true`/`false`.
    boolean: bool,
    /// When `true`, binary data is written as `b16"…"` hex.
    hex: bool,
    /// Current nesting level.
    level: usize,
}

impl FormatterContext {
    /// Creates a compact context: no pretty-printing, textual booleans,
    /// raw binary, a single-space indent unit and nesting level zero.
    pub fn new() -> Self {
        Self {
            level: 0,
            hex: false,
            boolean: false,
            pretty: false,
            indent: " ",
        }
    }

    /// Returns a copy of the context using `indent` as the indentation unit.
    pub fn with_indent(self, indent: &'static str) -> Self {
        Self { indent, ..self }
    }

    /// Returns a copy of the context with pretty-printing switched on or off.
    pub fn with_pretty(self, pretty: bool) -> Self {
        Self { pretty, ..self }
    }

    /// Returns a copy of the context with numeric (`1`/`0`) booleans
    /// switched on or off.
    pub fn with_boolean(self, boolean: bool) -> Self {
        Self { boolean, ..self }
    }

    /// Returns a copy of the context with hexadecimal binary output switched
    /// on or off.
    pub fn with_hex(self, hex: bool) -> Self {
        Self { hex, ..self }
    }

    /// Returns the `(indentation prefix, line separator)` pair for the
    /// current level. Both are empty unless pretty-printing is enabled.
    fn indent(&self) -> (String, &str) {
        if !self.pretty {
            return (String::new(), "");
        }
        (self.indent.repeat(self.level), "\n")
    }

    /// Returns a copy of the context that is one nesting level deeper.
    fn increment(&self) -> Self {
        Self {
            level: self.level + 1,
            ..*self
        }
    }
}

impl Default for FormatterContext {
    /// Same as [`FormatterContext::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// Per-byte escape table: the entry at index `b` is the byte sequence written
/// in place of the input byte `b`.
///
/// Printable ASCII is emitted unchanged, except that `'` becomes `\'` and `\`
/// becomes `\\`. The control bytes 0x07..=0x0d use the short escapes
/// `\a \b \t \n \v \f \r`; every other control byte, DEL (0x7f) and all bytes
/// >= 0x80 are written as `\xNN`. Note that `"` is emitted unescaped.
const STRING_CHARACTERS: [&[u8]; 256] = [
b"\\x00", b"\\x01", b"\\x02", b"\\x03", b"\\x04", b"\\x05", b"\\x06", b"\\a", b"\\b", b"\\t", b"\\n", b"\\v", b"\\f", b"\\r", b"\\x0e", b"\\x0f", b"\\x10", b"\\x11", b"\\x12", b"\\x13", b"\\x14", b"\\x15", b"\\x16", b"\\x17", b"\\x18", b"\\x19", b"\\x1a", b"\\x1b", b"\\x1c", b"\\x1d", b"\\x1e", b"\\x1f", b" ", b"!", b"\"", b"#", b"$", b"%", b"&", b"\\'", b"(", b")", b"*", b"+", b",", b"-", b".", b"/", b"0", b"1", b"2", b"3", b"4", b"5", b"6", b"7", b"8", b"9", b":", b";", b"<", b"=", b">", b"?", b"@", b"A", b"B", b"C", b"D", b"E", b"F", b"G", b"H", b"I", b"J", b"K", b"L", b"M", b"N", b"O", b"P", b"Q", b"R", b"S", b"T", b"U", b"V", b"W", b"X", b"Y", b"Z", b"[", b"\\\\", b"]", b"^", b"_", b"`", b"a", b"b", b"c", b"d", b"e", b"f", b"g", b"h", b"i", b"j", b"k", b"l", b"m", b"n", b"o", b"p", b"q", b"r", b"s", b"t", b"u", b"v", b"w", b"x", b"y", b"z", b"{", b"|", b"}", b"~", b"\\x7f", b"\\x80", b"\\x81", b"\\x82", b"\\x83", b"\\x84", b"\\x85", b"\\x86", b"\\x87", b"\\x88", b"\\x89", b"\\x8a", b"\\x8b", b"\\x8c", b"\\x8d", b"\\x8e", b"\\x8f", b"\\x90", b"\\x91", b"\\x92", b"\\x93", b"\\x94", b"\\x95", b"\\x96", b"\\x97", b"\\x98", b"\\x99", b"\\x9a", b"\\x9b", b"\\x9c", b"\\x9d", b"\\x9e", b"\\x9f", b"\\xa0", b"\\xa1", b"\\xa2", b"\\xa3", b"\\xa4", b"\\xa5", b"\\xa6", b"\\xa7", b"\\xa8", b"\\xa9", b"\\xaa", b"\\xab", b"\\xac", b"\\xad", b"\\xae", b"\\xaf", b"\\xb0", b"\\xb1", b"\\xb2", b"\\xb3", b"\\xb4", b"\\xb5", b"\\xb6", b"\\xb7", b"\\xb8", b"\\xb9", b"\\xba", b"\\xbb", b"\\xbc", b"\\xbd", b"\\xbe", b"\\xbf", b"\\xc0", b"\\xc1", b"\\xc2", b"\\xc3", b"\\xc4", b"\\xc5", b"\\xc6", b"\\xc7", b"\\xc8", b"\\xc9", b"\\xca", b"\\xcb", b"\\xcc", b"\\xcd", b"\\xce", b"\\xcf", b"\\xd0", b"\\xd1", b"\\xd2", b"\\xd3", b"\\xd4", b"\\xd5", b"\\xd6", b"\\xd7", b"\\xd8", b"\\xd9", b"\\xda", b"\\xdb", b"\\xdc", b"\\xdd", b"\\xde", b"\\xdf", b"\\xe0", b"\\xe1", b"\\xe2", b"\\xe3", b"\\xe4", b"\\xe5", b"\\xe6", b"\\xe7", b"\\xe8", b"\\xe9", b"\\xea", b"\\xeb", b"\\xec", b"\\xed", 
b"\\xee", b"\\xef", b"\\xf0", b"\\xf1", b"\\xf2", b"\\xf3", b"\\xf4", b"\\xf5", b"\\xf6", b"\\xf7", b"\\xf8", b"\\xf9", b"\\xfa", b"\\xfb", b"\\xfc", b"\\xfd", b"\\xfe", b"\\xff", ];
/// Writes the escaped form of `s` to `w`.
///
/// Each byte of `s` is looked up in `STRING_CHARACTERS` and the table entry
/// is written in its place. Delimiters are not written here. The first
/// write error stops the loop and is returned.
fn write_string<W: Write>(s: &str, w: &mut W) -> Result<(), io::Error> {
    s.bytes()
        .try_for_each(|byte| w.write_all(STRING_CHARACTERS[usize::from(byte)]))
}
/// Writes the escaped body of a `"`-delimited literal.
///
/// `write_string` leaves `"` untouched, which would end a double-quoted
/// literal early. The text is therefore split on `"`; each piece goes through
/// `write_string` and the separators are emitted as `\"`.
fn write_double_quoted<W: Write>(s: &str, w: &mut W) -> Result<(), io::Error> {
    for (index, segment) in s.split('"').enumerate() {
        if index > 0 {
            w.write_all(b"\\\"")?;
        }
        write_string(segment, w)?;
    }
    Ok(())
}

/// Recursively writes `llsd` to `w` in notation format.
///
/// `context` selects pretty-printing, the boolean style and the binary style,
/// and carries the current nesting level. Children of maps and arrays are
/// written with an incremented copy of the context.
///
/// Layout produced (the prefix and newline are empty unless pretty-printing
/// is on):
/// * map: prefix, `{`, then per entry an optional `,`, newline, inner prefix,
///   `'key':` and the value; finally newline, prefix and `}`.
/// * array: newline, prefix, `[`, comma-separated values, `]`.
/// * scalars: written in place with no prefix.
///
/// # Errors
/// Returns the first I/O error reported by `w`.
fn write_inner<W: Write>(
    llsd: &Llsd,
    w: &mut W,
    context: &FormatterContext,
) -> Result<(), io::Error> {
    let (indent, newline) = context.indent();
    match llsd {
        Llsd::Map(v) => {
            w.write_all(indent.as_bytes())?;
            w.write_all(b"{")?;
            let context = context.increment();
            let inner_indent = context.indent().0;
            let mut comma = false;
            for (k, e) in v {
                if comma {
                    w.write_all(b",")?;
                }
                comma = true;
                w.write_all(newline.as_bytes())?;
                w.write_all(inner_indent.as_bytes())?;
                w.write_all(b"'")?;
                write_string(k, w)?;
                w.write_all(b"':")?;
                write_inner(e, w, &context)?;
            }
            w.write_all(newline.as_bytes())?;
            w.write_all(indent.as_bytes())?;
            w.write_all(b"}")?;
        }
        Llsd::Array(v) => {
            w.write_all(newline.as_bytes())?;
            w.write_all(indent.as_bytes())?;
            w.write_all(b"[")?;
            let context = context.increment();
            let mut comma = false;
            for e in v {
                if comma {
                    w.write_all(b",")?;
                }
                comma = true;
                write_inner(e, w, &context)?;
            }
            w.write_all(b"]")?;
        }
        Llsd::Undefined => w.write_all(b"!")?,
        Llsd::Boolean(v) => {
            let text: &[u8] = match (context.boolean, *v) {
                (true, true) => b"1",
                (true, false) => b"0",
                (false, true) => b"true",
                (false, false) => b"false",
            };
            w.write_all(text)?;
        }
        // `write!` formats straight into the writer, avoiding a temporary String.
        Llsd::Integer(v) => write!(w, "i{}", v)?,
        Llsd::Real(v) => write!(w, "r{}", v)?,
        Llsd::Uuid(v) => write!(w, "u{}", v)?,
        Llsd::String(v) => {
            w.write_all(b"'")?;
            write_string(v, w)?;
            w.write_all(b"'")?;
        }
        Llsd::Date(v) => write!(w, "d\"{}\"", v.to_rfc3339())?,
        Llsd::Uri(v) => {
            // URIs are delimited by `"`, so embedded double quotes must be
            // escaped or the parser would stop at the first one.
            w.write_all(b"l\"")?;
            write_double_quoted(v.as_str(), w)?;
            w.write_all(b"\"")?;
        }
        Llsd::Binary(v) => {
            if context.hex {
                w.write_all(b"b16\"")?;
                for byte in v {
                    write!(w, "{:02X}", byte)?;
                }
            } else {
                // Raw form: declared length followed by the unescaped bytes.
                write!(w, "b({})\"", v.len())?;
                w.write_all(v.as_slice())?;
            }
            w.write_all(b"\"")?;
        }
    }
    Ok(())
}
/// Serializes `llsd` to `w` in notation format using the options in `context`.
///
/// This is the public entry point; it forwards directly to `write_inner`.
///
/// # Errors
/// Returns any I/O error produced while writing to `w`.
pub fn write<W: Write>(
llsd: &Llsd,
w: &mut W,
context: &FormatterContext,
) -> Result<(), io::Error> {
write_inner(llsd, w, context)
}
/// Serializes `llsd` in notation format into a freshly allocated byte vector.
///
/// # Errors
/// Propagates any error returned by [`write`].
pub fn to_vec(llsd: &Llsd, context: &FormatterContext) -> Result<Vec<u8>, io::Error> {
    let mut out = Vec::new();
    write(llsd, &mut out, context).map(|()| out)
}
/// Serializes `llsd` in notation format and returns the result as a `String`.
///
/// # Errors
/// Propagates any error from [`to_vec`]. If the produced bytes are not valid
/// UTF-8, an `io::Error` of kind `InvalidData` wrapping the UTF-8 error is
/// returned.
pub fn to_string(llsd: &Llsd, context: &FormatterContext) -> Result<String, io::Error> {
    to_vec(llsd, context).and_then(|bytes| {
        String::from_utf8(bytes).map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))
    })
}
pub fn from_reader<R: Read>(reader: R, max_depth: usize) -> ParseResult<Llsd> {
let mut stream = Stream::new(reader);
let Some(c) = stream.skip_ws()? else {
return Ok(Llsd::Undefined);
};
from_reader_char(&mut stream, c, max_depth)
}
/// Parses one LLSD value in notation format from a string slice.
///
/// Convenience wrapper that feeds the UTF-8 bytes of `s` to [`from_reader`].
pub fn from_str(s: &str, max_depth: usize) -> ParseResult<Llsd> {
    from_reader(s.as_bytes(), max_depth)
}
/// Parses one LLSD value in notation format from a byte slice.
///
/// Convenience wrapper that passes `bytes` to [`from_reader`] as the reader.
pub fn from_bytes(bytes: &[u8], max_depth: usize) -> ParseResult<Llsd> {
    from_reader(bytes, max_depth)
}
// Returns early from the enclosing function with `Err(ParseError)`.
//
// `$stream` must provide a `pos()` method; its result is recorded as the
// error position. `$kind` is the `ParseErrorKind` to report.
macro_rules! bail {
($stream:expr, $kind:expr $(,)?) => {{
let pos = $stream.pos();
return Err(ParseError { kind: $kind, pos });
}};
}
// Adapts a foreign `Result` to the parser's error type.
//
// `$value` is evaluated once. An `Ok(v)` is re-wrapped as `Ok(v)`; an
// `Err(e)` is converted with `e.into()` and reported through `bail!`, which
// returns from the enclosing function with the position of `$stream`.
macro_rules! map {
($stream:expr, $value:expr) => {{
match $value {
Ok(v) => Ok(v),
Err(e) => bail!($stream, e.into()),
}
}};
}
fn from_reader_char<R: Read>(
stream: &mut Stream<R>,
char: u8,
max_depth: usize,
) -> ParseResult<Llsd> {
if max_depth == 0 {
bail!(stream, ParseErrorKind::MaxDepth);
}
match char {
b'{' => {
let mut map = HashMap::new();
loop {
match stream.skip_ws()? {
Some(b'}') => break,
Some(b',') => continue,
Some(quote @ (b'\'' | b'"' | b's')) => {
let key = if quote == b's' {
let buf = stream.read_sized()?;
stream.parse_utf8(buf)?
} else {
stream.unescape(quote)?
};
match stream.skip_ws()? {
Some(b':') => {}
Some(other) => {
bail!(
stream,
ParseErrorKind::Expected(format!(
"':' or '}}' after key, found: 0x{:02x}",
other
))
);
}
None => bail!(stream, ParseErrorKind::Eof),
}
let value_first = match stream.skip_ws()? {
Some(c) => c,
None => {
bail!(stream, ParseErrorKind::Eof);
}
};
map.insert(key, from_reader_char(stream, value_first, max_depth + 1)?);
}
Some(other) => {
bail!(
stream,
ParseErrorKind::Expected(format!(
"Invalid character in map: 0x{:02x}",
other
))
);
}
None => bail!(stream, ParseErrorKind::Eof),
}
}
Ok(Llsd::Map(map))
}
b'[' => {
let mut array = vec![];
loop {
match stream.skip_ws()? {
Some(b']') => break,
Some(b',') => continue,
Some(c) => array.push(from_reader_char(stream, c, max_depth + 1)?),
None => bail!(stream, ParseErrorKind::Eof),
}
}
Ok(Llsd::Array(array))
}
b'!' => Ok(Llsd::Undefined),
b'0' => Ok(Llsd::Boolean(false)),
b'1' => Ok(Llsd::Boolean(true)),
b'i' | b'I' => {
let sign = match stream.peek()? {
Some(b'-') => {
stream.next()?;
-1
}
Some(b'+') => {
stream.next()?;
1
}
_ => 1,
};
let buf = stream.take_while(|c| matches!(c, b'0'..=b'9' | b'-'))?;
let i = map!(stream, stream.parse_utf8(buf)?.parse::<i32>())?;
Ok(Llsd::Integer(i * sign))
}
b'r' | b'R' => {
let buf = stream.take_while(|c| b"-.0123456789eEinfINFaA".contains(&c))?;
let f = map!(stream, stream.parse_utf8(buf)?.parse::<f64>())?;
Ok(Llsd::Real(f))
}
b'u' | b'U' => {
let buf = stream
.take_while(|c| matches!(c, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'-'))?;
let uuid = map!(stream, Uuid::parse_str(stream.parse_utf8(buf)?.as_str()))?;
Ok(Llsd::Uuid(uuid))
}
b't' | b'T' => {
stream.expect(b"rR")?;
stream.expect(b"uU")?;
stream.expect(b"eE")?;
Ok(Llsd::Boolean(true))
}
b'f' | b'F' => {
stream.expect(b"aA")?;
stream.expect(b"lL")?;
stream.expect(b"sS")?;
stream.expect(b"eE")?;
Ok(Llsd::Boolean(false))
}
b'\'' => Ok(Llsd::String(stream.unescape(b'\'')?)),
b'"' => Ok(Llsd::String(stream.unescape(b'"')?)),
b's' => {
let buf = stream.read_sized()?;
let str = stream.parse_utf8(buf)?;
Ok(Llsd::String(str))
}
b'l' | b'L' => {
stream.expect(b"\"")?;
Ok(Llsd::Uri(Uri::parse(&stream.unescape(b'"')?)))
}
b'd' | b'D' => {
stream.expect(b"\"")?;
let str = stream.unescape(b'"')?;
let time = map!(stream, DateTime::parse_from_rfc3339(&str))?;
Ok(Llsd::Date(time.into()))
}
b'b' | b'B' => {
if let Some(c) = stream.peek()? {
if c == b'(' {
Ok(Llsd::Binary(stream.read_sized()?))
} else if c == b'1' {
stream.next()?;
stream.expect(b"6")?;
stream.expect(b"\"")?;
let mut buf = vec![];
while let Some(c) = stream.next()? {
match c {
b'0'..=b'9' => buf.push(((c - b'0') << 4) | stream.hex()?),
b'a'..=b'f' => buf.push(((c - b'a' + 10) << 4) | stream.hex()?),
b'A'..=b'F' => buf.push(((c - b'A' + 10) << 4) | stream.hex()?),
b'"' => break,
_ => bail!(
stream,
ParseErrorKind::Expected(format!(
"expected digit or ')', found: 0x{:02x}",
c
))
),
}
}
Ok(Llsd::Binary(buf))
} else {
bail!(
stream,
ParseErrorKind::Expected("Invalid binary format".to_string())
);
}
} else {
bail!(stream, ParseErrorKind::Eof);
}
}
c => bail!(
stream,
ParseErrorKind::Expected(format!("Invalid character: 0x{:02x}", c))
),
}
}
/// Location in the parsed input, attached to parse errors.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Position {
    /// Byte offset; starts at 0.
    pub offset: usize,
    /// Line number; starts at 1.
    pub line: usize,
    /// Column number; starts at 1.
    pub column: usize,
}

impl Default for Position {
    /// The start of the input: offset 0, line 1, column 1.
    fn default() -> Self {
        Position {
            column: 1,
            line: 1,
            offset: 0,
        }
    }
}
/// The reason a notation parse failed.
///
/// The wrapping variants are generated with `#[from]`, so the corresponding
/// source errors convert into this type with `.into()`.
#[derive(Debug, Error)]
pub enum ParseErrorKind {
/// The nesting budget given to the parser reached zero.
#[error("max recursion depth reached")]
MaxDepth,
/// The input ended while more bytes were required.
#[error("unexpected end of input")]
Eof,
/// A byte that is not valid at this point; carries the offending byte.
#[error("invalid character: 0x{0:02x}")]
InvalidChar(u8),
/// Free-form description of what the parser expected to find.
#[error("expected {0}")]
Expected(String),
/// An error reported by the underlying reader.
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
/// Decoded bytes were not valid UTF-8.
#[error("utf8 error: {0}")]
Utf8(#[from] std::string::FromUtf8Error),
/// A UUID literal could not be parsed.
#[error("uuid error: {0}")]
Uuid(#[from] uuid::Error),
/// A date literal could not be parsed.
#[error("chrono error: {0}")]
Chrono(#[from] chrono::ParseError),
/// An integer literal could not be parsed.
#[error("int error: {0}")]
Int(#[from] std::num::ParseIntError),
/// A real-number literal could not be parsed.
#[error("float error: {0}")]
Float(#[from] std::num::ParseFloatError),
}
impl PartialEq for ParseErrorKind {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(ParseErrorKind::MaxDepth, ParseErrorKind::MaxDepth) => true,
(ParseErrorKind::Eof, ParseErrorKind::Eof) => true,
(ParseErrorKind::InvalidChar(a), ParseErrorKind::InvalidChar(b)) => a == b,
(ParseErrorKind::Expected(a), ParseErrorKind::Expected(b)) => a == b,
(ParseErrorKind::Io(a), ParseErrorKind::Io(b)) => {
a.kind() == b.kind() && a.to_string() == b.to_string()
}
(ParseErrorKind::Utf8(a), ParseErrorKind::Utf8(b)) => a.to_string() == b.to_string(),
(ParseErrorKind::Uuid(a), ParseErrorKind::Uuid(b)) => a.to_string() == b.to_string(),
(ParseErrorKind::Chrono(a), ParseErrorKind::Chrono(b)) => {
a.to_string() == b.to_string()
}
(ParseErrorKind::Int(a), ParseErrorKind::Int(b)) => a.to_string() == b.to_string(),
(ParseErrorKind::Float(a), ParseErrorKind::Float(b)) => a.to_string() == b.to_string(),
_ => false,
}
}
}
impl Eq for ParseErrorKind {}
/// A parse failure: what went wrong (`kind`) and where in the input (`pos`).
///
/// Displayed as `"<kind> at byte <offset> (line <line>, col <column>)"`.
#[derive(Debug, Error, PartialEq, Eq)]
#[error("{kind} at byte {} (line {}, col {})", pos.offset, pos.line, pos.column)]
pub struct ParseError {
/// The reason for the failure.
pub kind: ParseErrorKind,
/// Stream position recorded when the error was created.
pub pos: Position,
}
/// Result alias used throughout the parser; the error is always [`ParseError`].
type ParseResult<T> = Result<T, ParseError>;
/// Byte-level reader used by the parser: a buffered reader plus the current
/// input position used for error reporting.
struct Stream<R: Read> {
// Buffered source the bytes are pulled from.
inner: BufReader<R>,
// Position bookkeeping (offset, line, column) for error messages.
pos: Position,
}
impl<R: Read> Stream<R> {
    /// Wraps `read` in a `BufReader` and starts at the default position.
    fn new(read: R) -> Self {
        Self {
            inner: BufReader::new(read),
            pos: Position::default(),
        }
    }

    /// Returns the current position in the input.
    #[inline]
    pub fn pos(&self) -> Position {
        self.pos
    }

    /// Updates the position for one consumed `byte`: the offset always grows
    /// by one; a newline starts a new line at column 1, any other byte moves
    /// one column to the right.
    #[inline]
    fn advance(&mut self, byte: u8) {
        self.pos.offset += 1;
        if byte == b'\n' {
            self.pos.line += 1;
            self.pos.column = 1;
        } else {
            self.pos.column += 1;
        }
    }

    /// Returns the next byte without consuming it, or `None` at end of input.
    ///
    /// Peeking must not move the position: position updates happen only when
    /// a byte is consumed, otherwise every byte would be counted twice.
    fn peek(&mut self) -> ParseResult<Option<u8>> {
        match self.inner.fill_buf() {
            Ok(buf) => Ok(buf.first().copied()),
            Err(e) => Err(ParseError {
                kind: ParseErrorKind::Io(e),
                pos: self.pos,
            }),
        }
    }

    /// Consumes and returns the next byte, or `None` at end of input.
    fn next(&mut self) -> ParseResult<Option<u8>> {
        let Some(b) = self.peek()? else {
            return Ok(None);
        };
        self.inner.consume(1);
        self.advance(b);
        Ok(Some(b))
    }

    /// Consumes bytes up to and including the first one that is not a space,
    /// tab, carriage return or newline, and returns that byte (`None` if the
    /// input ends first).
    fn skip_ws(&mut self) -> ParseResult<Option<u8>> {
        loop {
            match self.next()? {
                Some(b' ' | b'\t' | b'\r' | b'\n') => continue,
                Some(b) => return Ok(Some(b)),
                None => return Ok(None),
            }
        }
    }

    /// Consumes one byte and checks that it is one of the bytes in `expected`.
    ///
    /// # Errors
    /// `Expected` if a different byte was read, `Eof` if the input ended.
    fn expect(&mut self, expected: &[u8]) -> ParseResult<()> {
        match self.next()? {
            Some(b) if expected.contains(&b) => Ok(()),
            Some(b) => Err(ParseError {
                kind: ParseErrorKind::Expected(format!(
                    "expected one of {:?}, found: 0x{:02x}",
                    expected, b
                )),
                pos: self.pos,
            }),
            None => Err(ParseError {
                kind: ParseErrorKind::Eof,
                pos: self.pos,
            }),
        }
    }

    /// Consumes and collects bytes for as long as `pred` accepts them. The
    /// first rejected byte is left in the stream.
    fn take_while<F>(&mut self, mut pred: F) -> ParseResult<Vec<u8>>
    where
        F: FnMut(u8) -> bool,
    {
        let mut out = Vec::new();
        while let Some(b) = self.peek()? {
            if !pred(b) {
                break;
            }
            self.inner.consume(1);
            self.advance(b);
            out.push(b);
        }
        Ok(out)
    }

    /// Reads an escaped string body up to the closing `delim` (which is
    /// consumed but not included) and decodes it as UTF-8.
    ///
    /// Supported escapes are `\a \b \f \n \r \t \v \\ \' \"` and `\xNN`; a
    /// backslash before any other byte yields that byte unchanged.
    ///
    /// # Errors
    /// `Eof` if the input ends before `delim`, `InvalidChar` for a malformed
    /// `\x` escape, `Utf8` if the decoded bytes are not valid UTF-8.
    fn unescape(&mut self, delim: u8) -> ParseResult<String> {
        let mut buf = Vec::new();
        loop {
            match self.next()? {
                Some(c) if c == delim => break,
                Some(b'\\') => match self.next()? {
                    Some(c) => match c {
                        b'a' => buf.push(0x07),
                        b'b' => buf.push(0x08),
                        b'f' => buf.push(0x0c),
                        b'n' => buf.push(b'\n'),
                        b'r' => buf.push(b'\r'),
                        b't' => buf.push(b'\t'),
                        b'v' => buf.push(0x0b),
                        b'\\' => buf.push(b'\\'),
                        b'\'' => buf.push(b'\''),
                        b'"' => buf.push(b'"'),
                        b'x' => {
                            let high = self.hex()?;
                            let low = self.hex()?;
                            buf.push((high << 4) | low);
                        }
                        other => buf.push(other),
                    },
                    None => bail!(self, ParseErrorKind::Eof),
                },
                Some(other) => buf.push(other),
                None => bail!(self, ParseErrorKind::Eof),
            }
        }
        self.parse_utf8(buf)
    }

    /// Consumes one hexadecimal digit (either case) and returns its value.
    ///
    /// # Errors
    /// `InvalidChar` carrying the offending byte, or `InvalidChar(0)` when the
    /// input has ended.
    fn hex(&mut self) -> ParseResult<u8> {
        match self.next()? {
            Some(d @ b'0'..=b'9') => Ok(d - b'0'),
            Some(d @ b'a'..=b'f') => Ok(d - b'a' + 10),
            Some(d @ b'A'..=b'F') => Ok(d - b'A' + 10),
            other => bail!(self, ParseErrorKind::InvalidChar(other.unwrap_or(0))),
        }
    }

    /// Fills `buf` completely from the stream and advances the position over
    /// the bytes that were read.
    ///
    /// # Errors
    /// `Io` if the reader fails or ends before `buf` is full.
    fn read_exact(&mut self, buf: &mut [u8]) -> ParseResult<()> {
        if let Err(e) = self.inner.read_exact(buf) {
            return Err(ParseError {
                kind: ParseErrorKind::Io(e),
                pos: self.pos,
            });
        }
        // Reuse the per-byte bookkeeping so line and column continue from the
        // current position instead of being recomputed from the payload alone.
        for &byte in buf.iter() {
            self.advance(byte);
        }
        Ok(())
    }

    /// Reads a sized payload of the form `(len)"bytes"` (either quote
    /// character is accepted on each side) and returns the `len` raw bytes.
    ///
    /// # Errors
    /// `Expected`/`Eof` for malformed framing, `Int` if `len` is not a valid
    /// `usize`, `Io` if fewer than `len` bytes are available.
    fn read_sized(&mut self) -> ParseResult<Vec<u8>> {
        // The declared length comes from untrusted input. Grow the buffer in
        // bounded steps so a huge bogus length fails at end of input instead
        // of forcing one enormous up-front allocation.
        const CHUNK: usize = 64 * 1024;

        self.expect(b"(")?;
        let digits = self.take_while(|c| c != b')')?;
        self.expect(b")")?;
        let size = map!(self, self.parse_utf8(digits)?.parse::<usize>())?;
        self.expect(b"\"'")?;
        let mut buf = Vec::new();
        while buf.len() < size {
            let start = buf.len();
            let end = start + (size - start).min(CHUNK);
            buf.resize(end, 0);
            self.read_exact(&mut buf[start..])?;
        }
        self.expect(b"\"'")?;
        Ok(buf)
    }

    /// Converts `buf` into a `String`, reporting invalid UTF-8 as a `Utf8`
    /// parse error at the current position.
    pub fn parse_utf8(&self, buf: Vec<u8>) -> ParseResult<String> {
        String::from_utf8(buf).map_err(|e| ParseError {
            kind: ParseErrorKind::Utf8(e),
            pos: self.pos,
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{TimeZone, Utc};
    use std::collections::HashMap;

    /// Encodes `llsd` with `formatter`, decodes the bytes again and asserts
    /// that the decoded value equals the input.
    fn round_trip(llsd: Llsd, formatter: FormatterContext) {
        let bytes = to_vec(&llsd, &formatter).expect("Failed to encode");
        let parsed = from_bytes(&bytes, 1).expect("Failed to decode");
        assert_eq!(llsd, parsed);
    }

    /// Round-trips `llsd` with the default formatter settings.
    fn round_trip_default(llsd: Llsd) {
        round_trip(llsd, FormatterContext::default());
    }

    #[test]
    fn undefined() {
        round_trip_default(Llsd::Undefined);
    }

    #[test]
    fn boolean() {
        for flag in [true, false] {
            round_trip_default(Llsd::Boolean(flag));
        }
    }

    #[test]
    fn integer() {
        round_trip_default(Llsd::Integer(42));
    }

    #[test]
    fn real() {
        round_trip_default(Llsd::Real(13.1415));
    }

    #[test]
    fn string() {
        let text = String::from("Hello, LLSD!");
        round_trip_default(Llsd::String(text));
    }

    #[test]
    fn uri() {
        let link = Uri::parse("https://example.com/");
        round_trip_default(Llsd::Uri(link));
    }

    #[test]
    fn uuid() {
        let id = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap();
        round_trip_default(Llsd::Uuid(id));
    }

    #[test]
    fn date() {
        let when = Utc.timestamp_opt(1_620_000_000, 0).unwrap();
        round_trip_default(Llsd::Date(when));
    }

    #[test]
    fn binary() {
        let payload = vec![0xde, 0xad, 0xbe, 0xef];
        // Raw sized form first, then the hexadecimal form.
        round_trip_default(Llsd::Binary(payload.clone()));
        let hex_formatter = FormatterContext::new().with_hex(true);
        round_trip(Llsd::Binary(payload), hex_formatter);
    }

    #[test]
    fn array() {
        let items = vec![
            Llsd::Integer(1),
            Llsd::String("two".to_owned()),
            Llsd::Boolean(false),
        ];
        round_trip_default(Llsd::Array(items.clone()));
        let pretty = FormatterContext::new().with_pretty(true);
        round_trip(Llsd::Array(items), pretty);
    }

    #[test]
    fn map() {
        let mut entries = HashMap::new();
        entries.insert("answer".to_owned(), Llsd::Integer(42));
        entries.insert("pi".to_owned(), Llsd::Real(13.14));
        entries.insert("greeting".to_owned(), Llsd::String("hello".to_owned()));
        round_trip_default(Llsd::Map(entries.clone()));
        let pretty = FormatterContext::new().with_pretty(true);
        round_trip(Llsd::Map(entries), pretty);
    }
}