use super::Tag;
use byteorder::{BigEndian, ReadBytesExt};
use std::{convert::TryFrom, io::Read, str};
/// Optional name attached to a parsed value.
///
/// The parser supplies `Some(name)` for values that are read together with a
/// name prefix, and `None` for values read as elements of a list.
pub type Name = Option<String>;
/// A single item produced by [`Parser::next`] while streaming through NBT data.
///
/// Scalar and array variants carry their payload directly. `Compound` and
/// `List` only announce that a container has been opened; their contents are
/// returned by subsequent calls, terminated by `CompoundEnd` / `ListEnd`.
#[derive(Debug, PartialEq)]
pub enum Value {
/// The innermost open compound has been closed.
CompoundEnd,
/// A signed 8-bit integer.
Byte(Name, i8),
/// A signed 16-bit integer.
Short(Name, i16),
/// A signed 32-bit integer.
Int(Name, i32),
/// A signed 64-bit integer.
Long(Name, i64),
/// A 32-bit float.
Float(Name, f32),
/// A 64-bit float.
Double(Name, f64),
/// An array of signed bytes, fully read into memory.
ByteArray(Name, Vec<i8>),
/// A string payload.
String(Name, String),
/// A list has been opened: its name, the tag of its elements and the
/// element count as read from the stream.
List(Name, Tag, i32),
/// The innermost open list has no elements left.
ListEnd,
/// A compound has been opened.
Compound(Name),
/// An array of 32-bit integers, fully read into memory.
IntArray(Name, Vec<i32>),
/// An array of 64-bit integers, fully read into memory.
LongArray(Name, Vec<i64>),
}
/// Error produced while parsing an NBT stream.
///
/// Holds a human-readable message (what `Display` prints) together with an
/// [`ErrorKind`] that callers can inspect programmatically.
#[derive(Debug, Clone)]
pub struct Error {
    msg: String,
    kind: ErrorKind,
}

/// Category of an [`Error`].
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum ErrorKind {
    /// A failure without a more specific category, including I/O errors that
    /// are not an unexpected end of input.
    Other,
    /// The input ended at a point where the parser was about to read a tag.
    Eof,
    /// An I/O operation reported that the input ended part-way through a read.
    UnexpectedEof,
    /// A tag byte did not correspond to a known tag.
    InvalidTag,
    /// A string could not be decoded; the payload is the raw bytes.
    Nonunicode(Vec<u8>),
}

impl Error {
    /// Returns the category of this error.
    pub fn kind(&self) -> &ErrorKind {
        &self.kind
    }

    /// Returns `true` only for [`ErrorKind::Eof`].
    ///
    /// [`ErrorKind::UnexpectedEof`] is deliberately not included.
    pub fn is_eof(&self) -> bool {
        matches!(self.kind(), ErrorKind::Eof)
    }

    /// Single place where an `Error` is assembled from its parts.
    fn with_kind(kind: ErrorKind, msg: String) -> Self {
        Self { msg, kind }
    }

    /// Builds an [`ErrorKind::Other`] error with a free-form message.
    fn bespoke(msg: impl Into<String>) -> Self {
        Self::with_kind(ErrorKind::Other, msg.into())
    }

    /// Builds an [`ErrorKind::InvalidTag`] error mentioning the offending byte.
    fn invalid_tag(t: u8) -> Self {
        Self::with_kind(ErrorKind::InvalidTag, format!("invalid tag: {}", t))
    }

    /// Builds an [`ErrorKind::Nonunicode`] error that keeps the raw bytes.
    fn nonunicode(d: Vec<u8>) -> Self {
        // Render the message before `d` is moved into the kind.
        let lossy = String::from_utf8_lossy(&d);
        let msg = format!("invalid string, non-unicode: {}", lossy);
        Self::with_kind(ErrorKind::Nonunicode(d), msg)
    }

    /// Builds the [`ErrorKind::Eof`] error.
    fn eof() -> Self {
        Self::with_kind(ErrorKind::Eof, String::from("EOF"))
    }
}

impl std::error::Error for Error {}

impl std::fmt::Display for Error {
    /// Writes the stored message verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.msg)
    }
}

impl From<std::io::Error> for Error {
    /// Converts an I/O error, keeping its message and mapping
    /// `UnexpectedEof` to [`ErrorKind::UnexpectedEof`]; every other I/O error
    /// kind becomes [`ErrorKind::Other`].
    fn from(e: std::io::Error) -> Self {
        let kind = if e.kind() == std::io::ErrorKind::UnexpectedEof {
            ErrorKind::UnexpectedEof
        } else {
            ErrorKind::Other
        };
        Self::with_kind(kind, e.to_string())
    }
}
/// Result type used throughout this module, with [`Error`] as the error type.
pub type Result<T> = std::result::Result<T, Error>;
/// Pull-style NBT parser that yields one [`Value`] per call to `next`.
pub struct Parser<R: Read> {
// Source of the raw NBT bytes.
reader: R,
// Stack of the lists and compounds that are currently open, innermost last.
layers: Vec<Layer>,
}
impl<R: Read> Parser<R> {
pub fn new(reader: R) -> Self {
Self {
reader,
layers: Vec::new(),
}
}
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> Result<Value> {
self.next_inner()
}
pub fn get_ref(&self) -> &R {
&self.reader
}
pub fn get_mut(&mut self) -> &mut R {
&mut self.reader
}
pub fn into_inner(self) -> R {
self.reader
}
fn next_inner(&mut self) -> Result<Value> {
let last_layer = self.layers.last().map(|l| (*l).clone());
match last_layer {
Some(Layer::List(_, 0)) => {
self.layers.pop();
return Ok(Value::ListEnd);
}
Some(_) => {}
None => {}
}
if let Some(layer) = self.layers.last_mut() {
match layer {
Layer::List(_, remainder) => {
*remainder -= 1;
}
Layer::Compound => {}
};
}
let last_layer = self.layers.last().map(|l| (*l).clone());
if let Some(layer) = last_layer {
match layer {
Layer::List(tag, _) => return self.read_payload(tag, None),
Layer::Compound => {}
};
}
let tag = match self.reader.read_u8() {
Ok(t) => t,
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => return Err(Error::eof()),
e => e?,
};
let tag = u8_to_tag(tag)?;
if tag == Tag::End {
let last_layer = self.layers.last().map(|l| (*l).clone());
return match last_layer {
Some(Layer::Compound) => {
self.layers.pop();
Ok(Value::CompoundEnd)
}
Some(_) => Err(Error::bespoke("expected to be in compound")),
None => Err(Error::bespoke("expected to be in compound")),
};
}
let name = Some(self.read_size_prefixed_string()?);
self.read_payload(tag, name)
}
fn read_size_prefixed_string(&mut self) -> Result<String> {
let name_len = self.reader.read_u16::<BigEndian>()? as usize;
let mut buf = vec![0; name_len];
self.reader.read_exact(&mut buf[..])?;
Ok(cesu8::from_java_cesu8(&buf[..])
.map_err(|_| Error::nonunicode(Vec::from(&buf[..])))?
.into_owned())
}
fn read_payload(&mut self, tag: Tag, name: Name) -> Result<Value> {
match tag {
Tag::Byte => Ok(Value::Byte(name, self.reader.read_i8()?)),
Tag::Short => Ok(Value::Short(name, self.reader.read_i16::<BigEndian>()?)),
Tag::Int => Ok(Value::Int(name, self.reader.read_i32::<BigEndian>()?)),
Tag::Long => Ok(Value::Long(name, self.reader.read_i64::<BigEndian>()?)),
Tag::Float => Ok(Value::Float(name, self.reader.read_f32::<BigEndian>()?)),
Tag::Double => Ok(Value::Double(name, self.reader.read_f64::<BigEndian>()?)),
Tag::Compound => {
self.layers.push(Layer::Compound);
Ok(Value::Compound(name))
}
Tag::End => panic!("end tag should have returned early"),
Tag::List => {
let element_tag = self.reader.read_u8()?;
let element_tag = u8_to_tag(element_tag)?;
let size = self.reader.read_i32::<BigEndian>()?;
self.layers.push(Layer::List(element_tag, size));
Ok(Value::List(name, element_tag, size))
}
Tag::String => Ok(Value::String(name, self.read_size_prefixed_string()?)),
Tag::ByteArray => {
let size = self.reader.read_i32::<BigEndian>()?;
let mut buf = vec![0u8; size as usize];
self.reader.read_exact(&mut buf[..])?;
Ok(Value::ByteArray(name, vec_u8_into_i8(buf)))
}
Tag::IntArray => {
let size = self.reader.read_i32::<BigEndian>()?;
let mut buf = vec![0i32; size as usize];
for i in 0..size {
buf[i as usize] = self.reader.read_i32::<BigEndian>()?;
}
Ok(Value::IntArray(name, buf))
}
Tag::LongArray => {
let size = self.reader.read_i32::<BigEndian>()?;
let mut buf = vec![0i64; size as usize];
for i in 0..size {
buf[i as usize] = self.reader.read_i64::<BigEndian>()?;
}
Ok(Value::LongArray(name, buf))
}
}
}
}
/// Advances `parser` past the remainder of the compound it is currently in.
///
/// The caller is expected to be positioned just inside a compound. Nested
/// compounds encountered on the way are skipped as well; the function returns
/// once the `CompoundEnd` matching the starting compound has been consumed.
///
/// # Errors
///
/// Propagates any error returned by [`Parser::next`].
pub fn skip_compound<R: Read>(parser: &mut Parser<R>) -> Result<()> {
    // Number of compounds still open, counting the one we started in.
    let mut open: i32 = 1;
    loop {
        match parser.next()? {
            Value::Compound(_) => open += 1,
            Value::CompoundEnd => {
                open -= 1;
                if open == 0 {
                    return Ok(());
                }
            }
            _ => {}
        }
    }
}
/// Advances `parser` until a compound with the given `name` has been opened.
///
/// Every value before the match is discarded. On success the parser is
/// positioned just inside the matching compound.
///
/// # Errors
///
/// Propagates any error returned by [`Parser::next`], including the
/// end-of-input error when no matching compound exists.
pub fn find_compound<R: Read>(parser: &mut Parser<R>, name: Option<&str>) -> Result<()> {
    loop {
        if let Value::Compound(found) = parser.next()? {
            if found.as_deref() == name {
                return Ok(());
            }
        }
    }
}
pub fn find_list<R: Read>(parser: &mut Parser<R>, name: Option<&str>) -> Result<usize> {
loop {
match parser.next()? {
Value::List(n, _, size) if n.as_deref() == name => return Ok(size as usize),
_ => {}
}
}
}
/// Reinterprets a `Vec<u8>` as a `Vec<i8>` without copying the elements.
///
/// Each byte keeps its bit pattern, so values above 127 become negative.
fn vec_u8_into_i8(v: Vec<u8>) -> Vec<i8> {
    // Suppress the original vector's destructor: ownership of the allocation
    // is handed over to the vector rebuilt below.
    let mut bytes = std::mem::ManuallyDrop::new(v);
    let (ptr, len, cap) = (
        bytes.as_mut_ptr().cast::<i8>(),
        bytes.len(),
        bytes.capacity(),
    );
    // SAFETY: `ptr`, `len` and `cap` come from a live `Vec<u8>` that will not
    // be dropped, and `i8` has the same size and alignment as `u8`, so the
    // allocation layout is unchanged and every bit pattern is a valid `i8`.
    unsafe { Vec::from_raw_parts(ptr, len, cap) }
}
fn u8_to_tag(tag: u8) -> Result<Tag> {
Tag::try_from(tag).map_err(|_| Error::invalid_tag(tag))
}
/// One entry of the parser's stack of open containers.
#[derive(Clone)]
enum Layer {
/// An open list: the tag of its elements and the number of elements that
/// have not been read yet.
List(Tag, i32),
/// An open compound, closed when an end tag is read.
Compound,
}