use std::collections::HashMap;
use nom::branch::alt;
use nom::bytes::complete::{is_not, tag, take};
use nom::combinator::{map, map_opt, value, verify};
use nom::error::{ErrorKind as NomErrorKind, ParseError as NomParseError};
use nom::multi::{count, separated_list1};
use nom::number::complete::{le_i16, le_i32, le_u16};
use nom::Offset;
use nom::sequence::{terminated, tuple};
use super::{Entry, FieldType, ParseError, ParseErrorKind as ErrorKind};
/// Error classification used while parsing: either one of this crate's own
/// error kinds or a raw nom error kind that has not been translated.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum InternalErrorKind {
/// An error kind from the parent module's `ParseErrorKind`.
Ours(ErrorKind),
/// A nom error kind with no translation; `finish` panics if one of these escapes.
Nom(NomErrorKind),
}
/// Translates nom error kinds into internal ones.
///
/// `Eof`, `Count`, `Tag` and `IsNot` are all reported as
/// `ErrorKind::UnexpectedEof`; every other nom kind is kept as-is inside
/// `InternalErrorKind::Nom`.
impl From<NomErrorKind> for InternalErrorKind {
    fn from(k: NomErrorKind) -> Self {
        match k {
            NomErrorKind::Eof
            | NomErrorKind::Count
            | NomErrorKind::Tag
            | NomErrorKind::IsNot => InternalErrorKind::Ours(ErrorKind::UnexpectedEof),
            untranslated => InternalErrorKind::Nom(untranslated),
        }
    }
}
impl From<ErrorKind> for InternalErrorKind {
fn from(k: ErrorKind) -> Self {
Self::Ours(k)
}
}
/// Error value threaded through the nom parsers in this file.
struct InternalError<I> {
/// The input at the point of failure; `finish` turns it into an offset from the start.
position: I,
/// What went wrong.
kind: InternalErrorKind,
}
impl<I> NomParseError<I> for InternalError<I> {
fn from_error_kind(position: I, kind: NomErrorKind) -> Self {
Self { position, kind: kind.into() }
}
fn append(input: I, kind: NomErrorKind, other: Self) -> Self {
match other.kind {
InternalErrorKind::Ours(_) => other,
InternalErrorKind::Nom(_) => Self::from_error_kind(input, kind),
}
}
}
fn finish<I: Offset>(this: nom::Err<InternalError<I>>, source: I) -> ParseError {
let this = match this {
nom::Err::Error(e) | nom::Err::Failure(e) => e,
_ => unreachable!(),
};
let position = source.offset(&this.position);
let kind = match this.kind {
InternalErrorKind::Ours(x) => x,
InternalErrorKind::Nom(x) => panic!("uncaught nom error: {x:?} at {position}"),
};
ParseError { position, kind }
}
/// Result type of the top-level parse steps; the error defaults to a nom error
/// wrapping an `InternalError` positioned in a `&'a [u8]` input.
type Result<'a, T, E = nom::Err<InternalError<&'a [u8]>>> = std::result::Result<T, E>;
/// nom parser result specialised to `InternalError` as the error type.
type IResult<I, O> = nom::IResult<I, O, InternalError<I>>;
/// Magic number that `magic` maps to `Format::Standard`.
const MAGIC_STANDARD: u16 = 0o432;
/// Magic number that `magic` maps to `Format::Extended`.
const MAGIC_EXTENDED: u16 = 0o1036;
/// Number format of the file, selected by the magic number in the header.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum Format {
/// Integers are read as little-endian 16-bit values (see `integers`).
Standard,
/// Integers are read as little-endian 32-bit values (see `integers`).
Extended,
}
/// Fixed-size header read by `header`: the format marker followed by five
/// little-endian 16-bit sizes (negative values are read as 0).
struct Header {
/// Number format selected by the magic number.
format: Format,
/// Presumably the byte length of the names section; `file` only uses it to
/// compute the alignment padding after the booleans.
len_names: usize,
/// Number of boolean entries parsed by `booleans`.
num_bools: usize,
/// Number of integer entries parsed by `integers`.
num_ints: usize,
/// Number of string offsets parsed by `string_offsets`.
num_strs: usize,
/// Byte length of the string table that follows the string offsets.
len_strs: usize,
}
/// Header of the extended section read by `user_header`. Five 16-bit sizes are
/// read from the input; the fourth is discarded.
struct UserHeader {
/// Number of extended boolean entries.
num_bools: usize,
/// Number of extended integer entries.
num_ints: usize,
/// Number of extended string offsets.
num_strs: usize,
/// Byte length of the extended string table, which `file` uses for both the
/// string values and the capability names.
len_strs: usize,
}
fn map_k<I: Clone, O, F: nom::Parser<I, O, InternalError<I>>>(mut f: F, kind: ErrorKind) ->
impl FnMut(I) -> IResult<I, O>
{
use nom::Err as E;
move |i: I| match f.parse(i.clone()) {
Ok(o) => Ok(o),
Err(E::Incomplete(i)) => Err(E::Incomplete(i)),
Err(E::Error(e)) => Err(E::Error(InternalError { kind: kind.into(), ..e })),
Err(E::Failure(e)) => Err(E::Failure(InternalError { kind: kind.into(), ..e })),
}
}
/// Reads a little-endian `i16` and returns it as a size, treating any negative
/// value as 0.
fn size_from_i16(input: &[u8]) -> IResult<&[u8], usize> {
    let clamp_to_size = |raw: i16| raw.max(0) as usize;
    map(le_i16, clamp_to_size)(input)
}
/// Converts a non-empty, pure-ASCII byte slice into an owned `String`.
///
/// Returns `None` when `bytes` is empty or contains any non-ASCII byte.
fn from_ascii(bytes: &[u8]) -> Option<String> {
    if bytes.is_empty() || !bytes.is_ascii() {
        return None;
    }
    // ASCII is a subset of UTF-8, so the checked conversion cannot fail here;
    // using it removes the need for an `unsafe` block.
    String::from_utf8(bytes.to_vec()).ok()
}
/// Builds a parser for a non-empty ASCII string that runs up to (but does not
/// consume) any byte contained in `term`.
///
/// A missing or empty run is reported as `ErrorKind::MissingName`; a run that
/// `from_ascii` rejects is reported as `ErrorKind::NonAsciiName`.
fn terminated_ascii<'a, T>(term: T) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], String>
where
    T: nom::FindToken<<&'a [u8] as nom::InputTakeAtPosition>::Item>,
{
    let non_empty_run = verify(is_not(term), |raw: &[u8]| !raw.is_empty());
    let present = map_k(non_empty_run, ErrorKind::MissingName);
    let as_string = map_opt(present, from_ascii);
    map_k(as_string, ErrorKind::NonAsciiName)
}
fn magic(input: &[u8]) -> IResult<&[u8], Format> {
let (r, m) = le_u16(input)?;
match m {
MAGIC_STANDARD => Ok((r, Format::Standard)),
MAGIC_EXTENDED => Ok((r, Format::Extended)),
_ => Err(nom::Err::Failure(InternalError { position: input, kind: ErrorKind::InvalidHeader.into() })),
}
}
fn header(input: &[u8]) -> IResult<&[u8], Header> {
map(
tuple((
magic,
size_from_i16,
size_from_i16,
size_from_i16,
size_from_i16,
size_from_i16,
)),
|(format, len_names, num_bools, num_ints, num_strs, len_strs)| Header {
format,
len_names,
num_bools,
num_ints,
num_strs,
len_strs,
}
)(input)
}
fn user_header(input: &[u8]) -> IResult<&[u8], UserHeader> {
map(
tuple((
size_from_i16,
size_from_i16,
size_from_i16,
size_from_i16,
size_from_i16,
)),
|(num_bools, num_ints, num_strs, _, len_strs)| UserHeader {
num_bools,
num_ints,
num_strs,
len_strs,
}
)(input)
}
/// Parses the names section: one or more non-empty ASCII names separated by
/// `|` and terminated by a NUL byte, which is consumed.
fn names(input: &[u8]) -> IResult<&[u8], Vec<String>> {
    let listed: IResult<&[u8], Vec<String>> =
        separated_list1(tag("|"), terminated_ascii("|\0"))(input);
    let (after_names, list) = listed?;
    let terminator: IResult<&[u8], &[u8]> = tag("\0")(after_names);
    let (remaining, _) = terminator?;
    Ok((remaining, list))
}
/// Parses `count_` boolean bytes (`0x00` = false, `0x01` = true) and then skips
/// `align` padding bytes.
///
/// Any other byte value is reported as `ErrorKind::InvalidField(FieldType::Boolean)`.
fn booleans(input: &[u8], count_: usize, align: usize) -> IResult<&[u8], Vec<bool>> {
    let unset = value(false, tag("\x00"));
    let set = value(true, tag("\x01"));
    let flag = map_k(alt((unset, set)), ErrorKind::InvalidField(FieldType::Boolean));
    let flags = count(flag, count_);
    terminated(flags, take(align))(input)
}
/// Parses `count_` integers, 16-bit for `Format::Standard` and 32-bit for
/// `Format::Extended`, both little-endian.
///
/// `-1` and `-2` decode to `None`, non-negative values to `Some(value)`, and
/// any other negative value is reported as
/// `ErrorKind::InvalidField(FieldType::Integer)`.
fn integers(input: &[u8], count_: usize, format: Format) -> IResult<&[u8], Vec<Option<i32>>> {
    /// Reads a 16-bit value and widens it to `i32`.
    fn narrow(input: &[u8]) -> IResult<&[u8], i32> {
        map(le_i16, i32::from)(input)
    }

    /// Reads a 32-bit value.
    fn wide(input: &[u8]) -> IResult<&[u8], i32> {
        le_i32(input)
    }

    /// Maps a raw value to absent / present / invalid.
    fn decode(raw: i32) -> Option<Option<i32>> {
        match raw {
            -2 | -1 => Some(None),
            n if n >= 0 => Some(Some(n)),
            _ => None,
        }
    }

    let int: fn(&[u8]) -> IResult<&[u8], i32> = match format {
        Format::Standard => narrow,
        Format::Extended => wide,
    };
    let field = map_k(map_opt(int, decode), ErrorKind::InvalidField(FieldType::Integer));
    count(field, count_)(input)
}
fn string_offsets(input: &[u8], count_: usize) -> IResult<&[u8], Vec<Option<usize>>> {
count(
map_k(
map_opt(
le_i16,
|x| match x {
-2 => Some(None),
-1 => Some(None),
x if x < 0 => None,
x if x as usize >= input.len() => Some(None),
x => Some(Some(x as usize)),
}
),
ErrorKind::InvalidField(FieldType::String)
),
count_
)(input)
}
fn byte_string(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
map(terminated(is_not("\0"), tag("\0")), <[u8]>::to_vec)(input)
}
/// Extracts the NUL-terminated string found at each present offset of the
/// string table `input`.
///
/// Absent offsets (`None`) stay `None` in the output and order is preserved.
///
/// # Errors
///
/// Propagates the error from `byte_string` when a string cannot be read. An
/// offset past the end of `input` is clamped to the end of the table, so it is
/// reported through that same error path instead of panicking on an
/// out-of-range slice.
fn strings(input: &[u8], offs: Vec<Option<usize>>) -> Result<Vec<Option<Vec<u8>>>> {
    offs.into_iter()
        .map(|m_off| {
            m_off
                .map(|off| {
                    // Offsets were not validated against this table's length.
                    let start = off.min(input.len());
                    byte_string(&input[start..]).map(|(_, bytes)| bytes)
                })
                .transpose()
        })
        .collect()
}
/// Parses `count_` little-endian 16-bit name offsets.
///
/// Each offset must be non-negative and smaller than the length of `input`
/// (the data remaining when this function is entered); anything else is
/// reported as `ErrorKind::MissingName`.
fn name_offsets(input: &[u8], count_: usize) -> IResult<&[u8], Vec<usize>> {
    let limit = input.len();
    let in_range = move |raw: i16| -> Option<usize> {
        (raw >= 0)
            .then(|| raw as usize)
            .filter(|&off| off < limit)
    };
    let field = map_k(map_opt(le_i16, in_range), ErrorKind::MissingName);
    count(field, count_)(input)
}
/// Parses one non-empty ASCII name followed by a NUL terminator, which is
/// consumed.
fn name_string(input: &[u8]) -> IResult<&[u8], String> {
    let (after_name, name) = terminated_ascii("\0")(input)?;
    let terminator: IResult<&[u8], &[u8]> = tag("\0")(after_name);
    let (remaining, _) = terminator?;
    Ok((remaining, name))
}
/// Reads the NUL-terminated ASCII name found at each offset of the name table
/// `input`, preserving order.
///
/// # Errors
///
/// Propagates the error from `name_string` when a name cannot be read. An
/// offset past the end of `input` is clamped to the end of the table, so it is
/// reported through that same error path instead of panicking on an
/// out-of-range slice.
fn cap_names(input: &[u8], offs: Vec<usize>) -> Result<Vec<String>> {
    offs.into_iter()
        .map(|off| {
            // Offsets were not validated against this table's length.
            let start = off.min(input.len());
            name_string(&input[start..]).map(|(_, name)| name)
        })
        .collect()
}
fn file(input: &[u8]) -> Result<Entry> {
let (input, header @ Header { format, .. }) = header(input)?;
let (input, names) = names(input)?;
let align = (header.len_names + header.num_bools) % 2;
let (input, bools) = booleans(input, header.num_bools, align)?;
let (input, ints) = integers(input, header.num_ints, format)?;
let (input, offs) = string_offsets(input, header.num_strs)?;
let (strings_, rest) = input.split_at(header.len_strs);
let strs = strings(strings_, offs)?;
let mut ext_bools = HashMap::new();
let mut ext_ints = HashMap::new();
let mut ext_strs = HashMap::new();
if !rest.is_empty() {
let align = header.len_strs % 2;
let input = &rest[align..];
let (input, header) = user_header(input)?;
let (input, bools) = booleans(input, header.num_bools, header.num_bools % 2)?;
let (input, ints) = integers(input, header.num_ints, format)?;
let (input, offs) = string_offsets(input, header.num_strs)?;
let (input, names) = name_offsets(input, header.num_bools + header.num_ints + header.num_strs)?;
let input = &input[..header.len_strs];
let strs = strings(input, offs)?;
let base = strs.iter().fold(0, |acc, x| acc + x.as_ref().map(|x| x.len() + 1).unwrap_or_default());
let mut boolnames = cap_names(&input[base..], names)?;
let mut intnames = boolnames.split_off(header.num_bools);
let strnames = intnames.split_off(header.num_ints);
ext_bools.extend(boolnames.into_iter().zip(bools));
ext_ints.extend(intnames.into_iter().zip(ints));
ext_strs.extend(strnames.into_iter().zip(strs));
}
Ok(Entry {
names,
bools,
ints,
strs,
ext_bools,
ext_ints,
ext_strs,
})
}
/// Parses a complete entry from `input`.
///
/// # Errors
///
/// Returns a `ParseError` carrying the error kind and the byte offset into
/// `input` at which parsing failed.
///
/// # Panics
///
/// Panics (inside `finish`) if a nom error kind with no translation escapes
/// the parsers.
pub fn parse(input: &[u8]) -> Result<Entry, ParseError> {
    match file(input) {
        Ok(entry) => Ok(entry),
        Err(failure) => Err(finish(failure, input)),
    }
}