// infoterm 0.1.1
//
// ncurses-compatible terminfo parsing library
// Documentation
use std::collections::HashMap;

use nom::branch::alt;
use nom::bytes::complete::{is_not, tag, take};
use nom::combinator::{map, map_opt, value, verify};
use nom::error::{ErrorKind as NomErrorKind, ParseError as NomParseError};
use nom::multi::{count, separated_list1};
use nom::number::complete::{le_i16, le_i32, le_u16};
use nom::Offset;
use nom::sequence::{terminated, tuple};

use super::{Entry, FieldType, ParseError, ParseErrorKind as ErrorKind};

/// Error kind carried while parsing, before it is converted to the public `ErrorKind`.
///
/// nom reports failures using its own error kinds; this enum records whether such a
/// failure has already been translated into one of this crate's kinds.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum InternalErrorKind {
    /// A kind from this crate's public error type; `finish` returns it as is.
    Ours(ErrorKind),
    /// A raw nom kind with no translation; `finish` panics if one reaches it.
    Nom(NomErrorKind),
}

/// Default translation of nom error kinds into internal ones.
impl From<NomErrorKind> for InternalErrorKind {
    fn from(k: NomErrorKind) -> Self {
        match k {
            // All of these are reported as running out of input:
            // - Eof:   the input ended.
            // - Count: only seen if the inner parser of count() was not mapped already.
            // - Tag:   only seen if the result of tag() was not mapped already, which
            //          probably means it was a terminator and we hit the end of input.
            // - IsNot: missing terminator character.
            NomErrorKind::Eof
            | NomErrorKind::Count
            | NomErrorKind::Tag
            | NomErrorKind::IsNot => InternalErrorKind::Ours(ErrorKind::UnexpectedEof),
            // Anything else has no default translation and stays a nom kind.
            untranslated => InternalErrorKind::Nom(untranslated),
        }
    }
}

impl From<ErrorKind> for InternalErrorKind {
    fn from(k: ErrorKind) -> Self {
        Self::Ours(k)
    }
}

/// Error type plugged into nom while parsing.
struct InternalError<I> {
    /// Input value associated with the failure; `finish` turns it into an offset
    /// relative to the original source.
    position: I,
    /// What went wrong, either already translated or still a raw nom kind.
    kind: InternalErrorKind,
}

impl<I> NomParseError<I> for InternalError<I> {
    fn from_error_kind(position: I, kind: NomErrorKind) -> Self {
        Self { position, kind: kind.into() }
    }

    fn append(input: I, kind: NomErrorKind, other: Self) -> Self {
        match other.kind {
            // If we already found an error we care about, return it as is
            InternalErrorKind::Ours(_) => other,
            // If going from one nom error to another (e.g. Count), see if we can
            // map it to one of your error types. This prevents e.g. Count from
            // returning UnexpectedEof because an inner parser failed.
            InternalErrorKind::Nom(_) => Self::from_error_kind(input, kind),
        }
    }
}

fn finish<I: Offset>(this: nom::Err<InternalError<I>>, source: I) -> ParseError {
    let this = match this {
        nom::Err::Error(e) | nom::Err::Failure(e) => e,
        _ => unreachable!(),
    };

    let position = source.offset(&this.position);

    let kind = match this.kind {
        InternalErrorKind::Ours(x) => x,
        InternalErrorKind::Nom(x) => panic!("uncaught nom error: {x:?} at {position}"),
    };

    ParseError { position, kind }
}

// Result of a parsing step that does not return the remaining input; the error
// defaults to a nom error wrapping `InternalError` over a byte slice.
type Result<'a, T, E = nom::Err<InternalError<&'a [u8]>>> = std::result::Result<T, E>;
// nom's `IResult` specialised to use `InternalError` as the error type.
type IResult<I, O> = nom::IResult<I, O, InternalError<I>>;

// Leading 16-bit value that `magic` maps to `Format::Standard`.
// NOTE(review): matches the legacy magic number given in term(5) — confirm.
const MAGIC_STANDARD: u16 = 0o432;
// Leading 16-bit value that `magic` maps to `Format::Extended`.
// NOTE(review): matches the extended-number-format magic given in term(5) — confirm.
const MAGIC_EXTENDED: u16 = 0o1036;

/// File format variant, selected by the magic number at the start of the file.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum Format {
    /// Integer capabilities are read as little-endian `i16` (see `integers`).
    Standard,
    /// Integer capabilities are read as little-endian `i32` (see `integers`).
    Extended,
}

/// Decoded file header: the magic number followed by five 16-bit sizes.
///
/// Negative sizes in the file are stored here as zero (see `size_from_i16`).
struct Header {
    /// Format selected by the magic number.
    format: Format,
    /// Size of the names section; `file` uses it only to compute alignment padding.
    len_names: usize,
    /// Number of entries in the boolean section.
    num_bools: usize,
    /// Number of entries in the integer section.
    num_ints: usize,
    /// Number of entries in the string offset table.
    num_strs: usize,
    /// Size in bytes of the string table.
    len_strs: usize,
}

/// Decoded header of the section that follows the main string table.
///
/// The header holds five 16-bit values in the file; the fourth is read and discarded.
struct UserHeader {
    /// Number of boolean values in this section.
    num_bools: usize,
    /// Number of integer values in this section.
    num_ints: usize,
    /// Number of string value offsets in this section.
    num_strs: usize,
    /* skipping fourth field */
    /// Size in bytes of this section's string table.
    len_strs: usize,
}

/// Combinator that reports `kind` whenever the wrapped parser `f` fails.
/// Loosely based on nom's `context` combinator.
///
/// `InternalError`'s `ParseError` impl translates some nom errors (such as Eof, Tag
/// and Count) into custom ones by default. That default is often useful, but
/// sometimes a context-specific error is more appropriate.
///
/// `map_k` unconditionally replaces the stored error kind with the one provided,
/// for both `Error` and `Failure`; the error position is kept and `Incomplete`
/// passes through untouched.
fn map_k<I: Clone, O, F: nom::Parser<I, O, InternalError<I>>>(mut f: F, kind: ErrorKind) ->
    impl FnMut(I) -> IResult<I, O>
{
    move |i: I| {
        f.parse(i.clone()).map_err(|failure| {
            // `nom::Err::map` rewrites the payload of Error/Failure and leaves
            // Incomplete as it is.
            failure.map(|inner| InternalError {
                position: inner.position,
                kind: kind.into(),
            })
        })
    }
}

fn size_from_i16(input: &[u8]) -> IResult<&[u8], usize> {
    map(le_i16, |i| if i < 0 { 0 } else { i as usize })(input)
}

/// Converts `bytes` to an owned `String` if they are non-empty and entirely ASCII.
///
/// Returns `None` for an empty slice or when any byte is outside the ASCII range.
fn from_ascii(bytes: &[u8]) -> Option<String> {
    if bytes.is_empty() || !bytes.is_ascii() {
        return None;
    }

    // ASCII is always valid UTF-8, so this conversion cannot fail here.
    String::from_utf8(bytes.to_vec()).ok()
}

fn terminated_ascii<'a, T>(term: T) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], String>
where
    T: nom::FindToken<<&'a [u8] as nom::InputTakeAtPosition>::Item> {
    map_k(
        map_opt(
            map_k(
                verify(is_not(term), |x: &[u8]| !x.is_empty()),
                ErrorKind::MissingName,
            ),
            from_ascii
        ),
        ErrorKind::NonAsciiName
    )
}

/// Reads the little-endian 16-bit magic number and maps it to a `Format`.
///
/// An unknown magic number is a hard `Failure` with `InvalidHeader`, positioned at
/// the start of `input`.
fn magic(input: &[u8]) -> IResult<&[u8], Format> {
    let (rest, raw) = le_u16(input)?;

    let format = match raw {
        MAGIC_STANDARD => Format::Standard,
        MAGIC_EXTENDED => Format::Extended,
        _ => {
            return Err(nom::Err::Failure(InternalError {
                position: input,
                kind: ErrorKind::InvalidHeader.into(),
            }));
        }
    };

    Ok((rest, format))
}

fn header(input: &[u8]) -> IResult<&[u8], Header> {
    map(
        tuple((
            magic,
            size_from_i16,
            size_from_i16,
            size_from_i16,
            size_from_i16,
            size_from_i16,
        )),
        |(format, len_names, num_bools, num_ints, num_strs, len_strs)| Header {
            format,
            len_names,
            num_bools,
            num_ints,
            num_strs,
            len_strs,
        }
    )(input)
}

fn user_header(input: &[u8]) -> IResult<&[u8], UserHeader> {
    map(
        tuple((
            size_from_i16,
            size_from_i16,
            size_from_i16,
            size_from_i16,
            size_from_i16,
        )),
        |(num_bools, num_ints, num_strs, _, len_strs)| UserHeader {
            num_bools,
            num_ints,
            num_strs,
            len_strs,
        }
    )(input)
}

/// Parses the names section: one or more `|`-separated ASCII names followed by a
/// NUL terminator, which is consumed.
fn names(input: &[u8]) -> IResult<&[u8], Vec<String>> {
    // Each name stops at either the separator or the terminator.
    let (rest, list) = separated_list1(tag("|"), terminated_ascii("|\0"))(input)?;
    let (rest, _) = tag("\0")(rest)?;

    Ok((rest, list))
}

/// Parses `count_` boolean flags, then skips `align` padding bytes.
///
/// Each flag is a single byte, `0x00` for false and `0x01` for true; any failure
/// to read a flag is reported as `InvalidField(FieldType::Boolean)`.
fn booleans(input: &[u8], count_: usize, align: usize) -> IResult<&[u8], Vec<bool>> {
    let flag = map_k(
        alt((
            value(false, tag("\x00")),
            value(true, tag("\x01")),
        )),
        ErrorKind::InvalidField(FieldType::Boolean),
    );

    let (rest, flags) = count(flag, count_)(input)?;
    let (rest, _) = take(align)(rest)?;

    Ok((rest, flags))
}

/// Parses `count_` integer values whose width depends on `format`.
///
/// `-1` and `-2` both decode to `None`, non-negative values to `Some(value)`, and
/// any other negative value is an error. Every failure to read a value is
/// reported as `InvalidField(FieldType::Integer)`.
fn integers(input: &[u8], count_: usize, format: Format) -> IResult<&[u8], Vec<Option<i32>>> {
    // Little-endian i16, widened to i32.
    fn narrow(bytes: &[u8]) -> IResult<&[u8], i32> {
        map(le_i16, i32::from)(bytes)
    }

    // Little-endian i32.
    fn wide(bytes: &[u8]) -> IResult<&[u8], i32> {
        le_i32(bytes)
    }

    let read_raw: fn(&[u8]) -> IResult<&[u8], i32> = match format {
        Format::Standard => narrow,
        Format::Extended => wide,
    };

    let decode = |raw: i32| -> Option<Option<i32>> {
        if raw >= 0 {
            Some(Some(raw))
        } else if raw >= -2 {
            // -1 and -2 are the two accepted "no value" markers.
            Some(None)
        } else {
            None
        }
    };

    count(
        map_k(
            map_opt(read_raw, decode),
            ErrorKind::InvalidField(FieldType::Integer),
        ),
        count_,
    )(input)
}

fn string_offsets(input: &[u8], count_: usize) -> IResult<&[u8], Vec<Option<usize>>> {
    count(
        map_k(
            map_opt(
                le_i16,
                |x| match x {
                    -2 => Some(None),
                    -1 => Some(None),
                    // Any other negative value is invalid
                    x if x < 0 => None,
                    // Treat out of range offset as absent
                    x if x as usize >= input.len() => Some(None),
                    x => Some(Some(x as usize)),
                }
            ),
            ErrorKind::InvalidField(FieldType::String)
        ),
        count_
    )(input)
}

/// Parses a NUL-terminated byte string and returns its bytes without the terminator.
///
/// NOTE(review): `is_not` needs at least one byte before the terminator, so an
/// empty string (a lone NUL) is rejected — confirm this is intended.
fn byte_string(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
    let (rest, body) = is_not("\0")(input)?;
    let (rest, _) = tag("\0")(rest)?;

    Ok((rest, body.to_vec()))
}

/// Resolves string offsets against the string table `input`.
///
/// Each `Some(off)` is read as a NUL-terminated string starting at `input[off..]`;
/// `None` entries stay `None`. An offset beyond the end of the table is treated as
/// absent instead of panicking on the slice: the range check in `string_offsets`
/// is made against a longer slice than the table, so such offsets can reach here.
///
/// Returns the first error produced while reading a string, if any.
fn strings(input: &[u8], offs: Vec<Option<usize>>) -> Result<Vec<Option<Vec<u8>>>> {
    offs
    .into_iter()
    .map(|m_off| {
        m_off
        // `get` yields None for an out-of-range offset, which then reads as absent.
        .and_then(|off| input.get(off..))
        .map(|tail| byte_string(tail).map(|(_, bytes)| bytes))
        .transpose()
    })
    .collect()
}

/// Parses `count_` little-endian `i16` name offsets.
///
/// An offset is accepted only if it is non-negative and smaller than the length of
/// `input` as passed to this function; anything else is reported as `MissingName`.
///
/// NOTE(review): that limit is the whole remaining input, not the size of the
/// table the offsets point into, so callers must still bounds-check them.
fn name_offsets(input: &[u8], count_: usize) -> IResult<&[u8], Vec<usize>> {
    let limit = input.len();

    let decode = move |raw: i16| -> Option<usize> {
        match raw {
            n if n >= 0 && (n as usize) < limit => Some(n as usize),
            _ => None,
        }
    };

    count(
        map_k(map_opt(le_i16, decode), ErrorKind::MissingName),
        count_,
    )(input)
}

/// Parses a non-empty, NUL-terminated ASCII name and consumes the terminator.
fn name_string(input: &[u8]) -> IResult<&[u8], String> {
    let (rest, name) = terminated_ascii("\0")(input)?;
    let (rest, _) = tag("\0")(rest)?;

    Ok((rest, name))
}

fn cap_names(input: &[u8], offs: Vec<usize>) -> Result<Vec<String>> {
    offs
    .into_iter()
    .map(|off| name_string(&input[off..]).map(|x| x.1))
    .collect()
}

fn file(input: &[u8]) -> Result<Entry> {
    let (input, header @ Header { format, .. }) = header(input)?;

    let (input, names) = names(input)?;

    let align = (header.len_names + header.num_bools) % 2;

    let (input, bools) = booleans(input, header.num_bools, align)?;
    let (input, ints) = integers(input, header.num_ints, format)?;
    let (input, offs) = string_offsets(input, header.num_strs)?;

    let (strings_, rest) = input.split_at(header.len_strs);

    let strs = strings(strings_, offs)?;

    let mut ext_bools = HashMap::new();
    let mut ext_ints = HashMap::new();
    let mut ext_strs = HashMap::new();

    if !rest.is_empty() {
        let align = header.len_strs % 2;

        let input = &rest[align..];

        let (input, header) = user_header(input)?;

        let (input, bools) = booleans(input, header.num_bools, header.num_bools % 2)?;
        let (input, ints) = integers(input, header.num_ints, format)?;
        let (input, offs) = string_offsets(input, header.num_strs)?;

        let (input, names) = name_offsets(input, header.num_bools + header.num_ints + header.num_strs)?;

        let input = &input[..header.len_strs];

        let strs = strings(input, offs)?;

        // This is what ncurses does. It's not super correct (assumes strings are densely packed)
        // but the format doesn't actually store the offset of the name table...
        // Since this is an ncurses-only extension anyway, let's hope this is fine enough.
        let base = strs.iter().fold(0, |acc, x| acc + x.as_ref().map(|x| x.len() + 1).unwrap_or_default());

        let mut boolnames = cap_names(&input[base..], names)?;
        let mut intnames = boolnames.split_off(header.num_bools);
        let strnames = intnames.split_off(header.num_ints);

        ext_bools.extend(boolnames.into_iter().zip(bools));
        ext_ints.extend(intnames.into_iter().zip(ints));
        ext_strs.extend(strnames.into_iter().zip(strs));
    }

    Ok(Entry {
        names,
        bools,
        ints,
        strs,
        ext_bools,
        ext_ints,
        ext_strs,
    })
}

/// Parses a compiled terminfo entry from `input`.
///
/// # Errors
///
/// Returns a `ParseError` whose position is the byte offset into `input` at which
/// parsing failed.
pub fn parse(input: &[u8]) -> Result<Entry, ParseError> {
    match file(input) {
        Ok(entry) => Ok(entry),
        Err(failure) => Err(finish(failure, input)),
    }
}