token-parser 0.6.0

Utilities for parsing texts into data structures
Documentation
#![deny(missing_docs)]

/*!
Utilities for parsing a format based on nested lists into arbitrary data structures.
It is also meant to be used as a backend for parsers.
*/

use std::path::PathBuf;

use thiserror::Error;

/// A trait required for all contexts being used for token parsing.
///
/// A context is passed by reference to every `Parsable` entry point.
/// In this file, only the empty tuple implements it.
///
/// Without the `radix-parsing` feature the trait has no items; it exists to stay
/// compatible with features and to allow later extension without breaking
/// implementors.
pub trait Context {
    #[cfg(feature = "radix-parsing")]
    #[inline]
    /// Specifies the radix if the `radix-parsing` feature is enabled.
    ///
    /// The default implementation returns `10`.
    fn radix(&self) -> u32 {
        10
    }
}

// The unit type serves as the context for callers which need no extra state.
impl Context for () {}

/// A position in the source text, stored as zero-based line and column indices.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Span {
    /// Zero-based line index.
    pub line: usize,
    /// Zero-based column index.
    pub column: usize,
}

impl std::fmt::Display for Span {
    /// Writes the position as `line:column`, with both indices shifted to their one-based form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { line, column } = *self;
        let shown_line = line + 1;
        let shown_column = column + 1;
        write!(f, "{}:{}", shown_line, shown_column)
    }
}

/// The kind of error that occurred during token parsing.
///
/// The `Display` text of each variant is given by its `#[error(...)]` attribute.
#[derive(Debug, Error)]
pub enum ErrorKind {
    /// The sublist contains fewer elements than expected.
    ///
    /// The payload is the amount inserted into the message.
    #[error("Not enough elements: {0} more expected")]
    NotEnoughElements(usize),

    /// The sublist contains more elements than expected.
    ///
    /// The payload is the amount inserted into the message.
    #[error("Too many elements: {0} less expected")]
    TooManyElements(usize),

    /// No list is allowed in this context.
    #[error("List not allowed")]
    ListNotAllowed,

    /// No symbol is allowed in this context.
    #[error("Symbol not allowed")]
    SymbolNotAllowed,

    /// A symbol could not be parsed from its string form.
    #[error("String parsing error")]
    StringParsing,

    /// Some specific element is invalid.
    #[error("Invalid element")]
    InvalidElement,
}

/// The error type for token parsing, containing a kind and an optional source position.
///
/// An error created from a bare `ErrorKind` starts without a position.
#[derive(Debug)]
pub struct Error {
    /// The kind of error.
    pub kind: ErrorKind,
    /// The source position where the error occurred, or `None` if it is not known.
    pub span: Option<Span>,
}

impl std::fmt::Display for Error {
    /// Writes the error kind, prefixed by the span and `": "` when a position is attached.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.span {
            Some(span) => write!(f, "{span}: {}", self.kind),
            None => write!(f, "{}", self.kind),
        }
    }
}

// No method of the standard error trait is overridden here.
impl std::error::Error for Error {}

impl From<ErrorKind> for Error {
    fn from(kind: ErrorKind) -> Self {
        Self { kind, span: None }
    }
}

impl Error {
    /// Attaches `span` to the error unless it already carries a position.
    ///
    /// A position which is already present is kept unchanged.
    fn at(self, span: Span) -> Self {
        let Self { kind, span: known } = self;
        Self {
            kind,
            span: Some(known.unwrap_or(span)),
        }
    }
}

/// The result type for token parsing: a standard `Result` whose error type is fixed to [`Error`].
pub type Result<T> = std::result::Result<T, Error>;

/// A unit of parser input, which represents an intermediate state: either a single symbol or a
/// nested list.
///
/// Both variants carry a source span, which is available through `Unit::span`.
#[derive(Clone)]
pub enum Unit {
    /// The current unit is a single symbol, stored together with its source span.
    Symbol(Box<str>, Span),
    /// The current unit is a parser, which can yield multiple units.
    Parser(Parser),
}

impl Unit {
    /// Returns the source span of this unit.
    pub fn span(&self) -> Span {
        match self {
            Self::Symbol(_, span) => *span,
            Self::Parser(parser) => parser.span,
        }
    }

    /// Returns the symbol, if applicable, as a result type.
    pub fn symbol(self) -> Result<Box<str>> {
        if let Self::Symbol(name, _) = self {
            Ok(name)
        } else {
            Err(ErrorKind::ListNotAllowed.into())
        }
    }

    /// Returns the parser, if applicable, as a result type.
    pub fn parser(self) -> Result<Parser> {
        if let Self::Parser(parser) = self {
            Ok(parser)
        } else {
            Err(ErrorKind::SymbolNotAllowed.into())
        }
    }

    /// Replaces all occurrences of a symbol with another symbol, recursively.
    pub fn substitute(&mut self, variable: &str, value: &str) {
        match self {
            Self::Symbol(name, _) => {
                if name.as_ref() == variable {
                    *name = value.into();
                }
            }
            Self::Parser(parser) => parser.substitute(variable, value),
        }
    }
}

impl<C: Context> Parsable<C> for Unit {
    fn parse_symbol(name: Box<str>, span: Span, _context: &C) -> Result<Self> {
        Ok(Self::Symbol(name, span))
    }

    fn parse_list(parser: &mut Parser, _context: &C) -> Result<Self> {
        let form = std::mem::take(&mut parser.form);
        let span = parser.span;
        Ok(Self::Parser(Parser {
            form,
            count: 0,
            span,
        }))
    }
}

/// This trait needs to be implemented for every struct which can be parsed using the token parser.
#[allow(clippy::boxed_local)]
pub trait Parsable<C: Context>: Sized {
    /// When a symbol is found by the parser, this will be called.
    fn parse_symbol(_name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
        Err(ErrorKind::SymbolNotAllowed.into())
    }

    /// When a subparser is found by the parser, this will be called.
    fn parse_list(_parser: &mut Parser, _context: &C) -> Result<Self> {
        Err(ErrorKind::ListNotAllowed.into())
    }
}

/// Hands a unit to the matching `Parsable` entry point of `P`.
///
/// An error which carries no position yet is tagged with the span of `unit`.
fn parse<C: Context, P: Parsable<C>>(unit: Unit, context: &C) -> Result<P> {
    // Taken up front: the unit is consumed by the dispatch below.
    let span = unit.span();
    let outcome = match unit {
        Unit::Symbol(name, _) => P::parse_symbol(name, span, context),
        Unit::Parser(mut list) => P::parse_list(&mut list, context),
    };
    outcome.map_err(|error| error.at(span))
}

impl<C: Context, T: Parsable<C>> Parsable<C> for Box<T> {
    /// Parses the symbol as `T` and boxes the result.
    fn parse_symbol(name: Box<str>, span: Span, context: &C) -> Result<Self> {
        T::parse_symbol(name, span, context).map(Box::new)
    }

    /// Parses the list as `T` and boxes the result.
    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
        T::parse_list(parser, context).map(Box::new)
    }
}

impl<C: Context, T: Parsable<C>> Parsable<C> for Vec<T> {
    /// Parses every remaining unit of the list as `T`, in order.
    ///
    /// Stops at the first element which fails to parse and returns its error; the
    /// remaining units are removed from the parser in either case.
    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
        let mut items = Vec::new();
        // The units are stored back to front, so draining in reverse restores their order.
        for unit in parser.form.drain(..).rev() {
            parser.count += 1;
            items.push(parse(unit, context)?);
        }
        Ok(items)
    }
}

impl<C: Context> Parsable<C> for String {
    /// Converts the symbol text into an owned `String`; this never fails.
    fn parse_symbol(name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
        Ok(String::from(name))
    }
}

impl<C: Context> Parsable<C> for Box<str> {
    /// Hands the symbol text back unchanged; this never fails.
    fn parse_symbol(symbol: Box<str>, _: Span, _: &C) -> Result<Self> {
        Ok(symbol)
    }
}

impl<C: Context> Parsable<C> for PathBuf {
    /// Uses the symbol text as a path; this never fails.
    fn parse_symbol(name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
        let text: &str = &name;
        Ok(PathBuf::from(text))
    }
}

/// Derives `Parsable` from symbol for types which implement `FromStr`.
///
/// Accepts one or more comma-separated types, optionally followed by a trailing comma.
/// For each type, `parse_symbol` parses the symbol text with `str::parse` and maps any
/// failure to `ErrorKind::StringParsing`; `parse_list` is not overridden.
///
/// All types are handled by a single repetition instead of a recursive invocation, so the
/// macro does not need to be resolvable by its bare name at the call site. This keeps
/// calls by path, such as `some_crate::derive_symbol_parsable!(A, B)`, working.
#[macro_export]
macro_rules! derive_symbol_parsable {
    ($($t:ty),+ $(,)?) => {
        $(
            impl<C: $crate::Context> $crate::Parsable<C> for $t {
                fn parse_symbol(name: Box<str>, _span: $crate::Span, _context: &C) -> $crate::Result<Self> {
                    name.parse().map_err(|_| $crate::ErrorKind::StringParsing.into())
                }
            }
        )+
    };
}

// NOTE(review): `numbers` is not visible from here; presumably it provides the numeric
// `Parsable` impls used when the `radix-parsing` feature is disabled. Confirm.
#[cfg(not(feature = "radix-parsing"))]
mod numbers;
// `bool` implements `FromStr`, so it gets the symbol-based impl in every configuration.
derive_symbol_parsable!(bool);

/// The token parser to parse the units into wanted types.
///
/// A parser holds the remaining units of one list and hands them out front to back.
#[derive(Clone)]
pub struct Parser {
    // Remaining units, stored back to front so that the next one is taken with `pop`.
    form: Vec<Unit>,
    // Number of units requested so far; reported by `NotEnoughElements` in `parse_next`.
    count: usize,
    // Source position reported for this list.
    span: Span,
}

impl Parser {
    /// Creates a new parser from a list of objects.
    pub fn new<I: IntoIterator>(form: I) -> Self
    where
        I::Item: Into<Unit>,
    {
        let mut form: Vec<_> = form.into_iter().map(I::Item::into).collect();
        form.reverse();
        Self {
            form,
            count: 0,
            span: Span::default(),
        }
    }

    /// Sets the span for this parser (builder pattern).
    pub fn with_span(mut self, span: Span) -> Self {
        self.span = span;
        self
    }

    /// Returns the source span of this parser.
    pub fn span(&self) -> Span {
        self.span
    }

    /// Returns whether the parser has no remaining elements.
    pub fn is_empty(&self) -> bool {
        self.form.is_empty()
    }

    /// Replaces all occurrences of a symbol with another symbol, recursively.
    pub fn substitute(&mut self, variable: &str, value: &str) {
        for unit in &mut self.form {
            unit.substitute(variable, value);
        }
    }

    /// Returns the next unit without parsing it, or `None` if empty.
    pub fn next_unit(&mut self) -> Option<Unit> {
        self.count += 1;
        self.form.pop()
    }

    /// Tries to parse the next unit as the required type.
    pub fn parse_next<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
        self.count += 1;
        if let Some(token) = self.form.pop() {
            parse(token, context)
        } else {
            Result::Err(Error {
                kind: ErrorKind::NotEnoughElements(self.count),
                span: Some(self.span),
            })
        }
    }

    /// Tries to parse the rest of the current list into the required type.
    /// If not every available token is used, this will be an error.
    pub fn parse_rest<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
        let result = self.parse_list(context);
        let count = self.form.len();
        if count > 0 {
            self.form.clear();
            Err(Error {
                kind: ErrorKind::TooManyElements(count),
                span: Some(self.span),
            })
        } else {
            result
        }
    }

    /// Tries to parse as many tokens of the current list as needed into the required type.
    pub fn parse_list<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
        Parsable::parse_list(self, context)
    }
}

impl Iterator for Parser {
    type Item = Result<Self>;

    fn next(&mut self) -> Option<Result<Self>> {
        self.count += 1;
        Some(self.form.pop()?.parser())
    }
}

#[cfg(feature = "radix-parsing")]
/// Contains utilities for radix parsing.
///
/// Only compiled when the `radix-parsing` feature is enabled.
pub mod radix;