Skip to main content

token_parser/
lib.rs

1#![deny(missing_docs)]
2
/*!
Utilities for parsing a format based on nested lists into arbitrary data structures.
It is also meant to be used as a backend for parsers.
**/
7
8use std::{path::PathBuf, rc::Rc, sync::Arc};
9
10use thiserror::Error;
11
/// A trait required for all contexts being used for token parsing.
///
/// Out of the box, only the empty tuple `()` implements it.
/// The trait has no required items: it exists to stay compatible with optional features
/// and to leave room for later additions without breaking existing code.
pub trait Context {
    /// Specifies the radix when the `radix-parsing` feature is enabled.
    ///
    /// The provided implementation returns `10`.
    #[cfg(feature = "radix-parsing")]
    #[inline]
    fn radix(&self) -> u32 {
        10
    }
}
24
25impl Context for () {}
26
/// A source position with line and column (both 0-based).
///
/// Positions compare by line first and by column second, so they can be sorted
/// into source order and used as keys in hashed or ordered collections.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Span {
    /// The line number (0-based).
    pub line: usize,
    /// The column number (0-based).
    pub column: usize,
}

/// Formats the position as `line:column`, with both values shown 1-based.
impl std::fmt::Display for Span {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Stored values are 0-based; the printed form adds one to each.
        write!(f, "{}:{}", self.line + 1, self.column + 1)
    }
}
41
42/// The kind of error that occurred during token parsing.
43#[derive(Debug, Error)]
44pub enum ErrorKind {
45    /// The sublist contains less elements than expected by a specified amount.
46    #[error("Not enough elements: {0} more expected")]
47    NotEnoughElements(usize),
48
49    /// The sublist contains more elements than expected by a specified amount.
50    #[error("Too many elements: {0} unexpected")]
51    TooManyElements(usize),
52
53    /// No list is allowed in this context.
54    #[error("List not allowed")]
55    ListNotAllowed,
56
57    /// No symbol is allowed in this context.
58    #[error("Symbol not allowed")]
59    SymbolNotAllowed,
60
61    /// String parsing failed for the named type.
62    #[error("Expected {0}")]
63    StringParsing(&'static str),
64
65    /// Some specific element is invalid.
66    #[error("Invalid element")]
67    InvalidElement,
68}
69
70/// The error type for token parsing, containing a kind and an optional source position.
71#[derive(Debug)]
72pub struct Error {
73    /// The kind of error.
74    pub kind: ErrorKind,
75    /// The source position where the error occurred, if known.
76    pub span: Option<Span>,
77    /// Optional context describing what was being parsed.
78    pub context: Option<Box<str>>,
79}
80
81impl std::fmt::Display for Error {
82    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83        if let Some(span) = self.span {
84            write!(f, "{span}: ")?;
85        }
86        if let Some(ctx) = &self.context {
87            write!(f, "{ctx}: ")?;
88        }
89        write!(f, "{}", self.kind)
90    }
91}
92
93impl std::error::Error for Error {}
94
95impl From<ErrorKind> for Error {
96    fn from(kind: ErrorKind) -> Self {
97        Self {
98            kind,
99            span: None,
100            context: None,
101        }
102    }
103}
104
105impl Error {
106    fn at(mut self, span: Span) -> Self {
107        if self.span.is_none() {
108            self.span = Some(span);
109        }
110        self
111    }
112
113    /// Adds descriptive context to the error (e.g., which field was being parsed).
114    pub fn context(mut self, msg: impl Into<Box<str>>) -> Self {
115        self.context = Some(msg.into());
116        self
117    }
118}
119
120/// The result type for token parsing.
121pub type Result<T> = std::result::Result<T, Error>;
122
123/// Some unit, which represents an intermediate state.
124#[derive(Clone)]
125pub enum Unit {
126    /// The current unit is a single symbol.
127    Symbol(Box<str>, Span),
128    /// The current unit is a parser, which can yield multiple units.
129    Parser(Parser),
130}
131
132impl Unit {
133    /// Returns the source span of this unit.
134    pub fn span(&self) -> Span {
135        match self {
136            Self::Symbol(_, span) => *span,
137            Self::Parser(parser) => parser.span,
138        }
139    }
140
141    /// Returns the symbol, if applicable, as a result type.
142    pub fn symbol(self) -> Result<Box<str>> {
143        if let Self::Symbol(name, _) = self {
144            Ok(name)
145        } else {
146            Err(ErrorKind::ListNotAllowed.into())
147        }
148    }
149
150    /// Returns the parser, if applicable, as a result type.
151    pub fn parser(self) -> Result<Parser> {
152        if let Self::Parser(parser) = self {
153            Ok(parser)
154        } else {
155            Err(ErrorKind::SymbolNotAllowed.into())
156        }
157    }
158
159    /// Replaces all occurrences of a symbol with another symbol, recursively.
160    pub fn substitute(&mut self, variable: &str, value: &str) {
161        match self {
162            Self::Symbol(name, _) => {
163                if name.as_ref() == variable {
164                    *name = value.into();
165                }
166            }
167            Self::Parser(parser) => parser.substitute(variable, value),
168        }
169    }
170}
171
172impl<C: Context> Parsable<C> for Unit {
173    fn parse_symbol(name: Box<str>, span: Span, _context: &C) -> Result<Self> {
174        Ok(Self::Symbol(name, span))
175    }
176
177    fn parse_list(parser: &mut Parser, _context: &C) -> Result<Self> {
178        let form = std::mem::take(&mut parser.form);
179        let span = parser.span;
180        Ok(Self::Parser(Parser {
181            form,
182            count: 0,
183            span,
184        }))
185    }
186}
187
188/// This trait needs to be implemented for every struct which can be parsed using the token parser.
189#[allow(clippy::boxed_local)]
190pub trait Parsable<C: Context>: Sized {
191    /// When a symbol is found by the parser, this will be called.
192    fn parse_symbol(_name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
193        Err(ErrorKind::SymbolNotAllowed.into())
194    }
195
196    /// When a subparser is found by the parser, this will be called.
197    fn parse_list(_parser: &mut Parser, _context: &C) -> Result<Self> {
198        Err(ErrorKind::ListNotAllowed.into())
199    }
200}
201
202fn parse<C: Context, P: Parsable<C>>(unit: Unit, context: &C) -> Result<P> {
203    match unit {
204        Unit::Symbol(name, span) => {
205            Parsable::parse_symbol(name, span, context).map_err(|e| e.at(span))
206        }
207        Unit::Parser(mut parser) => {
208            let span = parser.span;
209            Parsable::parse_list(&mut parser, context).map_err(|e| e.at(span))
210        }
211    }
212}
213
214impl<C: Context, T: Parsable<C>> Parsable<C> for Box<T> {
215    fn parse_symbol(name: Box<str>, span: Span, context: &C) -> Result<Self> {
216        Ok(Self::new(Parsable::parse_symbol(name, span, context)?))
217    }
218
219    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
220        Ok(Self::new(parser.parse_list(context)?))
221    }
222}
223
224impl<C: Context, T: Parsable<C>> Parsable<C> for Rc<T> {
225    fn parse_symbol(name: Box<str>, span: Span, context: &C) -> Result<Self> {
226        Ok(Self::new(Parsable::parse_symbol(name, span, context)?))
227    }
228
229    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
230        Ok(Self::new(parser.parse_list(context)?))
231    }
232}
233
234impl<C: Context, T: Parsable<C>> Parsable<C> for Arc<T> {
235    fn parse_symbol(name: Box<str>, span: Span, context: &C) -> Result<Self> {
236        Ok(Self::new(Parsable::parse_symbol(name, span, context)?))
237    }
238
239    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
240        Ok(Self::new(parser.parse_list(context)?))
241    }
242}
243
244impl<C: Context, T: Parsable<C>> Parsable<C> for Vec<T> {
245    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
246        let Parser { form, count, .. } = parser;
247        form.drain(..)
248            .rev()
249            .map(|unit| {
250                *count += 1;
251                parse(unit, context)
252            })
253            .collect()
254    }
255}
256
257impl<C: Context> Parsable<C> for String {
258    fn parse_symbol(name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
259        Ok(name.into())
260    }
261}
262
263impl<C: Context> Parsable<C> for Box<str> {
264    fn parse_symbol(name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
265        Ok(name)
266    }
267}
268
269impl<C: Context> Parsable<C> for PathBuf {
270    fn parse_symbol(name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
271        Ok(name.as_ref().into())
272    }
273}
274
/// Derives `Parsable` from symbol for types which implement `FromStr`.
///
/// Accepts one or more comma-separated types, with an optional trailing comma.
/// For every type, `parse_symbol` parses the symbol text with `str::parse`; on failure
/// it returns `ErrorKind::StringParsing` carrying the type as written in the invocation.
///
/// All types are expanded by one repetition rather than by the macro calling itself,
/// so the expansion resolves even when the macro is invoked by path from another crate.
#[macro_export]
macro_rules! derive_symbol_parsable {
    ($($t:ty),+ $(,)?) => {
        $(
            impl<C: $crate::Context> $crate::Parsable<C> for $t {
                fn parse_symbol(
                    name: Box<str>,
                    _span: $crate::Span,
                    _context: &C,
                ) -> $crate::Result<Self> {
                    name.parse()
                        .map_err(|_| $crate::ErrorKind::StringParsing(stringify!($t)).into())
                }
            }
        )+
    };
}
290
291#[cfg(not(feature = "radix-parsing"))]
292mod numbers;
293derive_symbol_parsable!(bool);
294
295/// The token parser to parse the units into wanted types.
296#[derive(Clone)]
297pub struct Parser {
298    form: Vec<Unit>,
299    count: usize,
300    span: Span,
301}
302
303impl Parser {
304    /// Creates a new parser from a list of objects.
305    pub fn new<I: IntoIterator>(form: I) -> Self
306    where
307        I::Item: Into<Unit>,
308    {
309        let mut form: Vec<_> = form.into_iter().map(I::Item::into).collect();
310        form.reverse();
311        Self {
312            form,
313            count: 0,
314            span: Span::default(),
315        }
316    }
317
318    /// Sets the span for this parser (builder pattern).
319    pub fn with_span(mut self, span: Span) -> Self {
320        self.span = span;
321        self
322    }
323
324    /// Returns the source span of this parser.
325    pub fn span(&self) -> Span {
326        self.span
327    }
328
329    /// Returns whether the parser has no remaining elements.
330    pub fn is_empty(&self) -> bool {
331        self.form.is_empty()
332    }
333
334    /// Replaces all occurrences of a symbol with another symbol, recursively.
335    pub fn substitute(&mut self, variable: &str, value: &str) {
336        for unit in &mut self.form {
337            unit.substitute(variable, value);
338        }
339    }
340
341    /// Returns the next unit without parsing it, or `None` if empty.
342    pub fn next_unit(&mut self) -> Option<Unit> {
343        self.count += 1;
344        self.form.pop()
345    }
346
347    /// Tries to parse the next unit as the required type.
348    pub fn parse_next<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
349        self.count += 1;
350        if let Some(token) = self.form.pop() {
351            parse(token, context)
352        } else {
353            Result::Err(Error {
354                kind: ErrorKind::NotEnoughElements(self.count),
355                span: Some(self.span),
356                context: None,
357            })
358        }
359    }
360
361    /// Tries to parse the rest of the current list into the required type.
362    /// If not every available token is used, this will be an error.
363    pub fn parse_rest<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
364        let result = self.parse_list(context);
365        let count = self.form.len();
366        if count > 0 {
367            self.form.clear();
368            Err(Error {
369                kind: ErrorKind::TooManyElements(count),
370                span: Some(self.span),
371                context: None,
372            })
373        } else {
374            result
375        }
376    }
377
378    /// Tries to parse as many tokens of the current list as needed into the required type.
379    pub fn parse_list<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
380        Parsable::parse_list(self, context)
381    }
382}
383
384impl Iterator for Parser {
385    type Item = Result<Self>;
386
387    fn next(&mut self) -> Option<Result<Self>> {
388        self.count += 1;
389        Some(self.form.pop()?.parser())
390    }
391}
392
/// Contains utilities for radix parsing.
#[cfg(feature = "radix-parsing")]
pub mod radix;