Skip to main content

token_parser/
lib.rs

1#![deny(missing_docs)]
2
3/*!
4Some utilities for parsing some format based on nested lists into arbitrary data structures.
5It's also meant to be used as a backend for parsers.
6**/
7
8use std::{path::PathBuf, rc::Rc, sync::Arc};
9
10use thiserror::Error;
11
12#[cfg(feature = "derive")]
13pub use token_parser_derive::{Parsable, SymbolParsable};
14
/// Marker trait that every context type handed to the token parser has to implement.
///
/// Out of the box, only the unit type `()` implements it.
/// Without any feature enabled the trait has no items; it exists so that features can
/// hook into the context and so the API can grow without breaking existing code.
pub trait Context {
    /// Specifies the radix if the `radix-parsing` feature is enabled.
    ///
    /// The provided implementation returns `10`.
    #[cfg(feature = "radix-parsing")]
    #[inline]
    fn radix(&self) -> u32 {
        10
    }
}

/// The unit type is the context that is available by default.
impl Context for () {}
29
/// A position in the source text, stored as zero-based line and column indices.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Span {
    /// Zero-based line index.
    pub line: usize,
    /// Zero-based column index.
    pub column: usize,
}

impl std::fmt::Display for Span {
    /// Writes the position as `line:column`, with both values shown one-based.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { line, column } = *self;
        // The stored indices are zero-based; the printed numbers start at one.
        let (line, column) = (line + 1, column + 1);
        write!(f, "{line}:{column}")
    }
}
44
/// The kind of error that occurred during token parsing.
///
/// The enum is `#[non_exhaustive]`, so matches outside of this crate need a wildcard arm.
/// The `#[error(...)]` attributes define the text shown when a kind is displayed.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ErrorKind {
    /// The sublist contains fewer elements than the given minimum.
    ///
    /// The payload is the minimum number of elements that was expected.
    #[error("Not enough elements: at least {0} expected")]
    NotEnoughElements(usize),

    /// The sublist contains more elements than expected by a specified amount.
    ///
    /// The payload is the number of unexpected elements.
    #[error("Too many elements: {0} unexpected")]
    TooManyElements(usize),

    /// No list is allowed in this context.
    #[error("List not allowed")]
    ListNotAllowed,

    /// No symbol is allowed in this context.
    #[error("Symbol not allowed")]
    SymbolNotAllowed,

    /// String parsing failed for the named type.
    ///
    /// The payload is the name of the type that was expected.
    #[error("Expected {0}")]
    StringParsing(&'static str),

    /// The named field does not exist.
    ///
    /// The payload is the name of the unknown field.
    #[error("Unknown field {0}")]
    UnknownField(Box<str>),

    /// Some specific element is invalid.
    #[error("Invalid element")]
    InvalidElement,
}
77
78/// The error type for token parsing, containing a kind and an optional source position.
79#[derive(Debug)]
80pub struct Error {
81    /// The kind of error.
82    pub kind: ErrorKind,
83    /// The source position where the error occurred, if known.
84    pub span: Option<Span>,
85    /// Optional context describing what was being parsed.
86    pub context: Option<Box<str>>,
87}
88
89impl std::fmt::Display for Error {
90    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
91        if let Some(span) = self.span {
92            write!(f, "{span}: ")?;
93        }
94        if let Some(ctx) = &self.context {
95            write!(f, "{ctx}: ")?;
96        }
97        write!(f, "{}", self.kind)
98    }
99}
100
101impl std::error::Error for Error {}
102
103impl From<ErrorKind> for Error {
104    fn from(kind: ErrorKind) -> Self {
105        Self {
106            kind,
107            span: None,
108            context: None,
109        }
110    }
111}
112
113impl Error {
114    /// Attaches a source position to the error, keeping an already attached position.
115    pub fn at(mut self, span: Span) -> Self {
116        if self.span.is_none() {
117            self.span = Some(span);
118        }
119        self
120    }
121
122    /// Adds descriptive context to the error (e.g., which field was being parsed), keeping already attached context.
123    pub fn context(mut self, msg: impl Into<Box<str>>) -> Self {
124        if self.context.is_none() {
125            self.context = Some(msg.into());
126        }
127        self
128    }
129}
130
131/// The result type for token parsing.
132pub type Result<T> = std::result::Result<T, Error>;
133
134/// Some unit, which represents an intermediate state.
135#[derive(Clone)]
136pub enum Unit {
137    /// The current unit is a single symbol.
138    Symbol(Box<str>, Span),
139    /// The current unit is a parser, which can yield multiple units.
140    Parser(Parser),
141}
142
143impl Unit {
144    /// Returns the source span of this unit.
145    pub fn span(&self) -> Span {
146        match self {
147            Self::Symbol(_, span) => *span,
148            Self::Parser(parser) => parser.span,
149        }
150    }
151
152    /// Returns the symbol, if applicable, as a result type.
153    pub fn symbol(self) -> Result<Box<str>> {
154        match self {
155            Self::Symbol(name, _) => Ok(name),
156            Self::Parser(parser) => Err(Error::from(ErrorKind::ListNotAllowed).at(parser.span)),
157        }
158    }
159
160    /// Returns the parser, if applicable, as a result type.
161    pub fn parser(self) -> Result<Parser> {
162        match self {
163            Self::Parser(parser) => Ok(parser),
164            Self::Symbol(_, span) => Err(Error::from(ErrorKind::SymbolNotAllowed).at(span)),
165        }
166    }
167
168    /// Replaces all occurrences of a symbol with another symbol, recursively.
169    pub fn substitute(&mut self, variable: &str, value: &str) {
170        match self {
171            Self::Symbol(name, _) => {
172                if name.as_ref() == variable {
173                    *name = value.into();
174                }
175            }
176            Self::Parser(parser) => parser.substitute(variable, value),
177        }
178    }
179}
180
181impl<C: Context> Parsable<C> for Unit {
182    fn parse_symbol(name: Box<str>, span: Span, _context: &C) -> Result<Self> {
183        Ok(Self::Symbol(name, span))
184    }
185
186    fn parse_list(parser: &mut Parser, _context: &C) -> Result<Self> {
187        let form = std::mem::take(&mut parser.form);
188        let span = parser.span;
189        Ok(Self::Parser(Parser {
190            form,
191            count: 0,
192            span,
193        }))
194    }
195}
196
197/// This trait needs to be implemented for every struct which can be parsed using the token parser.
198#[allow(clippy::boxed_local)]
199pub trait Parsable<C: Context>: Sized {
200    /// When a symbol is found by the parser, this will be called.
201    fn parse_symbol(_name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
202        Err(ErrorKind::SymbolNotAllowed.into())
203    }
204
205    /// When a subparser is found by the parser, this will be called.
206    fn parse_list(_parser: &mut Parser, _context: &C) -> Result<Self> {
207        Err(ErrorKind::ListNotAllowed.into())
208    }
209}
210
211fn parse<C: Context, P: Parsable<C>>(unit: Unit, context: &C) -> Result<P> {
212    match unit {
213        Unit::Symbol(name, span) => {
214            Parsable::parse_symbol(name, span, context).map_err(|e| e.at(span))
215        }
216        Unit::Parser(mut parser) => {
217            let span = parser.span;
218            Parsable::parse_list(&mut parser, context).map_err(|e| e.at(span))
219        }
220    }
221}
222
223impl<C: Context, T: Parsable<C>> Parsable<C> for Box<T> {
224    fn parse_symbol(name: Box<str>, span: Span, context: &C) -> Result<Self> {
225        Ok(Self::new(Parsable::parse_symbol(name, span, context)?))
226    }
227
228    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
229        Ok(Self::new(parser.parse_list(context)?))
230    }
231}
232
233impl<C: Context, T: Parsable<C>> Parsable<C> for Rc<T> {
234    fn parse_symbol(name: Box<str>, span: Span, context: &C) -> Result<Self> {
235        Ok(Self::new(Parsable::parse_symbol(name, span, context)?))
236    }
237
238    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
239        Ok(Self::new(parser.parse_list(context)?))
240    }
241}
242
243impl<C: Context, T: Parsable<C>> Parsable<C> for Arc<T> {
244    fn parse_symbol(name: Box<str>, span: Span, context: &C) -> Result<Self> {
245        Ok(Self::new(Parsable::parse_symbol(name, span, context)?))
246    }
247
248    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
249        Ok(Self::new(parser.parse_list(context)?))
250    }
251}
252
253impl<C: Context, T: Parsable<C>> Parsable<C> for Vec<T> {
254    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
255        let Parser { form, count, .. } = parser;
256        form.drain(..)
257            .rev()
258            .map(|unit| {
259                *count += 1;
260                parse(unit, context)
261            })
262            .collect()
263    }
264}
265
266impl<C: Context> Parsable<C> for String {
267    fn parse_symbol(name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
268        Ok(name.into())
269    }
270}
271
272impl<C: Context> Parsable<C> for Box<str> {
273    fn parse_symbol(name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
274        Ok(name)
275    }
276}
277
278impl<C: Context> Parsable<C> for PathBuf {
279    fn parse_symbol(name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
280        Ok(name.as_ref().into())
281    }
282}
283
/// Derives `Parsable` from symbol for types which implement `FromStr`.
///
/// Accepts one or more comma separated types, optionally followed by a trailing comma.
/// For every type, `parse_symbol` is implemented by calling `str::parse` on the symbol;
/// a failure is reported as `ErrorKind::StringParsing` carrying the type as written.
///
/// All types are handled by a single repetition, so the macro never has to call
/// itself and works when invoked by path from another crate without being imported.
#[macro_export]
macro_rules! derive_symbol_parsable {
    ($($t:ty),+ $(,)?) => {
        $(
            impl<C: $crate::Context> $crate::Parsable<C> for $t {
                fn parse_symbol(
                    name: Box<str>,
                    _span: $crate::Span,
                    _context: &C,
                ) -> $crate::Result<Self> {
                    name.parse()
                        .map_err(|_| $crate::ErrorKind::StringParsing(stringify!($t)).into())
                }
            }
        )+
    };
}
299
// The private `numbers` module is only compiled when the `radix-parsing` feature is disabled.
// NOTE(review): presumably it provides the `Parsable` impls for the number types — confirm in `numbers.rs`.
#[cfg(not(feature = "radix-parsing"))]
mod numbers;
// `bool` is parsed from a symbol through its `FromStr` implementation, independent of any feature.
derive_symbol_parsable!(bool);
303
304/// The token parser to parse the units into wanted types.
305#[derive(Clone)]
306pub struct Parser {
307    form: Vec<Unit>,
308    count: usize,
309    span: Span,
310}
311
312impl Parser {
313    /// Creates a new parser from a list of objects.
314    pub fn new<I: IntoIterator>(form: I) -> Self
315    where
316        I::Item: Into<Unit>,
317    {
318        let mut form: Vec<_> = form.into_iter().map(I::Item::into).collect();
319        form.reverse();
320        Self {
321            form,
322            count: 0,
323            span: Span::default(),
324        }
325    }
326
327    /// Sets the span for this parser (builder pattern).
328    pub fn with_span(mut self, span: Span) -> Self {
329        self.span = span;
330        self
331    }
332
333    /// Returns the source span of this parser.
334    pub fn span(&self) -> Span {
335        self.span
336    }
337
338    /// Returns whether the parser has no remaining elements.
339    pub fn is_empty(&self) -> bool {
340        self.form.is_empty()
341    }
342
343    /// Replaces all occurrences of a symbol with another symbol, recursively.
344    pub fn substitute(&mut self, variable: &str, value: &str) {
345        for unit in &mut self.form {
346            unit.substitute(variable, value);
347        }
348    }
349
350    /// Returns the next unit without parsing it, or `None` if empty.
351    pub fn next_unit(&mut self) -> Option<Unit> {
352        self.count += 1;
353        self.form.pop()
354    }
355
356    /// Tries to parse the next unit as the required type.
357    pub fn parse_next<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
358        self.count += 1;
359        if let Some(token) = self.form.pop() {
360            parse(token, context)
361        } else {
362            Result::Err(Error {
363                kind: ErrorKind::NotEnoughElements(self.count),
364                span: Some(self.span),
365                context: None,
366            })
367        }
368    }
369
370    /// Tries to parse the next unit as the required type, returning `None` if no elements remain.
371    pub fn parse_next_optional<C: Context, T: Parsable<C>>(
372        &mut self,
373        context: &C,
374    ) -> Result<Option<T>> {
375        if self.is_empty() {
376            Ok(None)
377        } else {
378            self.parse_next(context).map(Some)
379        }
380    }
381
382    /// Tries to parse the rest of the current list into the required type.
383    /// If not every available token is used, this will be an error.
384    pub fn parse_rest<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
385        let result = self.parse_list(context);
386        let count = self.form.len();
387        if count > 0 {
388            self.form.clear();
389            result?;
390            Err(Error {
391                kind: ErrorKind::TooManyElements(count),
392                span: Some(self.span),
393                context: None,
394            })
395        } else {
396            result
397        }
398    }
399
400    /// Tries to parse as many tokens of the current list as needed into the required type.
401    pub fn parse_list<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
402        Parsable::parse_list(self, context)
403    }
404}
405
406impl Iterator for Parser {
407    type Item = Result<Self>;
408
409    fn next(&mut self) -> Option<Result<Self>> {
410        self.count += 1;
411        Some(self.form.pop()?.parser())
412    }
413}
414
#[cfg(feature = "radix-parsing")]
/// Contains utilities for radix parsing.
///
/// This module is only compiled when the `radix-parsing` feature is enabled.
pub mod radix;