Skip to main content

token_parser/
lib.rs

1#![deny(missing_docs)]
2
3/*!
Utilities for parsing a format based on nested lists into arbitrary data structures.
It is also meant to be used as a backend for parsers.
6**/
7
8use std::{path::PathBuf, rc::Rc, sync::Arc};
9
10use thiserror::Error;
11
/// A trait required for all contexts being used for token parsing.
///
/// By default, only the empty tuple implements it.
/// Without the `radix-parsing` feature the trait has no items. It exists to achieve
/// compatibility with features and to allow more changes without breaking anything.
pub trait Context {
    #[cfg(feature = "radix-parsing")]
    #[inline]
    /// Specifies the radix if the feature radix parsing is enabled.
    ///
    /// The default implementation returns `10`.
    fn radix(&self) -> u32 {
        10
    }
}

// The unit type overrides nothing, so it relies entirely on the trait's defaults.
impl Context for () {}
26
/// A position in the source text, given as a line and a column.
///
/// Both fields are stored 0-based; the `Display` output is 1-based.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Span {
    /// Zero-based line index.
    pub line: usize,
    /// Zero-based column index.
    pub column: usize,
}

impl std::fmt::Display for Span {
    /// Writes the position as `line:column`, with both parts shifted to 1-based numbers.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { line, column } = *self;
        write!(f, "{}:{}", line + 1, column + 1)
    }
}
41
/// The kind of error that occurred during token parsing.
///
/// The user-facing message of each variant is given by its `#[error(...)]` attribute.
#[derive(Debug, Error)]
pub enum ErrorKind {
    /// The sublist contains fewer elements than expected by a specified amount.
    #[error("Not enough elements: {0} more expected")]
    NotEnoughElements(usize),

    /// The sublist contains more elements than expected by a specified amount.
    #[error("Too many elements: {0} less expected")]
    TooManyElements(usize),

    /// No list is allowed in this context.
    #[error("List not allowed")]
    ListNotAllowed,

    /// No symbol is allowed in this context.
    #[error("Symbol not allowed")]
    SymbolNotAllowed,

    /// A symbol could not be parsed from its string form.
    #[error("String parsing error")]
    StringParsing,

    /// Some specific element is invalid.
    #[error("Invalid element")]
    InvalidElement,
}
69
70/// The error type for token parsing, containing a kind and an optional source position.
71#[derive(Debug)]
72pub struct Error {
73    /// The kind of error.
74    pub kind: ErrorKind,
75    /// The source position where the error occurred, if known.
76    pub span: Option<Span>,
77}
78
79impl std::fmt::Display for Error {
80    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
81        if let Some(span) = self.span {
82            write!(f, "{span}: {}", self.kind)
83        } else {
84            write!(f, "{}", self.kind)
85        }
86    }
87}
88
89impl std::error::Error for Error {}
90
91impl From<ErrorKind> for Error {
92    fn from(kind: ErrorKind) -> Self {
93        Self { kind, span: None }
94    }
95}
96
97impl Error {
98    fn at(self, span: Span) -> Self {
99        Self {
100            kind: self.kind,
101            span: self.span.or(Some(span)),
102        }
103    }
104}
105
106/// The result type for token parsing.
107pub type Result<T> = std::result::Result<T, Error>;
108
109/// Some unit, which represents an intermediate state.
110#[derive(Clone)]
111pub enum Unit {
112    /// The current unit is a single symbol.
113    Symbol(Box<str>, Span),
114    /// The current unit is a parser, which can yield multiple units.
115    Parser(Parser),
116}
117
118impl Unit {
119    /// Returns the source span of this unit.
120    pub fn span(&self) -> Span {
121        match self {
122            Self::Symbol(_, span) => *span,
123            Self::Parser(parser) => parser.span,
124        }
125    }
126
127    /// Returns the symbol, if applicable, as a result type.
128    pub fn symbol(self) -> Result<Box<str>> {
129        if let Self::Symbol(name, _) = self {
130            Ok(name)
131        } else {
132            Err(ErrorKind::ListNotAllowed.into())
133        }
134    }
135
136    /// Returns the parser, if applicable, as a result type.
137    pub fn parser(self) -> Result<Parser> {
138        if let Self::Parser(parser) = self {
139            Ok(parser)
140        } else {
141            Err(ErrorKind::SymbolNotAllowed.into())
142        }
143    }
144
145    /// Replaces all occurrences of a symbol with another symbol, recursively.
146    pub fn substitute(&mut self, variable: &str, value: &str) {
147        match self {
148            Self::Symbol(name, _) => {
149                if name.as_ref() == variable {
150                    *name = value.into();
151                }
152            }
153            Self::Parser(parser) => parser.substitute(variable, value),
154        }
155    }
156}
157
158impl<C: Context> Parsable<C> for Unit {
159    fn parse_symbol(name: Box<str>, span: Span, _context: &C) -> Result<Self> {
160        Ok(Self::Symbol(name, span))
161    }
162
163    fn parse_list(parser: &mut Parser, _context: &C) -> Result<Self> {
164        let form = std::mem::take(&mut parser.form);
165        let span = parser.span;
166        Ok(Self::Parser(Parser {
167            form,
168            count: 0,
169            span,
170        }))
171    }
172}
173
/// This trait needs to be implemented for every struct which can be parsed using the token parser.
///
/// Both methods have default implementations that reject their input, so an implementor
/// only needs to override the form (symbol and/or list) it accepts.
// NOTE(review): the lint is presumably silenced because the default `parse_symbol`
// receives a `Box<str>` it never uses — confirm.
#[allow(clippy::boxed_local)]
pub trait Parsable<C: Context>: Sized {
    /// When a symbol is found by the parser, this will be called.
    ///
    /// The default implementation fails with [`ErrorKind::SymbolNotAllowed`].
    fn parse_symbol(_name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
        Err(ErrorKind::SymbolNotAllowed.into())
    }

    /// When a subparser is found by the parser, this will be called.
    ///
    /// The default implementation fails with [`ErrorKind::ListNotAllowed`].
    fn parse_list(_parser: &mut Parser, _context: &C) -> Result<Self> {
        Err(ErrorKind::ListNotAllowed.into())
    }
}
187
188fn parse<C: Context, P: Parsable<C>>(unit: Unit, context: &C) -> Result<P> {
189    match unit {
190        Unit::Symbol(name, span) => {
191            Parsable::parse_symbol(name, span, context).map_err(|e| e.at(span))
192        }
193        Unit::Parser(mut parser) => {
194            let span = parser.span;
195            Parsable::parse_list(&mut parser, context).map_err(|e| e.at(span))
196        }
197    }
198}
199
200impl<C: Context, T: Parsable<C>> Parsable<C> for Box<T> {
201    fn parse_symbol(name: Box<str>, span: Span, context: &C) -> Result<Self> {
202        Ok(Self::new(Parsable::parse_symbol(name, span, context)?))
203    }
204
205    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
206        Ok(Self::new(parser.parse_list(context)?))
207    }
208}
209
210impl<C: Context, T: Parsable<C>> Parsable<C> for Rc<T> {
211    fn parse_symbol(name: Box<str>, span: Span, context: &C) -> Result<Self> {
212        Ok(Self::new(Parsable::parse_symbol(name, span, context)?))
213    }
214
215    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
216        Ok(Self::new(parser.parse_list(context)?))
217    }
218}
219
220impl<C: Context, T: Parsable<C>> Parsable<C> for Arc<T> {
221    fn parse_symbol(name: Box<str>, span: Span, context: &C) -> Result<Self> {
222        Ok(Self::new(Parsable::parse_symbol(name, span, context)?))
223    }
224
225    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
226        Ok(Self::new(parser.parse_list(context)?))
227    }
228}
229
230impl<C: Context, T: Parsable<C>> Parsable<C> for Vec<T> {
231    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
232        let Parser { form, count, .. } = parser;
233        form.drain(..)
234            .rev()
235            .map(|unit| {
236                *count += 1;
237                parse(unit, context)
238            })
239            .collect()
240    }
241}
242
243impl<C: Context> Parsable<C> for String {
244    fn parse_symbol(name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
245        Ok(name.into())
246    }
247}
248
impl<C: Context> Parsable<C> for Box<str> {
    /// Accepts any symbol and returns its text unchanged, without reallocating.
    fn parse_symbol(name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
        Ok(name)
    }
}
254
255impl<C: Context> Parsable<C> for PathBuf {
256    fn parse_symbol(name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
257        Ok(name.as_ref().into())
258    }
259}
260
/// Derives `Parsable` from symbol for types which implement `FromStr`.
///
/// Accepts one or more comma-separated types; a trailing comma is allowed.
/// Each generated impl parses the symbol text with `str::parse` and maps any
/// parse failure to `ErrorKind::StringParsing`. Lists are not accepted, since
/// only `parse_symbol` is overridden.
#[macro_export]
macro_rules! derive_symbol_parsable {
    // A single repetition instead of a recursive call: the macro never has to name
    // itself, so it also works when invoked by path from a crate that has not
    // imported it.
    ($($t:ty),+ $(,)?) => {
        $(
            impl<C: $crate::Context> $crate::Parsable<C> for $t {
                fn parse_symbol(
                    name: Box<str>,
                    _span: $crate::Span,
                    _context: &C,
                ) -> $crate::Result<Self> {
                    name.parse().map_err(|_| $crate::ErrorKind::StringParsing.into())
                }
            }
        )+
    };
}
276
// Compiled only while the `radix-parsing` feature is disabled.
// NOTE(review): presumably provides the plain (non-radix) numeric `Parsable` impls — confirm in `numbers.rs`.
#[cfg(not(feature = "radix-parsing"))]
mod numbers;
// `bool` is made parsable from a symbol regardless of the feature selection.
derive_symbol_parsable!(bool);
280
281/// The token parser to parse the units into wanted types.
282#[derive(Clone)]
283pub struct Parser {
284    form: Vec<Unit>,
285    count: usize,
286    span: Span,
287}
288
289impl Parser {
290    /// Creates a new parser from a list of objects.
291    pub fn new<I: IntoIterator>(form: I) -> Self
292    where
293        I::Item: Into<Unit>,
294    {
295        let mut form: Vec<_> = form.into_iter().map(I::Item::into).collect();
296        form.reverse();
297        Self {
298            form,
299            count: 0,
300            span: Span::default(),
301        }
302    }
303
304    /// Sets the span for this parser (builder pattern).
305    pub fn with_span(mut self, span: Span) -> Self {
306        self.span = span;
307        self
308    }
309
310    /// Returns the source span of this parser.
311    pub fn span(&self) -> Span {
312        self.span
313    }
314
315    /// Returns whether the parser has no remaining elements.
316    pub fn is_empty(&self) -> bool {
317        self.form.is_empty()
318    }
319
320    /// Replaces all occurrences of a symbol with another symbol, recursively.
321    pub fn substitute(&mut self, variable: &str, value: &str) {
322        for unit in &mut self.form {
323            unit.substitute(variable, value);
324        }
325    }
326
327    /// Returns the next unit without parsing it, or `None` if empty.
328    pub fn next_unit(&mut self) -> Option<Unit> {
329        self.count += 1;
330        self.form.pop()
331    }
332
333    /// Tries to parse the next unit as the required type.
334    pub fn parse_next<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
335        self.count += 1;
336        if let Some(token) = self.form.pop() {
337            parse(token, context)
338        } else {
339            Result::Err(Error {
340                kind: ErrorKind::NotEnoughElements(self.count),
341                span: Some(self.span),
342            })
343        }
344    }
345
346    /// Tries to parse the rest of the current list into the required type.
347    /// If not every available token is used, this will be an error.
348    pub fn parse_rest<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
349        let result = self.parse_list(context);
350        let count = self.form.len();
351        if count > 0 {
352            self.form.clear();
353            Err(Error {
354                kind: ErrorKind::TooManyElements(count),
355                span: Some(self.span),
356            })
357        } else {
358            result
359        }
360    }
361
362    /// Tries to parse as many tokens of the current list as needed into the required type.
363    pub fn parse_list<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
364        Parsable::parse_list(self, context)
365    }
366}
367
368impl Iterator for Parser {
369    type Item = Result<Self>;
370
371    fn next(&mut self) -> Option<Result<Self>> {
372        self.count += 1;
373        Some(self.form.pop()?.parser())
374    }
375}
376
#[cfg(feature = "radix-parsing")]
/// Contains utilities for radix parsing.
///
/// Only available when the `radix-parsing` feature is enabled.
pub mod radix;