token-parser 0.8.0

Utilities for parsing texts into data structures
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
#![deny(missing_docs)]

/*!
Some utilities for parsing some format based on nested lists into arbitrary data structures.
It's also meant to be used as a backend for parsers.
**/

use std::{path::PathBuf, rc::Rc, sync::Arc};

use thiserror::Error;

#[cfg(feature = "derive")]
pub use token_parser_derive::{Parsable, SymbolParsable};

/// A trait required for all contexts being used for token parsing.
///
/// A context is handed by shared reference to every `Parsable` method, so implementors can use
/// it to carry configuration for the parsing process.
/// By default, only the empty tuple implements it.
/// Unless the `radix-parsing` feature is enabled, the trait has no items at all. It's just there
/// to achieve compatibility with features and to allow more changes without breaking anything.
pub trait Context {
    #[cfg(feature = "radix-parsing")]
    #[inline]
    /// Specifies the radix if the feature radix parsing is enabled.
    ///
    /// The provided implementation returns `10`.
    fn radix(&self) -> u32 {
        10
    }
}

// The empty tuple is the context to use when no configuration is needed;
// it only relies on the provided items of the trait.
impl Context for () {}

/// A source position with line and column (both 0-based).
///
/// The default value is line `0`, column `0`.
/// The `Display` implementation prints both numbers 1-based.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Span {
    /// The line number (0-based).
    pub line: usize,
    /// The column number (0-based).
    pub column: usize,
}

impl std::fmt::Display for Span {
    /// Prints the position as `line:column`, shifted to 1-based numbers for human readers.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (line, column) = (self.line + 1, self.column + 1);
        write!(f, "{line}:{column}")
    }
}

/// The kind of error that occurred during token parsing.
///
/// The enum is marked as non-exhaustive, so matches outside of this crate need a wildcard arm.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ErrorKind {
    /// The sublist contains fewer elements than the given minimum.
    ///
    /// The value is the number of elements which would have been required at least.
    #[error("Not enough elements: at least {0} expected")]
    NotEnoughElements(usize),

    /// The sublist contains more elements than expected by a specified amount.
    ///
    /// The value is the number of elements which were left over.
    #[error("Too many elements: {0} unexpected")]
    TooManyElements(usize),

    /// No list is allowed in this context.
    #[error("List not allowed")]
    ListNotAllowed,

    /// No symbol is allowed in this context.
    #[error("Symbol not allowed")]
    SymbolNotAllowed,

    /// String parsing failed for the named type.
    ///
    /// The value is the name of the type which was expected.
    #[error("Expected {0}")]
    StringParsing(&'static str),

    /// The named field does not exist.
    #[error("Unknown field {0}")]
    UnknownField(Box<str>),

    /// Some specific element is invalid.
    #[error("Invalid element")]
    InvalidElement,
}

/// The error type for token parsing, containing a kind and an optional source position.
///
/// When displayed, the position and the context are printed in front of the kind,
/// each followed by `": "`, as far as they are set.
#[derive(Debug)]
pub struct Error {
    /// The kind of error.
    pub kind: ErrorKind,
    /// The source position where the error occurred, if known.
    pub span: Option<Span>,
    /// Optional context describing what was being parsed.
    pub context: Option<Box<str>>,
}

impl std::fmt::Display for Error {
    /// Prints the error as `<span>: <context>: <kind>`, leaving out the parts which are not set.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            kind,
            span,
            context,
        } = self;
        if let Some(position) = span {
            write!(f, "{position}: ")?;
        }
        if let Some(description) = context {
            write!(f, "{description}: ")?;
        }
        write!(f, "{kind}")
    }
}

// Only the provided methods of the trait are used; the kind is already part of the `Display` output.
impl std::error::Error for Error {}

impl From<ErrorKind> for Error {
    /// Wraps a bare error kind into an error which has neither a position nor a context yet.
    fn from(kind: ErrorKind) -> Self {
        let (span, context) = (None, None);
        Self {
            kind,
            span,
            context,
        }
    }
}

impl Error {
    /// Attaches a source position to the error.
    ///
    /// A position which is already attached wins, so the innermost position survives
    /// when the error is passed up through multiple layers.
    pub fn at(mut self, span: Span) -> Self {
        self.span = self.span.take().or(Some(span));
        self
    }

    /// Adds descriptive context to the error (e.g., which field was being parsed).
    ///
    /// Context which is already attached wins; in that case `msg` is not even converted.
    pub fn context(mut self, msg: impl Into<Box<str>>) -> Self {
        self.context = self.context.take().or_else(|| Some(msg.into()));
        self
    }
}

/// The result type for token parsing.
///
/// It's the standard result type with the error fixed to [`Error`].
pub type Result<T> = std::result::Result<T, Error>;

/// Some unit, which represents an intermediate state.
///
/// A unit is either a single symbol or a nested list, which is represented by its own parser.
#[derive(Clone)]
pub enum Unit {
    /// The current unit is a single symbol.
    ///
    /// It consists of the text of the symbol and its source position.
    Symbol(Box<str>, Span),
    /// The current unit is a parser, which can yield multiple units.
    Parser(Parser),
}

impl Unit {
    /// Returns the source span of this unit.
    pub fn span(&self) -> Span {
        match self {
            Self::Symbol(_, span) => *span,
            Self::Parser(parser) => parser.span,
        }
    }

    /// Returns the symbol, if applicable, as a result type.
    pub fn symbol(self) -> Result<Box<str>> {
        match self {
            Self::Symbol(name, _) => Ok(name),
            Self::Parser(parser) => Err(Error::from(ErrorKind::ListNotAllowed).at(parser.span)),
        }
    }

    /// Returns the parser, if applicable, as a result type.
    pub fn parser(self) -> Result<Parser> {
        match self {
            Self::Parser(parser) => Ok(parser),
            Self::Symbol(_, span) => Err(Error::from(ErrorKind::SymbolNotAllowed).at(span)),
        }
    }

    /// Replaces all occurrences of a symbol with another symbol, recursively.
    pub fn substitute(&mut self, variable: &str, value: &str) {
        match self {
            Self::Symbol(name, _) => {
                if name.as_ref() == variable {
                    *name = value.into();
                }
            }
            Self::Parser(parser) => parser.substitute(variable, value),
        }
    }
}

impl<C: Context> Parsable<C> for Unit {
    // A symbol is kept as it is, including its position.
    fn parse_symbol(name: Box<str>, span: Span, _context: &C) -> Result<Self> {
        let unit = Self::Symbol(name, span);
        Ok(unit)
    }

    // All remaining elements are moved out of `parser` into a fresh parser,
    // which starts counting at zero and shares the position of the original one.
    fn parse_list(parser: &mut Parser, _context: &C) -> Result<Self> {
        let remaining = Parser {
            form: std::mem::take(&mut parser.form),
            count: 0,
            span: parser.span,
        };
        Ok(Self::Parser(remaining))
    }
}

/// This trait needs to be implemented for every struct which can be parsed using the token parser.
#[allow(clippy::boxed_local)]
pub trait Parsable<C: Context>: Sized {
    /// When a symbol is found by the parser, this will be called.
    fn parse_symbol(_name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
        Err(ErrorKind::SymbolNotAllowed.into())
    }

    /// When a subparser is found by the parser, this will be called.
    fn parse_list(_parser: &mut Parser, _context: &C) -> Result<Self> {
        Err(ErrorKind::ListNotAllowed.into())
    }
}

// Parses a single unit into the requested type by dispatching to the matching `Parsable` method.
// Errors which carry no position yet are tagged with the position of the unit.
fn parse<C: Context, P: Parsable<C>>(unit: Unit, context: &C) -> Result<P> {
    // The position has to be read up front, since the unit is consumed below.
    let span = unit.span();
    let parsed = match unit {
        Unit::Parser(mut parser) => P::parse_list(&mut parser, context),
        Unit::Symbol(name, _) => P::parse_symbol(name, span, context),
    };
    parsed.map_err(|error| error.at(span))
}

// A boxed value is parsed exactly like the inner value and moved to the heap afterwards.
impl<C: Context, T: Parsable<C>> Parsable<C> for Box<T> {
    fn parse_symbol(name: Box<str>, span: Span, context: &C) -> Result<Self> {
        T::parse_symbol(name, span, context).map(Self::new)
    }

    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
        parser.parse_list::<C, T>(context).map(Self::new)
    }
}

// A reference counted value is parsed exactly like the inner value and wrapped afterwards.
impl<C: Context, T: Parsable<C>> Parsable<C> for Rc<T> {
    fn parse_symbol(name: Box<str>, span: Span, context: &C) -> Result<Self> {
        T::parse_symbol(name, span, context).map(Self::new)
    }

    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
        parser.parse_list::<C, T>(context).map(Self::new)
    }
}

// An atomically reference counted value is parsed exactly like the inner value and wrapped afterwards.
impl<C: Context, T: Parsable<C>> Parsable<C> for Arc<T> {
    fn parse_symbol(name: Box<str>, span: Span, context: &C) -> Result<Self> {
        T::parse_symbol(name, span, context).map(Self::new)
    }

    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
        parser.parse_list::<C, T>(context).map(Self::new)
    }
}

// A vector consumes all remaining elements of the list and parses each of them as `T`.
impl<C: Context, T: Parsable<C>> Parsable<C> for Vec<T> {
    fn parse_list(parser: &mut Parser, context: &C) -> Result<Self> {
        let mut elements = Self::new();
        // The units are stored back to front, so draining in reverse yields them in source order.
        // When an element fails, dropping the drain discards the units which were not visited.
        for unit in parser.form.drain(..).rev() {
            parser.count += 1;
            elements.push(parse(unit, context)?);
        }
        Ok(elements)
    }
}

// Every symbol is a valid string, so the text is taken over as it is.
impl<C: Context> Parsable<C> for String {
    fn parse_symbol(name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
        Ok(Self::from(name))
    }
}

// The symbol already has the requested type, so it's handed back unchanged.
impl<C: Context> Parsable<C> for Box<str> {
    fn parse_symbol(name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
        Ok(name)
    }
}

// The text of the symbol is used as path without any checks.
impl<C: Context> Parsable<C> for PathBuf {
    fn parse_symbol(name: Box<str>, _span: Span, _context: &C) -> Result<Self> {
        let text: &str = &name;
        Ok(Self::from(text))
    }
}

/// Derives `Parsable` from symbol for types which implement `FromStr`.
///
/// The macro takes one or more types, separated by commas; a trailing comma is accepted.
/// For each type an implementation of `Parsable` for every context is generated, which parses
/// the symbol using `str::parse`. If that fails, the error kind is `StringParsing` with the
/// name of the type as written in the macro call. Lists are still rejected by the provided
/// `parse_list` method.
#[macro_export]
macro_rules! derive_symbol_parsable {
    ($t:ty $(,)?) => {
        impl<C: $crate::Context> $crate::Parsable<C> for $t {
            fn parse_symbol(name: Box<str>, _span: $crate::Span, _context: &C) -> $crate::Result<Self> {
                name.parse().map_err(|_| $crate::ErrorKind::StringParsing(stringify!($t)).into())
            }
        }
    };
    ($t:ty, $($rest:ty),+ $(,)?) => {
        // The recursion goes through `$crate`, so the macro also works when it's called by path
        // from another crate without importing its name.
        $crate::derive_symbol_parsable!($t);
        $crate::derive_symbol_parsable!($($rest),+);
    };
}

// The `numbers` module is only compiled while the `radix-parsing` feature is disabled.
// NOTE(review): presumably it contains the plain number implementations which are replaced
// by the `radix` module otherwise — confirm against the module itself.
#[cfg(not(feature = "radix-parsing"))]
mod numbers;
// Booleans are parsed from symbols using `FromStr`, independent of any feature.
derive_symbol_parsable!(bool);

/// The token parser to parse the units into wanted types.
///
/// A parser represents one list of units and hands them out one after another.
#[derive(Clone)]
pub struct Parser {
    // The remaining units in reversed order, so the next unit can be popped from the end.
    form: Vec<Unit>,
    // The number of units which have been requested so far; used for error messages.
    count: usize,
    // The source position of the list.
    span: Span,
}

impl Parser {
    /// Creates a new parser from a list of objects.
    pub fn new<I: IntoIterator>(form: I) -> Self
    where
        I::Item: Into<Unit>,
    {
        let mut form: Vec<_> = form.into_iter().map(I::Item::into).collect();
        form.reverse();
        Self {
            form,
            count: 0,
            span: Span::default(),
        }
    }

    /// Sets the span for this parser (builder pattern).
    pub fn with_span(mut self, span: Span) -> Self {
        self.span = span;
        self
    }

    /// Returns the source span of this parser.
    pub fn span(&self) -> Span {
        self.span
    }

    /// Returns whether the parser has no remaining elements.
    pub fn is_empty(&self) -> bool {
        self.form.is_empty()
    }

    /// Replaces all occurrences of a symbol with another symbol, recursively.
    pub fn substitute(&mut self, variable: &str, value: &str) {
        for unit in &mut self.form {
            unit.substitute(variable, value);
        }
    }

    /// Returns the next unit without parsing it, or `None` if empty.
    pub fn next_unit(&mut self) -> Option<Unit> {
        self.count += 1;
        self.form.pop()
    }

    /// Tries to parse the next unit as the required type.
    pub fn parse_next<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
        self.count += 1;
        if let Some(token) = self.form.pop() {
            parse(token, context)
        } else {
            Result::Err(Error {
                kind: ErrorKind::NotEnoughElements(self.count),
                span: Some(self.span),
                context: None,
            })
        }
    }

    /// Tries to parse the next unit as the required type, returning `None` if no elements remain.
    pub fn parse_next_optional<C: Context, T: Parsable<C>>(
        &mut self,
        context: &C,
    ) -> Result<Option<T>> {
        if self.is_empty() {
            Ok(None)
        } else {
            self.parse_next(context).map(Some)
        }
    }

    /// Tries to parse the rest of the current list into the required type.
    /// If not every available token is used, this will be an error.
    pub fn parse_rest<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
        let result = self.parse_list(context);
        let count = self.form.len();
        if count > 0 {
            self.form.clear();
            result?;
            Err(Error {
                kind: ErrorKind::TooManyElements(count),
                span: Some(self.span),
                context: None,
            })
        } else {
            result
        }
    }

    /// Tries to parse as many tokens of the current list as needed into the required type.
    pub fn parse_list<C: Context, T: Parsable<C>>(&mut self, context: &C) -> Result<T> {
        Parsable::parse_list(self, context)
    }
}

impl Iterator for Parser {
    type Item = Result<Self>;

    fn next(&mut self) -> Option<Result<Self>> {
        self.count += 1;
        Some(self.form.pop()?.parser())
    }
}

#[cfg(feature = "radix-parsing")]
/// Contains utilities for radix parsing.
///
/// This module is only available if the `radix-parsing` feature is enabled.
pub mod radix;