parser_combinators/
primitives.rs

1use std::fmt;
2use std::error::Error as StdError;
3use std::any::Any;
4
5///Struct which represents a position in a source file
6#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
7pub struct SourcePosition {
8    ///Current line of the input
9    pub line: i32,
10    ///Current column of the input
11    pub column: i32
12}
13
14///Struct which represents a position in a byte stream
15#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
16pub struct BytePosition {
17    ///Current position
18    pub position: usize
19}
20
21impl fmt::Display for BytePosition {
22    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
23        write!(f, "position: {}", self.position)
24    }
25}
26
27///Enum holding error information
28///As there is implementations of `From` for `T: Positioner`, `String` and `&'static str` the
29///constructor need not be used directly as calling `msg.into()` should turn a message into the
30///correct `Info` variant
31#[derive(Clone, Debug)]
32pub enum Info<T> {
33    Token(T),
34    Owned(String),
35    Borrowed(&'static str)
36}
37
38impl <T: PartialEq> PartialEq for Info<T> {
39    fn eq(&self, other: &Info<T>) -> bool {
40        match (self, other) {
41            (&Info::Token(ref l), &Info::Token(ref r)) => l == r,
42            (&Info::Owned(ref l), &Info::Owned(ref r)) => l == r,
43            (&Info::Borrowed(ref l), &Info::Owned(ref r)) => l == r,
44            (&Info::Owned(ref l), &Info::Borrowed(ref r)) => l == r,
45            (&Info::Borrowed(ref l), &Info::Borrowed(ref r)) => l == r,
46            _ => false
47        }
48    }
49}
50impl <T: fmt::Display> fmt::Display for Info<T> {
51    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
52        match *self {
53            Info::Token(ref c) => write!(f, "{}", c),
54            Info::Owned(ref s) => write!(f, "{}", s),
55            Info::Borrowed(s) => write!(f, "{}", s),
56        }
57    }
58}
59
60impl <T: Positioner> From<T> for Info<T> {
61    fn from(s: T) -> Info<T> {
62        Info::Token(s)
63    }
64}
65
66impl <T> From<String> for Info<T> {
67    fn from(s: String) -> Info<T> {
68        Info::Owned(s)
69    }
70}
71
72impl <T> From<&'static str> for Info<T> {
73    fn from(s: &'static str) -> Info<T> {
74        Info::Borrowed(s)
75    }
76}
77
78///Enum used to store information about an error that has occured
79#[derive(Debug)]
80pub enum Error<T> {
81    ///Error indicating an unexpected token has been encountered in the stream
82    Unexpected(T),
83    ///Error indicating that the parser expected something else
84    Expected(Info<T>),
85    ///Generic message
86    Message(Info<T>),
87    ///Variant for containing other types of errors
88    Other(Box<StdError+Send>)
89}
90
91impl <T: PartialEq> PartialEq for Error<T> {
92    fn eq(&self, other: &Error<T>) -> bool {
93        match (self, other) {
94            (&Error::Unexpected(ref l), &Error::Unexpected(ref r)) => l == r,
95            (&Error::Expected(ref l), &Error::Expected(ref r)) => l == r,
96            (&Error::Message(ref l), &Error::Message(ref r)) => l == r,
97            _ => false
98        }
99    }
100}
101
102impl <E, T> From<E> for Error<T> where E: StdError + 'static + Send {
103    fn from(e: E) -> Error<T> {
104        Error::Other(Box::new(e))
105    }
106}
107
108impl <T> Error<T> {
109    pub fn end_of_input() -> Error<T> {
110        Error::Message("End of input".into())
111    }
112}
113
114///Enum used to indicate if a parser consumed any items of the stream it was given as an input
115#[derive(Clone, PartialEq, Debug, Copy)]
116pub enum Consumed<T> {
117    ///Constructor indicating that the parser has consumed elements
118    Consumed(T),
119    ///Constructor indicating that the parser did not consume any elements
120    Empty(T)
121}
122
123impl <T> Consumed<T> {
124
125    ///Returns true if `self` is empty
126    pub fn is_empty(&self) -> bool {
127        match *self {
128            Consumed::Empty(_) => true,
129            Consumed::Consumed(_) => false
130        }
131    }
132
133    ///Extracts the contained value
134    pub fn into_inner(self) -> T {
135        match self {
136            Consumed::Empty(x) => x,
137            Consumed::Consumed(x) => x
138        }
139    }
140
141    ///Converts `self` into the Consumed state
142    pub fn as_consumed(self) -> Consumed<T> {
143        Consumed::Consumed(self.into_inner())
144    }
145
146    ///Converts `self` into theEmpty state
147    pub fn as_empty(self) -> Consumed<T> {
148        Consumed::Empty(self.into_inner())
149    }
150
151    ///Maps over the contained value without changing the consumed state
152    pub fn map<F, U>(self, f: F) -> Consumed<U>
153        where F: FnOnce(T) -> U {
154        match self {
155            Consumed::Empty(x) => Consumed::Empty(f(x)),
156            Consumed::Consumed(x) => Consumed::Consumed(f(x))
157        }
158    }
159
160    ///Combines the Consumed flags from `self` and the result of `f`
161    ///
162    ///```
163    /// # extern crate parser_combinators as pc;
164    /// # use pc::*;
165    /// # fn main() {
166    /// //Parses a characther of string literal and handles the escaped characthers \\ and \" as \
167    /// //and " respectively
168    /// fn char(input: State<&str>) -> ParseResult<char, &str> {
169    ///     let (c, input) = try!(satisfy(|c| c != '"').parse_state(input));
170    ///     match c {
171    ///         //Since the `char` parser has already consumed some of the input `combine` is used
172    ///         //propagate the consumed state to the next part of the parser
173    ///         '\\' => input.combine(|input| {
174    ///             satisfy(|c| c == '"' || c == '\\')
175    ///                 .map(|c| {
176    ///                     match c {
177    ///                         '"' => '"',
178    ///                         '\\' => '\\',
179    ///                         c => c
180    ///                     }
181    ///                 })
182    ///                 .parse_state(input)
183    ///             }),
184    ///         _ => Ok((c, input))
185    ///     }
186    /// }
187    /// let result = many(parser(char))
188    ///     .parse(r#"abc\"\\"#);
189    /// assert_eq!(result, Ok((r#"abc"\"#.to_string(), "")));
190    /// }
191    ///```
192    pub fn combine<F, U, I>(self, f: F) -> ParseResult<U, I, I::Item>
193        where F: FnOnce(T) -> ParseResult<U, I, I::Item>
194            , I: Stream {
195        match self {
196            Consumed::Consumed(x) => {
197                match f(x) {
198                    Ok((v, Consumed::Empty(rest))) => Ok((v, Consumed::Consumed(rest))),
199                    Err(Consumed::Empty(err)) => Err(Consumed::Consumed(err)),
200                    y => y
201                }
202            }
203            Consumed::Empty(x) => f(x)
204        }
205    }
206}
207///Struct which hold information about an error that occured at a specific position.
208///Can hold multiple instances of `Error` if more that one error occured at the position.
209#[derive(Debug, PartialEq)]
210pub struct ParseError<P: Positioner> {
211    ///The position where the error occured
212    pub position: P::Position,
213    ///A vector containing specific information on what errors occured at `position`
214    pub errors: Vec<Error<P>>
215}
216
217impl <P: Positioner> ParseError<P> {
218    
219    pub fn new(position: P::Position, error: Error<P>) -> ParseError<P> {
220        ParseError::from_errors(position, vec![error])
221    }
222
223    pub fn empty(position: P::Position) -> ParseError<P> {
224        ParseError::from_errors(position, vec![])
225    }
226
227    pub fn from_errors(position: P::Position, errors: Vec<Error<P>>) -> ParseError<P> {
228        ParseError { position: position, errors: errors }
229    }
230
231    pub fn end_of_input(position: P::Position) -> ParseError<P> {
232        ParseError::from_errors(position, vec![Error::end_of_input()])
233    }
234
235    pub fn add_message<S>(&mut self, message: S)
236        where S: Into<Info<P>> {
237        self.add_error(Error::Message(message.into()));
238    }
239
240    pub fn add_error(&mut self, message: Error<P>) {
241        //Don't add duplicate errors
242        if self.errors.iter().find(|msg| **msg == message).is_none() {
243            self.errors.push(message);
244        }
245    }
246
247    pub fn set_expected(&mut self, message: Info<P>) {
248        //Remove all other expected messages
249        self.errors.retain(|e| match *e { Error::Expected(_) => false, _ => true });
250        self.errors.push(Error::Expected(message));
251    }
252
253    pub fn merge(mut self, other: ParseError<P>) -> ParseError<P> {
254        use std::cmp::Ordering;
255        //Only keep the errors which occured after consuming the most amount of data
256        match self.position.cmp(&other.position) {
257            Ordering::Less => other,
258            Ordering::Greater => self,
259            Ordering::Equal => {
260                for message in other.errors.into_iter() {
261                    self.add_error(message);
262                }
263                self
264            }
265        }
266    }
267}
268
269impl <P> StdError for ParseError<P>
270    where P: Positioner + fmt::Display + fmt::Debug + Any
271        , P::Position: fmt::Display + fmt::Debug + Any {
272    fn description(&self) -> &str { "parse error" }
273}
274
275impl <P: Positioner + fmt::Display> fmt::Display for ParseError<P>
276    where P::Position: fmt::Display {
277    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
278        try!(writeln!(f, "Parse error at {}", self.position));
279
280        //First print the token that we did not expect
281        //There should really just be one unexpected message at this point though we print them
282        //all to be safe
283        let unexpected = self.errors.iter()
284            .filter(|e| match **e { Error::Unexpected(_) => true, _ => false } );
285        for error in unexpected {
286            try!(writeln!(f, "{}", error));
287        }
288
289        //Then we print out all the things that were expected in a comma separated list
290        //'Expected 'a', 'expression' or 'let'
291        let expected_count = self.errors.iter()
292            .filter(|e| match **e { Error::Expected(_) => true, _ => false } )
293            .count();
294        let mut i = 0;
295        for error in self.errors.iter() {
296            match *error {
297                Error::Expected(ref message) => {
298                    i += 1;
299                    if i == 1 {
300                        try!(write!(f, "Expected"));
301                    }
302                    else if i == expected_count {//Last expected message to be written
303                        try!(write!(f, " or"));
304                    }
305                    else {
306                        try!(write!(f, ","));
307                    }
308                    try!(write!(f, " '{}'", message));
309                }
310                _ => ()
311            }
312        }
313        if expected_count != 0 {
314            try!(writeln!(f, ""));
315        }
316        //If there are any generic messages we print them out last
317        let messages = self.errors.iter()
318            .filter(|e| match **e { Error::Message(_) | Error::Other(_) => true, _ => false } );
319        for error in messages {
320            try!(writeln!(f, "{}", error));
321        }
322        Ok(())
323    }
324}
325impl fmt::Display for SourcePosition {
326    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
327        write!(f, "line: {}, column: {}", self.line, self.column)
328    }
329}
330impl <T: fmt::Display> fmt::Display for Error<T> {
331    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
332        match *self {
333            Error::Unexpected(ref c) => write!(f, "Unexpected token '{}'", c),
334            Error::Expected(ref s) => write!(f, "Expected {}", s),
335            Error::Message(ref msg) => write!(f, "{}", msg),
336            Error::Other(ref err) => err.fmt(f)
337        }
338    }
339}
340
341///The `State<I>` struct keeps track of the current position in the stream `I`
342#[derive(Clone, PartialEq)]
343pub struct State<I>
344    where I: Stream {
345    pub position: <I::Item as Positioner>::Position,
346    pub input: I
347}
348
349impl <I> fmt::Debug for State<I>
350    where I: Stream + fmt::Debug
351        , <I::Item as Positioner>::Position: fmt::Debug {
352    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
353        write!(f, "State {{ position: {:?}, input: {:?} }}", self.position, self.input)
354    }
355}
356
357impl <I: Stream> State<I> {
358    pub fn new(input: I) -> State<I> {
359        State { position: <I::Item as Positioner>::start(), input: input }
360    }
361
362    pub fn as_empty(&self) -> State<I> {
363        State { position: self.position.clone(), input: self.input.clone() }
364    }
365
366    ///`uncons` is the most general way of extracting and item from a stream
367    ///It takes a function `f` as argument which should update the position
368    ///according to the item that was extracted
369    ///Usually you want to use `uncons_char` instead which works directly on character streams
370    pub fn uncons(self) -> ParseResult<I::Item, I, I::Item> {
371        let State { mut position, input, .. } = self;
372        match input.uncons() {
373            Ok((c, input)) => {
374                c.update(&mut position);
375                Ok((c, Consumed::Consumed(State { position: position, input: input })))
376            }
377            Err(err) => Err(Consumed::Empty(ParseError::new(position, err)))
378        }
379    }
380    pub fn update(mut self, i: I::Item, rest: I) -> ParseResult<I::Item, I, I::Item> {
381        i.update(&mut self.position);
382        self.input = rest;
383        Ok((i, Consumed::Consumed(self)))
384    }
385}
386
387///A type alias over the specific `Result` type used by parsers to indicate wether they were
388///successful or not.
389///`O` is the type that is output on success
390///`I` is the specific stream type used in the parser
391///`T` is the item type of `I`, this parameter will be removed once type declarations are allowed
392///to have trait bounds
393pub type ParseResult<O, I, T> = Result<(O, Consumed<State<I>>), Consumed<ParseError<T>>>;
394
395///A stream is a sequence of items that can be extracted one by one
396pub trait Stream : Clone {
397    type Item: Positioner;
398    ///Takes a stream and removes its first item, yielding the item and the rest of the elements
399    ///Returns `Err` when no more elements could be retrieved
400    fn uncons(self) -> Result<(Self::Item, Self), Error<Self::Item>>;
401}
402
403impl <'a> Stream for &'a str {
404    type Item = char;
405    fn uncons(self) -> Result<(char, &'a str), Error<char>> {
406        match self.chars().next() {
407            Some(c) => Ok((c, &self[c.len_utf8()..])),
408            None => Err(Error::end_of_input())
409        }
410    }
411}
412
413impl <'a, T> Stream for &'a [T]
414    where T: Positioner {
415    type Item = &'a T;
416    fn uncons(self) -> Result<(&'a T, &'a [T]), Error<&'a T>> {
417        if self.len() > 0 {
418            Ok((&self[0], &self[1..]))
419        }
420        else {
421            Err(Error::end_of_input())
422        }
423    }
424}
425
426///Wrapper around iterators which allows them to be treated as a stream.
427///Returned by `from_iter`.
428#[derive(Clone, Debug)]
429pub struct IteratorStream<I>(I)
430    where I: Iterator + Clone;
431
432
433///Converts an `Iterator` into a stream.
434pub fn from_iter<I>(iter: I) -> IteratorStream<I>
435    where I: Iterator + Clone {
436    IteratorStream(iter)
437}
438
439impl <I: Iterator + Clone> Stream for IteratorStream<I>
440    where I::Item: Positioner {
441    type Item = <I as Iterator>::Item;
442    fn uncons(mut self) -> Result<(I::Item, Self), Error<I::Item>> {
443        match self.0.next() {
444            Some(x) => Ok((x, self)),
445            None => Err(Error::end_of_input())
446        }
447    }
448}
449
450///`Positioner` represents the operations needed to update a position given an item from the stream
451///When implementing stream for custom token type this must be implemented for that token to allow
452///the position to be updated
453pub trait Positioner: Clone + PartialEq {
454    type Position: Clone + Ord;
455    ///Creates a start position
456    fn start() -> Self::Position;
457    ///Updates the position given that `self` has been taken from the stream
458    fn update(&self, position: &mut Self::Position);
459}
460impl <'a, T> Positioner for &'a T
461    where T: Positioner {
462    type Position = <T as Positioner>::Position;
463    fn start() -> <T as Positioner>::Position {
464        <T as Positioner>::start()
465    }
466    fn update(&self, position: &mut <T as Positioner>::Position) {
467        (*self).update(position)
468    }
469}
470
471impl Positioner for char {
472    type Position = SourcePosition;
473    fn start() -> SourcePosition {
474        SourcePosition { line: 1, column: 1 }
475    }
476    fn update(&self, position: &mut SourcePosition) {
477        position.column += 1;
478        if *self == '\n' {
479            position.column = 1;
480            position.line += 1;
481        }
482    }
483}
484
485impl Positioner for u8 {
486    type Position = BytePosition;
487
488    fn start() -> BytePosition {
489        BytePosition { position: 0 }
490    }
491
492    fn update(&self, b: &mut BytePosition) {
493        b.position += 1;
494    }
495}
496
497///By implementing the `Parser` trait a type says that it can be used to parse an input stream into
498///the type `Output`.
499///
500///All methods have a default implementation but there needs to be at least an implementation of
501///`parse_state` or`parse_lazy`. If `parse_ok` is implemented an implementation of `add_error` is
502///also recommended to improve error reporting.
503pub trait Parser {
504    ///A type implementing the `Stream` trait which is the specific type
505    ///that is parsed.
506    type Input: Stream;
507    ///The type which is returned when the parsing is successful.
508    type Output;
509
510    ///Entrypoint of the parser
511    ///Takes some input and tries to parse it returning a `ParseResult`
512    fn parse(&mut self, input: Self::Input) -> Result<(Self::Output, Self::Input), ParseError<<Self::Input as Stream>::Item>> {
513        match self.parse_state(State::new(input)) {
514            Ok((v, state)) => Ok((v, state.into_inner().input)),
515            Err(error) => Err(error.into_inner())
516        }
517    }
518    ///Parses using the state `input` by calling Stream::uncons one or more times
519    ///On success returns `Ok((value, new_state))` on failure it returns `Err(error)`
520    fn parse_state(&mut self, input: State<Self::Input>) -> ParseResult<Self::Output, Self::Input, <Self::Input as Stream>::Item> {
521        let mut result = self.parse_lazy(input.clone());
522        if let Err(Consumed::Empty(ref mut error)) = result {
523            if let Ok((t, _)) = input.input.uncons() {
524                error.add_error(Error::Unexpected(t.into()));
525            }
526            self.add_error(error);
527        }
528        result
529    }
530
531    ///Specialized version of parse_state where the parser does not need to add an error to the
532    ///`ParseError` when it does not consume any input before encountering the error.
533    ///Instead the error can be added later through the `add_error` method
534    fn parse_lazy(&mut self, input: State<Self::Input>) -> ParseResult<Self::Output, Self::Input, <Self::Input as Stream>::Item> {
535        self.parse_state(input)
536    }
537
538    ///Adds the first error that would normally be returned by this parser if it failed
539    fn add_error(&mut self, _error: &mut ParseError<<Self::Input as Stream>::Item>) {
540    }
541}
542impl <'a, I, O, P: ?Sized> Parser for &'a mut P 
543    where I: Stream, P: Parser<Input=I, Output=O> {
544    type Input = I;
545    type Output = O;
546    fn parse_state(&mut self, input: State<I>) -> ParseResult<O, I, I::Item> {
547        (**self).parse_state(input)
548    }
549    fn parse_lazy(&mut self, input: State<I>) -> ParseResult<O, I, I::Item> {
550        (**self).parse_lazy(input)
551    }
552    fn add_error(&mut self, error: &mut ParseError<<Self::Input as Stream>::Item>) {
553        (**self).add_error(error)
554    }
555}
556impl <I, O, P: ?Sized> Parser for Box<P> 
557    where I: Stream, P: Parser<Input=I, Output=O> {
558    type Input = I;
559    type Output = O;
560    fn parse_state(&mut self, input: State<I>) -> ParseResult<O, I, I::Item> {
561        (**self).parse_state(input)
562    }
563    fn parse_lazy(&mut self, input: State<I>) -> ParseResult<O, I, I::Item> {
564        (**self).parse_lazy(input)
565    }
566    fn add_error(&mut self, error: &mut ParseError<<Self::Input as Stream>::Item>) {
567        (**self).add_error(error)
568    }
569}