welly_parser/
word.rs

1//! Welly's words, including keywords.
2
3use std::collections::{HashMap};
4use std::{fmt};
5
6use super::{Tree, Stream, Context, Parse};
7
8/// Represents a contiguous string of ASCII whitespace characters.
9///
10/// The characters can only be retrieved if you know the `Whitespace`'s
11/// [`Location`].
12///
13/// [`Location`]: super::Location
14#[derive(Debug, Clone, PartialEq)]
15pub struct Whitespace;
16
17impl Tree for Whitespace {}
18
19/// Represents a contiguous string of ASCII symbol characters.
20///
21/// Symbol characters are those which appear in Welly's arithmetic operators.
22/// Brackets, commas and semicolons are not considered to by symbol characters.
23#[derive(Debug, Clone, PartialEq)]
24pub struct Symbol(pub String);
25
26impl Tree for Symbol {}
27
28/// Represents a contiguous string of ASCII alpha-numeric characters.
29///
30/// Underscore is considered to be alpha-numeric. Note that this type can
31/// represent a decimal integer.
32#[derive(Debug, Clone, PartialEq)]
33pub struct Alphanumeric(pub String);
34
35impl Tree for Alphanumeric {}
36
37// ----------------------------------------------------------------------------
38
/// The three kinds of character that join with neighbours of the same kind
/// to form a single word.
#[derive(Clone, Copy, Debug, PartialEq)]
enum CharacterClass {
    /// Whitespace.
    WHITESPACE,

    /// A character that may be part of a multi-character operator.
    SYMBOL,

    /// An ASCII letter, an ASCII digit, or an underscore.
    ALPHANUMERIC,
}
51
52impl CharacterClass {
53    /// Map `s` to a `Self`, if possible.
54    fn classify(c: char) -> Option<Self> {
55        use CharacterClass::*;
56        match c {
57            '\t' | '\n' | '\r' | ' ' =>
58                Some(WHITESPACE),
59            '!' | '$' | '%' | '^' | '&' | '*' | '-' | '+' | '=' | ':' | '@' | '~' | '<' | '>' | '?' | '.' | '/' =>
60                Some(SYMBOL),
61            '0'..='9' | 'A'..='Z' | 'a'..='z' | '_' =>
62                Some(ALPHANUMERIC),
63            _ => None,
64        }
65    }
66
67    /// Combine `self` with `s` to make a [`dyn Tree`].
68    fn wrap(self, s: String) -> Box<dyn Tree> {
69        use CharacterClass::*;
70        match self {
71            WHITESPACE => Box::new(Whitespace),
72            SYMBOL => Box::new(Symbol(s)),
73            ALPHANUMERIC => Box::new(Alphanumeric(s)),
74        }
75    }
76}
77
78// ----------------------------------------------------------------------------
79
80/// A [`Parse`] implementation that recognises [`Whitespace`]s, [`Symbol`]s and
81/// [`Alphanumeric`]s.
82///
83/// It parses a [`Stream`] that contains [`char`]s.
84#[derive(Default)]
85pub struct Parser(HashMap<&'static str, Box<dyn Fn() -> Box<dyn Tree>>>);
86
87impl Parser {
88    pub fn add_keywords<T: Tree + Clone>(&mut self) {
89        T::declare_keywords(|name, tree| {
90            let old = self.0.insert(name, Box::new(move || Box::new(tree.clone())));
91            assert!(old.is_none(), "Keyword '{}' has multiple meanings", name);
92        });
93    }
94}
95
96impl fmt::Debug for Parser {
97    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
98        f.debug_tuple("Parser")
99            .field(&Vec::from_iter(self.0.keys().copied()))
100            .finish()
101    }
102}
103
104impl<'a> Parse for &'a Parser {
105    fn parse(
106        &self,
107        input: &mut Context<impl Stream>,
108    ) -> Result<Box<dyn Tree>, String> {
109        if let Some(c) = input.read::<char>()? {
110            if let Some(cc) = CharacterClass::classify(*c) {
111                let mut s = String::new();
112                s.push(*c);
113                while let Some(c) = input.read_if(
114                    |&c| CharacterClass::classify(c) == Some(cc)
115                )? {
116                    s.push(*c);
117                }
118                Ok(if let Some(f) = self.0.get(&s.as_ref()) {
119                    f()
120                } else {
121                    s.shrink_to_fit();
122                    cc.wrap(s)
123                })
124            } else {
125                Ok(c)
126            }
127        } else {
128            input.read_any()
129        }
130    }
131}
132
133impl Parse for Parser {
134    fn parse(
135        &self,
136        input: &mut Context<impl Stream>,
137    ) -> Result<Box<dyn Tree>, String> {
138        (&self).parse(input)
139    }
140}
141
142// ----------------------------------------------------------------------------
143
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Characters;

    /// A tiny stand-in for some of Welly's keywords.
    #[derive(Clone, Copy, Debug, PartialEq)]
    enum Keyword {RETURN, EQUALS}

    impl Tree for Keyword {
        fn declare_keywords(mut declare: impl FnMut(&'static str, Self)) {
            declare("return", Keyword::RETURN);
            declare("==", Keyword::EQUALS);
        }
    }

    /// Keywords replace the plain words they spell, while every other word
    /// comes out as whitespace, an alpha-numeric, or a bare character.
    #[test]
    fn keywords() {
        let mut words = Parser::default();
        words.add_keywords::<Keyword>();
        let source = Characters::new("return foo==69;", true);
        let mut stream = words.parse_stream(source);
        assert_eq!(stream.read(), Keyword::RETURN);
        assert_eq!(stream.read(), Whitespace);
        assert_eq!(stream.read(), Alphanumeric("foo".into()));
        assert_eq!(stream.read(), Keyword::EQUALS);
        assert_eq!(stream.read(), Alphanumeric("69".into()));
        assert_eq!(stream.read(), ';');
        assert_eq!(stream.read(), crate::EndOfFile);
    }
}
174}